diff --git a/CLAUDE.md b/CLAUDE.md index f8383e3..c1e6080 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -106,6 +106,16 @@ | 第三方评估 | `/Volumes/LinuxDev/OrangePi_CM5_Project/docs/OrangePi_CM5/MD_Document/KWS唤醒方案适配评估_Unity.md` | Unity APP 团队的对接评估(含 v2 微调建议)| | 工程骨架 README | `/Volumes/LinuxDev/OrangePi_CM5_Project/docs/OrangePi_CM5/MD_Document/KWS-APK-工程骨架/README.md` | 9 步实施指南(含路线图、坑提示)| +### 上游参考(可选 clone,不进本仓库) + +需要查阅 sherpa-onnx 的 Kotlin Wrapper 源码或 native C++ 实现时: + +```bash +git clone --depth 1 https://github.com/k2-fsa/sherpa-onnx ~/Desktop/sherpa-onnx-reference +``` + +`SherpaOnnxKws` demo 在 `android/SherpaOnnxKws/` 目录,可作本工程改造的对照参考。**不进 LilaWakeup_App 仓库**——它是上游开源依赖(Apache 2.0),公开可拿,避免重复存储和 license 复杂化。 + --- ## 六、用户偏好与代码风格(继承自主项目) diff --git a/app/src/main/assets/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx b/app/src/main/assets/kws/decoder.onnx similarity index 100% rename from app/src/main/assets/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx rename to app/src/main/assets/kws/decoder.onnx diff --git a/app/src/main/assets/kws/encoder.int8.onnx b/app/src/main/assets/kws/encoder.int8.onnx new file mode 100644 index 0000000..a739d3a Binary files /dev/null and b/app/src/main/assets/kws/encoder.int8.onnx differ diff --git a/app/src/main/assets/kws/joiner.int8.onnx b/app/src/main/assets/kws/joiner.int8.onnx new file mode 100644 index 0000000..fc88aba Binary files /dev/null and b/app/src/main/assets/kws/joiner.int8.onnx differ diff --git a/app/src/main/assets/kws/keywords.txt b/app/src/main/assets/kws/keywords.txt new file mode 100644 index 0000000..f294655 --- /dev/null +++ b/app/src/main/assets/kws/keywords.txt @@ -0,0 +1,43 @@ +n ǐ h ǎo l ì l ā @你好Lila +n ǐ h ǎo l í l ā @你好Lila +n ǐ h ǎo l ǐ l ā @你好Lila +n ǐ h ǎo l ī l ā @你好Lila +n ǐ h ǎo l à l ā @你好Lila +n ǐ h ǎo l ā l ā @你好Lila +n ǐ h ǎo l ì l à @你好Lila +n ǐ h ǎo l ì l á @你好Lila +n ǐ h ǎo l ǐ l à @你好Lila +n ǐ h 
ǎo m ǐ l ā @你好Lila +n ǐ h ǎo m ī l ā @你好Lila +n ǐ h ǎo m í l ā @你好Lila +n ǐ h ǎo m ì l ā @你好Lila +n ǐ h ǎo n ǐ l ā @你好Lila +n ǐ h ǎo n í l ā @你好Lila +n ǐ h ǎo n ī l ā @你好Lila +n ǐ h ǎo n ì l ā @你好Lila +n ǐ h ǎo l ái l ā @你好Lila +n ǐ h ǎo l ái n á @你好Lila +n ǐ h ǎo l ì n á @你好Lila +n ǐ h ǎo l í n á @你好Lila +n ǐ h ǎo l ǐ n á @你好Lila +n ǐ h ǎo m ǐ n á @你好Lila +n ǐ h ǎo m ī n á @你好Lila +n ǐ h ǎo m í n á @你好Lila +n ǐ h ǎo n ǐ n á @你好Lila +n ǐ h ǎo n ǐ n ǎ @你好Lila +n ǐ h ǎo n ǐ n à @你好Lila +n ǐ h ǎo l ì n ǎ @你好Lila +n ǐ h ǎo l ì n à @你好Lila +n ǐ h ǎo l e l ā @你好Lila +n ǐ h ǎo l ē l ā @你好Lila +n ǐ h ǎo y ī l ā @你好Lila +n ǐ h ǎo y ǐ l ā @你好Lila +n ǐ h ào l ì l ā @你好Lila +n í h ǎo l ì l ā @你好Lila +l ì l ā @Lila +l ǐ l ā @Lila +l í l ā @Lila +l ī l ā @Lila +m ī l ā @Lila +n ǐ n á @Lila +l ì n á @Lila diff --git a/app/src/main/assets/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt b/app/src/main/assets/kws/tokens.txt similarity index 100% rename from app/src/main/assets/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt rename to app/src/main/assets/kws/tokens.txt diff --git a/app/src/main/assets/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-99-avg-1-chunk-16-left-64.onnx b/app/src/main/assets/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-99-avg-1-chunk-16-left-64.onnx deleted file mode 100644 index 1d78310..0000000 Binary files a/app/src/main/assets/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-99-avg-1-chunk-16-left-64.onnx and /dev/null differ diff --git a/app/src/main/assets/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx b/app/src/main/assets/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx deleted file mode 100644 index 9d0beb3..0000000 Binary files a/app/src/main/assets/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx and 
/dev/null differ diff --git a/app/src/main/assets/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-99-avg-1-chunk-16-left-64.onnx b/app/src/main/assets/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-99-avg-1-chunk-16-left-64.onnx deleted file mode 100644 index 9d0beb3..0000000 Binary files a/app/src/main/assets/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-99-avg-1-chunk-16-left-64.onnx and /dev/null differ diff --git a/app/src/main/assets/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx b/app/src/main/assets/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx deleted file mode 100644 index 5687aef..0000000 Binary files a/app/src/main/assets/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx and /dev/null differ diff --git a/app/src/main/assets/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-99-avg-1-chunk-16-left-64.onnx b/app/src/main/assets/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-99-avg-1-chunk-16-left-64.onnx deleted file mode 100644 index 5687aef..0000000 Binary files a/app/src/main/assets/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-99-avg-1-chunk-16-left-64.onnx and /dev/null differ diff --git a/app/src/main/assets/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/keywords.txt b/app/src/main/assets/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/keywords.txt deleted file mode 100644 index d296696..0000000 --- a/app/src/main/assets/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/keywords.txt +++ /dev/null @@ -1,5 +0,0 @@ -n ǐ h ǎo l ì l ā @你好Lila -h è l ōu l ì l ā @hello Lila -l ì l ā t óng x ué @Lila同学 -l ì l ā n ǐ h ǎo @Lila你好 -x i ǎo l ì l ā @小Lila diff --git a/app/src/main/java/com/k2fsa/sherpa/onnx/OfflineRecognizer.kt 
/**
 * Result object returned by [OfflineRecognizer.getResult] for a decoded stream.
 *
 * Instances are produced by the native library, so the constructor parameter
 * order and types are kept exactly as declared here.
 * NOTE(review): [timestamps] presumably holds one entry per element of
 * [tokens]; confirm against the native implementation.
 *
 * Because this data class carries array properties, the compiler-generated
 * `equals`/`hashCode` would compare those arrays by identity. They are
 * overridden below so that two results with the same contents are equal.
 *
 * @property durations valid only for TDT models
 */
data class OfflineRecognizerResult(
    val text: String,
    val tokens: Array<String>,
    val timestamps: FloatArray,
    val lang: String,
    val emotion: String,
    val event: String,

    // valid only for TDT models
    val durations: FloatArray,
) {
    /** Structural equality: arrays are compared element by element. */
    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (other !is OfflineRecognizerResult) return false
        return text == other.text &&
            tokens.contentEquals(other.tokens) &&
            timestamps.contentEquals(other.timestamps) &&
            lang == other.lang &&
            emotion == other.emotion &&
            event == other.event &&
            durations.contentEquals(other.durations)
    }

    /** Hash code consistent with [equals]: arrays contribute their content hash. */
    override fun hashCode(): Int {
        var result = text.hashCode()
        result = 31 * result + tokens.contentHashCode()
        result = 31 * result + timestamps.contentHashCode()
        result = 31 * result + lang.hashCode()
        result = 31 * result + emotion.hashCode()
        result = 31 * result + event.hashCode()
        result = 31 * result + durations.contentHashCode()
        return result
    }
}
/**
 * Model file paths for a Moonshine offline model.
 *
 * Moonshine v1 needs four models; Moonshine v2 needs two:
 * - v1: [preprocessor], [encoder], [uncachedDecoder], [cachedDecoder]
 * - v2: [encoder], [mergedDecoder]
 *
 * Every field defaults to the empty string; fill in only the fields that
 * belong to the model version in use and leave the others at their default.
 */
data class OfflineMoonshineModelConfig(
    var preprocessor: String = "",
    var encoder: String = "",
    var uncachedDecoder: String = "",
    var cachedDecoder: String = "",
    var mergedDecoder: String = "",
)
/**
 * Kotlin wrapper around a native offline recognizer from the
 * `sherpa-onnx-jni` library.
 *
 * The native object is created in the constructor: from the APK assets when
 * [assetManager] is non-null, otherwise via `newFromFile`. It is destroyed by
 * [release], or by [finalize] if the instance is garbage collected without
 * being released.
 *
 * After [release], [createStream], [decode] and [setConfig] throw
 * [IllegalStateException] rather than handing a zero handle to native code.
 *
 * @param assetManager asset manager used to create the native object, or
 *   `null` to create it with `newFromFile`
 * @property config configuration supplied at construction time
 */
class OfflineRecognizer(
    assetManager: AssetManager? = null,
    val config: OfflineRecognizerConfig,
) {
    // Opaque native handle. 0 means the native object has been deleted.
    private var ptr: Long = if (assetManager != null) {
        newFromAsset(assetManager, config)
    } else {
        newFromFile(config)
    }

    /** Deletes the native object once; later calls are no-ops. */
    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    /** Frees the native object immediately; safe to call more than once. */
    fun release() = finalize()

    /** Creates a new stream bound to this recognizer. */
    fun createStream(): OfflineStream = OfflineStream(createStream(livePtr()))

    /** Creates a new stream bound to this recognizer with per-stream [hotwords]. */
    fun createStream(hotwords: String): OfflineStream =
        OfflineStream(createStreamWithHotwords(livePtr(), hotwords))

    /**
     * Returns the result for [stream].
     *
     * Only the stream handle is passed to native code, so this call does not
     * check the recognizer handle.
     */
    fun getResult(stream: OfflineStream): OfflineRecognizerResult = getResult(stream.ptr)

    /** Runs decoding for [stream] on this recognizer. */
    fun decode(stream: OfflineStream) = decode(livePtr(), stream.ptr)

    /**
     * Pushes [config] to the native recognizer.
     *
     * The [OfflineRecognizer.config] property is not updated by this call; it
     * keeps the value passed to the constructor.
     */
    fun setConfig(config: OfflineRecognizerConfig) = setConfig(livePtr(), config)

    /** Returns the native handle, failing fast if it has already been released. */
    private fun livePtr(): Long {
        check(ptr != 0L) { "OfflineRecognizer has been released" }
        return ptr
    }

    private external fun delete(ptr: Long)

    private external fun createStream(ptr: Long): Long

    private external fun createStreamWithHotwords(ptr: Long, hotwords: String): Long

    private external fun setConfig(ptr: Long, config: OfflineRecognizerConfig)

    private external fun newFromAsset(
        assetManager: AssetManager,
        config: OfflineRecognizerConfig,
    ): Long

    private external fun newFromFile(
        config: OfflineRecognizerConfig,
    ): Long

    private external fun decode(ptr: Long, streamPtr: Long)

    private external fun getResult(streamPtr: Long): OfflineRecognizerResult

    companion object {
        init {
            // Load the JNI library before any external function is invoked.
            System.loadLibrary("sherpa-onnx-jni")
        }

        @JvmStatic
        external fun prependAdspLibraryPath(newPath: String) // for qnn
    }
}
{ + when (type) { + 0 -> { + val modelDir = "sherpa-onnx-paraformer-zh-2023-09-14" + return OfflineModelConfig( + paraformer = OfflineParaformerModelConfig( + model = "$modelDir/model.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "paraformer", + ) + } + + 1 -> { + val modelDir = "icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04" + return OfflineModelConfig( + transducer = OfflineTransducerModelConfig( + encoder = "$modelDir/encoder-epoch-30-avg-4.int8.onnx", + decoder = "$modelDir/decoder-epoch-30-avg-4.onnx", + joiner = "$modelDir/joiner-epoch-30-avg-4.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "transducer", + ) + } + + 2 -> { + val modelDir = "sherpa-onnx-whisper-tiny.en" + return OfflineModelConfig( + whisper = OfflineWhisperModelConfig( + encoder = "$modelDir/tiny.en-encoder.int8.onnx", + decoder = "$modelDir/tiny.en-decoder.int8.onnx", + ), + tokens = "$modelDir/tiny.en-tokens.txt", + modelType = "whisper", + ) + } + + 3 -> { + val modelDir = "sherpa-onnx-whisper-base.en" + return OfflineModelConfig( + whisper = OfflineWhisperModelConfig( + encoder = "$modelDir/base.en-encoder.int8.onnx", + decoder = "$modelDir/base.en-decoder.int8.onnx", + ), + tokens = "$modelDir/base.en-tokens.txt", + modelType = "whisper", + ) + } + + + 4 -> { + val modelDir = "icefall-asr-zipformer-wenetspeech-20230615" + return OfflineModelConfig( + transducer = OfflineTransducerModelConfig( + encoder = "$modelDir/encoder-epoch-12-avg-4.int8.onnx", + decoder = "$modelDir/decoder-epoch-12-avg-4.onnx", + joiner = "$modelDir/joiner-epoch-12-avg-4.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "transducer", + ) + } + + 5 -> { + val modelDir = "sherpa-onnx-zipformer-multi-zh-hans-2023-9-2" + return OfflineModelConfig( + transducer = OfflineTransducerModelConfig( + encoder = "$modelDir/encoder-epoch-20-avg-1.int8.onnx", + decoder = "$modelDir/decoder-epoch-20-avg-1.onnx", + joiner = 
"$modelDir/joiner-epoch-20-avg-1.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "transducer", + ) + } + + 6 -> { + val modelDir = "sherpa-onnx-nemo-ctc-en-citrinet-512" + return OfflineModelConfig( + nemo = OfflineNemoEncDecCtcModelConfig( + model = "$modelDir/model.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 7 -> { + val modelDir = "sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k" + return OfflineModelConfig( + nemo = OfflineNemoEncDecCtcModelConfig( + model = "$modelDir/model.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 8 -> { + val modelDir = "sherpa-onnx-nemo-fast-conformer-ctc-en-24500" + return OfflineModelConfig( + nemo = OfflineNemoEncDecCtcModelConfig( + model = "$modelDir/model.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 9 -> { + val modelDir = "sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288" + return OfflineModelConfig( + nemo = OfflineNemoEncDecCtcModelConfig( + model = "$modelDir/model.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 10 -> { + val modelDir = "sherpa-onnx-nemo-fast-conformer-ctc-es-1424" + return OfflineModelConfig( + nemo = OfflineNemoEncDecCtcModelConfig( + model = "$modelDir/model.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 11 -> { + val modelDir = "sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04" + return OfflineModelConfig( + teleSpeech = "$modelDir/model.int8.onnx", + tokens = "$modelDir/tokens.txt", + modelType = "telespeech_ctc", + ) + } + + 12 -> { + val modelDir = "sherpa-onnx-zipformer-thai-2024-06-20" + return OfflineModelConfig( + transducer = OfflineTransducerModelConfig( + encoder = "$modelDir/encoder-epoch-12-avg-5.int8.onnx", + decoder = "$modelDir/decoder-epoch-12-avg-5.onnx", + joiner = "$modelDir/joiner-epoch-12-avg-5.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "transducer", + ) + } + + 13 -> { + val modelDir = "sherpa-onnx-zipformer-korean-2024-06-24" + return 
OfflineModelConfig( + transducer = OfflineTransducerModelConfig( + encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx", + decoder = "$modelDir/decoder-epoch-99-avg-1.onnx", + joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "transducer", + ) + } + + 14 -> { + val modelDir = "sherpa-onnx-paraformer-zh-small-2024-03-09" + return OfflineModelConfig( + paraformer = OfflineParaformerModelConfig( + model = "$modelDir/model.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "paraformer", + ) + } + + 15 -> { + val modelDir = "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17" + return OfflineModelConfig( + senseVoice = OfflineSenseVoiceModelConfig( + model = "$modelDir/model.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 16 -> { + val modelDir = "sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01" + return OfflineModelConfig( + transducer = OfflineTransducerModelConfig( + encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx", + decoder = "$modelDir/decoder-epoch-99-avg-1.onnx", + joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "transducer", + ) + } + + 17 -> { + val modelDir = "sherpa-onnx-zipformer-ru-2024-09-18" + return OfflineModelConfig( + transducer = OfflineTransducerModelConfig( + encoder = "$modelDir/encoder.int8.onnx", + decoder = "$modelDir/decoder.onnx", + joiner = "$modelDir/joiner.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "transducer", + ) + } + + 18 -> { + val modelDir = "sherpa-onnx-small-zipformer-ru-2024-09-18" + return OfflineModelConfig( + transducer = OfflineTransducerModelConfig( + encoder = "$modelDir/encoder.int8.onnx", + decoder = "$modelDir/decoder.onnx", + joiner = "$modelDir/joiner.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "transducer", + ) + } + + 19 -> { + val modelDir = "sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24" + return 
OfflineModelConfig( + nemo = OfflineNemoEncDecCtcModelConfig( + model = "$modelDir/model.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 20 -> { + val modelDir = "sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24" + return OfflineModelConfig( + transducer = OfflineTransducerModelConfig( + encoder = "$modelDir/encoder.int8.onnx", + decoder = "$modelDir/decoder.onnx", + joiner = "$modelDir/joiner.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "nemo_transducer", + ) + } + + 21 -> { + val modelDir = "sherpa-onnx-moonshine-tiny-en-int8" + return OfflineModelConfig( + moonshine = OfflineMoonshineModelConfig( + preprocessor = "$modelDir/preprocess.onnx", + encoder = "$modelDir/encode.int8.onnx", + uncachedDecoder = "$modelDir/uncached_decode.int8.onnx", + cachedDecoder = "$modelDir/cached_decode.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 22 -> { + val modelDir = "sherpa-onnx-moonshine-base-en-int8" + return OfflineModelConfig( + moonshine = OfflineMoonshineModelConfig( + preprocessor = "$modelDir/preprocess.onnx", + encoder = "$modelDir/encode.int8.onnx", + uncachedDecoder = "$modelDir/uncached_decode.int8.onnx", + cachedDecoder = "$modelDir/cached_decode.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 23 -> { + val modelDir = "sherpa-onnx-zipformer-zh-en-2023-11-22" + return OfflineModelConfig( + transducer = OfflineTransducerModelConfig( + encoder = "$modelDir/encoder-epoch-34-avg-19.int8.onnx", + decoder = "$modelDir/decoder-epoch-34-avg-19.onnx", + joiner = "$modelDir/joiner-epoch-34-avg-19.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "transducer", + ) + } + + 24 -> { + val modelDir = "sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16" + return OfflineModelConfig( + fireRedAsr = OfflineFireRedAsrModelConfig( + encoder = "$modelDir/encoder.int8.onnx", + decoder = "$modelDir/decoder.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 25 -> { + val modelDir = 
"sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02" + return OfflineModelConfig( + dolphin = OfflineDolphinModelConfig( + model = "$modelDir/model.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 26 -> { + val modelDir = "sherpa-onnx-zipformer-vi-int8-2025-04-20" + return OfflineModelConfig( + transducer = OfflineTransducerModelConfig( + encoder = "$modelDir/encoder-epoch-12-avg-8.int8.onnx", + decoder = "$modelDir/decoder-epoch-12-avg-8.onnx", + joiner = "$modelDir/joiner-epoch-12-avg-8.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "transducer", + ) + } + + 27 -> { + val modelDir = "sherpa-onnx-nemo-ctc-giga-am-v2-russian-2025-04-19" + return OfflineModelConfig( + nemo = OfflineNemoEncDecCtcModelConfig( + model = "$modelDir/model.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 28 -> { + val modelDir = "sherpa-onnx-nemo-transducer-giga-am-v2-russian-2025-04-19" + return OfflineModelConfig( + transducer = OfflineTransducerModelConfig( + encoder = "$modelDir/encoder.int8.onnx", + decoder = "$modelDir/decoder.onnx", + joiner = "$modelDir/joiner.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "nemo_transducer", + ) + } + + 29 -> { + val modelDir = "sherpa-onnx-zipformer-ru-int8-2025-04-20" + return OfflineModelConfig( + transducer = OfflineTransducerModelConfig( + encoder = "$modelDir/encoder.int8.onnx", + decoder = "$modelDir/decoder.onnx", + joiner = "$modelDir/joiner.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "transducer", + ) + } + + 30 -> { + val modelDir = "sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8" + return OfflineModelConfig( + transducer = OfflineTransducerModelConfig( + encoder = "$modelDir/encoder.int8.onnx", + decoder = "$modelDir/decoder.int8.onnx", + joiner = "$modelDir/joiner.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "nemo_transducer", + ) + } + + 31 -> { + val modelDir = "sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03" + return 
OfflineModelConfig( + zipformerCtc = OfflineZipformerCtcModelConfig( + model = "$modelDir/model.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 32 -> { + val modelDir = "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8" + return OfflineModelConfig( + canary = OfflineCanaryModelConfig( + encoder = "$modelDir/encoder.int8.onnx", + decoder = "$modelDir/decoder.int8.onnx", + srcLang = "en", + tgtLang = "en", + usePnc = true, + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 33 -> { + val modelDir = "sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8" + return OfflineModelConfig( + nemo = OfflineNemoEncDecCtcModelConfig( + model = "$modelDir/model.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 34 -> { + val modelDir = "sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8" + return OfflineModelConfig( + nemo = OfflineNemoEncDecCtcModelConfig( + model = "$modelDir/model.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 35 -> { + val modelDir = "sherpa-onnx-nemo-transducer-stt_pt_fastconformer_hybrid_large_pc-int8" + return OfflineModelConfig( + transducer = OfflineTransducerModelConfig( + encoder = "$modelDir/encoder.int8.onnx", + decoder = "$modelDir/decoder.int8.onnx", + joiner = "$modelDir/joiner.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "nemo_transducer", + ) + } + + 36 -> { + val modelDir = "sherpa-onnx-nemo-stt_pt_fastconformer_hybrid_large_pc-int8" + return OfflineModelConfig( + nemo = OfflineNemoEncDecCtcModelConfig( + model = "$modelDir/model.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 37 -> { + val modelDir = "sherpa-onnx-nemo-transducer-stt_de_fastconformer_hybrid_large_pc-int8" + return OfflineModelConfig( + transducer = OfflineTransducerModelConfig( + encoder = "$modelDir/encoder.int8.onnx", + decoder = "$modelDir/decoder.int8.onnx", + joiner = "$modelDir/joiner.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "nemo_transducer", + ) + } + + 38 -> 
{ + val modelDir = "sherpa-onnx-nemo-stt_de_fastconformer_hybrid_large_pc-int8" + return OfflineModelConfig( + nemo = OfflineNemoEncDecCtcModelConfig( + model = "$modelDir/model.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 39 -> { + val modelDir = "sherpa-onnx-zipformer-ctc-small-zh-int8-2025-07-16" + return OfflineModelConfig( + zipformerCtc = OfflineZipformerCtcModelConfig( + model = "$modelDir/model.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 40 -> { + val modelDir = "sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8" + return OfflineModelConfig( + transducer = OfflineTransducerModelConfig( + encoder = "$modelDir/encoder.int8.onnx", + decoder = "$modelDir/decoder.int8.onnx", + joiner = "$modelDir/joiner.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "nemo_transducer", + ) + } + + 41 -> { + val modelDir = "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09" + return OfflineModelConfig( + senseVoice = OfflineSenseVoiceModelConfig( + model = "$modelDir/model.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 42 -> { + val modelDir = + "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10" + return OfflineModelConfig( + wenetCtc = OfflineWenetCtcModelConfig( + model = "$modelDir/model.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 43 -> { + val modelDir = "sherpa-onnx-paraformer-zh-int8-2025-10-07" + return OfflineModelConfig( + paraformer = OfflineParaformerModelConfig( + model = "$modelDir/model.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "paraformer", + ) + } + + 44 -> { + val modelDir = "sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12" + return OfflineModelConfig( + omnilingual = OfflineOmnilingualAsrCtcModelConfig( + model = "$modelDir/model.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 45 -> { + val modelDir = "sherpa-onnx-medasr-ctc-en-int8-2025-12-25" + return OfflineModelConfig( + medasr 
= OfflineMedAsrCtcModelConfig( + model = "$modelDir/model.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 46 -> { + val modelDir = "sherpa-onnx-funasr-nano-int8-2025-12-30" + return OfflineModelConfig( + funasrNano = OfflineFunAsrNanoModelConfig( + encoderAdaptor = "$modelDir/encoder_adaptor.int8.onnx", + llm = "$modelDir/llm.int8.onnx", + embedding = "$modelDir/embedding.int8.onnx", + tokenizer = "$modelDir/Qwen3-0.6B", + ), + tokens = "", + numThreads=3, + ) + } + + 47 -> { + val modelDir = "sherpa-onnx-wenetspeech-wu-u2pp-conformer-ctc-zh-int8-2026-02-03" + return OfflineModelConfig( + wenetCtc = OfflineWenetCtcModelConfig( + model = "$modelDir/model.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 48 -> { + val modelDir = "sherpa-onnx-wenetspeech-wu-u2pp-conformer-ctc-zh-2026-02-03" + return OfflineModelConfig( + wenetCtc = OfflineWenetCtcModelConfig( + model = "$modelDir/model.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 49 -> { + val modelDir = "sherpa-onnx-zipformer-vi-30M-int8-2026-02-09" + return OfflineModelConfig( + transducer = OfflineTransducerModelConfig( + encoder = "$modelDir/encoder.int8.onnx", + decoder = "$modelDir/decoder.onnx", + joiner = "$modelDir/joiner.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "transducer", + ) + } + + 50 -> { + val modelDir = "sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25" + return OfflineModelConfig( + fireRedAsrCtc = OfflineFireRedAsrCtcModelConfig( + model = "$modelDir/model.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 51 -> { + val modelDir = "sherpa-onnx-moonshine-tiny-ko-quantized-2026-02-27" + return OfflineModelConfig( + moonshine = OfflineMoonshineModelConfig( + encoder = "$modelDir/encoder_model.ort", + mergedDecoder = "$modelDir/decoder_model_merged.ort", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 52 -> { + val modelDir = "sherpa-onnx-moonshine-tiny-ja-quantized-2026-02-27" + return OfflineModelConfig( + 
moonshine = OfflineMoonshineModelConfig( + encoder = "$modelDir/encoder_model.ort", + mergedDecoder = "$modelDir/decoder_model_merged.ort", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 53 -> { + val modelDir = "sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27" + return OfflineModelConfig( + moonshine = OfflineMoonshineModelConfig( + encoder = "$modelDir/encoder_model.ort", + mergedDecoder = "$modelDir/decoder_model_merged.ort", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 54 -> { + val modelDir = "sherpa-onnx-moonshine-base-zh-quantized-2026-02-27" + return OfflineModelConfig( + moonshine = OfflineMoonshineModelConfig( + encoder = "$modelDir/encoder_model.ort", + mergedDecoder = "$modelDir/decoder_model_merged.ort", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 55 -> { + val modelDir = "sherpa-onnx-moonshine-base-vi-quantized-2026-02-27" + return OfflineModelConfig( + moonshine = OfflineMoonshineModelConfig( + encoder = "$modelDir/encoder_model.ort", + mergedDecoder = "$modelDir/decoder_model_merged.ort", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 56 -> { + val modelDir = "sherpa-onnx-moonshine-base-uk-quantized-2026-02-27" + return OfflineModelConfig( + moonshine = OfflineMoonshineModelConfig( + encoder = "$modelDir/encoder_model.ort", + mergedDecoder = "$modelDir/decoder_model_merged.ort", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 57 -> { + val modelDir = "sherpa-onnx-moonshine-base-ja-quantized-2026-02-27" + return OfflineModelConfig( + moonshine = OfflineMoonshineModelConfig( + encoder = "$modelDir/encoder_model.ort", + mergedDecoder = "$modelDir/decoder_model_merged.ort", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 58 -> { + val modelDir = "sherpa-onnx-moonshine-base-es-quantized-2026-02-27" + return OfflineModelConfig( + moonshine = OfflineMoonshineModelConfig( + encoder = "$modelDir/encoder_model.ort", + mergedDecoder = "$modelDir/decoder_model_merged.ort", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 
59 -> { + val modelDir = "sherpa-onnx-moonshine-base-en-quantized-2026-02-27" + return OfflineModelConfig( + moonshine = OfflineMoonshineModelConfig( + encoder = "$modelDir/encoder_model.ort", + mergedDecoder = "$modelDir/decoder_model_merged.ort", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 60 -> { + val modelDir = "sherpa-onnx-moonshine-base-ar-quantized-2026-02-27" + return OfflineModelConfig( + moonshine = OfflineMoonshineModelConfig( + encoder = "$modelDir/encoder_model.ort", + mergedDecoder = "$modelDir/decoder_model_merged.ort", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 61 -> { + val modelDir = "sherpa-onnx-qwen3-asr-0.6B-int8-2026-03-25" + return OfflineModelConfig( + qwen3Asr = OfflineQwen3AsrModelConfig( + convFrontend = "$modelDir/conv_frontend.onnx", + encoder = "$modelDir/encoder.int8.onnx", + decoder = "$modelDir/decoder.int8.onnx", + tokenizer = "$modelDir/tokenizer", + ), + tokens = "", + numThreads=3, + ) + } + + 62 -> { + val modelDir = "sherpa-onnx-nemo-parakeet-unified-en-0.6b-int8-non-streaming" + return OfflineModelConfig( + transducer = OfflineTransducerModelConfig( + encoder = "$modelDir/encoder.int8.onnx", + decoder = "$modelDir/decoder.int8.onnx", + joiner = "$modelDir/joiner.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "nemo_transducer", + ) + } + + 9000 -> { + val modelDir = + "sherpa-onnx-qnn-5-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8-android-aarch64" + return OfflineModelConfig( + provider = "qnn", + senseVoice = OfflineSenseVoiceModelConfig( + model = "$modelDir/libmodel.so", + qnnConfig = QnnConfig( + // Please copy libQnnHtp.so and libQnnSystem.so to jniLibs/arm64-v8a by yourself + // + // model.bin is created in the first run and is used from the second run + // to speed up the initialization + backendLib = "libQnnHtp.so", + systemLib = "libQnnSystem.so", + contextBinary = "$modelDir/model.bin", + ), + ), + tokens = "$modelDir/tokens.txt", + debug = true, + ) + } + + 9001 -> { + val 
modelDir = + "sherpa-onnx-qnn-8-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8-android-aarch64" + return OfflineModelConfig( + provider = "qnn", + senseVoice = OfflineSenseVoiceModelConfig( + model = "$modelDir/libmodel.so", + qnnConfig = QnnConfig( + backendLib = "libQnnHtp.so", + systemLib = "libQnnSystem.so", + contextBinary = "$modelDir/model.bin", + ), + ), + tokens = "$modelDir/tokens.txt", + debug = true, + ) + } + + 9002 -> { + val modelDir = + "sherpa-onnx-qnn-10-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8-android-aarch64" + return OfflineModelConfig( + provider = "qnn", + senseVoice = OfflineSenseVoiceModelConfig( + model = "$modelDir/libmodel.so", + qnnConfig = QnnConfig( + backendLib = "libQnnHtp.so", + systemLib = "libQnnSystem.so", + contextBinary = "$modelDir/model.bin", + ), + ), + tokens = "$modelDir/tokens.txt", + debug = true, + ) + } + + 9003 -> { + val modelDir = + "sherpa-onnx-qnn-13-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8-android-aarch64" + return OfflineModelConfig( + provider = "qnn", + senseVoice = OfflineSenseVoiceModelConfig( + model = "$modelDir/libmodel.so", + qnnConfig = QnnConfig( + backendLib = "libQnnHtp.so", + systemLib = "libQnnSystem.so", + contextBinary = "$modelDir/model.bin", + ), + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 9004 -> { + val modelDir = + "sherpa-onnx-qnn-15-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8-android-aarch64" + return OfflineModelConfig( + provider = "qnn", + senseVoice = OfflineSenseVoiceModelConfig( + model = "$modelDir/libmodel.so", + qnnConfig = QnnConfig( + backendLib = "libQnnHtp.so", + systemLib = "libQnnSystem.so", + contextBinary = "$modelDir/model.bin", + ), + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 9005 -> { + val modelDir = + "sherpa-onnx-qnn-18-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8-android-aarch64" + return OfflineModelConfig( + provider = "qnn", + senseVoice = OfflineSenseVoiceModelConfig( + model = "$modelDir/libmodel.so", + 
qnnConfig = QnnConfig( + backendLib = "libQnnHtp.so", + systemLib = "libQnnSystem.so", + contextBinary = "$modelDir/model.bin", + ), + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 9006 -> { + val modelDir = + "sherpa-onnx-qnn-20-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8-android-aarch64" + return OfflineModelConfig( + provider = "qnn", + senseVoice = OfflineSenseVoiceModelConfig( + model = "$modelDir/libmodel.so", + qnnConfig = QnnConfig( + backendLib = "libQnnHtp.so", + systemLib = "libQnnSystem.so", + contextBinary = "$modelDir/model.bin", + ), + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 9007 -> { + val modelDir = + "sherpa-onnx-qnn-23-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8-android-aarch64" + return OfflineModelConfig( + provider = "qnn", + senseVoice = OfflineSenseVoiceModelConfig( + model = "$modelDir/libmodel.so", + qnnConfig = QnnConfig( + backendLib = "libQnnHtp.so", + systemLib = "libQnnSystem.so", + contextBinary = "$modelDir/model.bin", + ), + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 9008 -> { + val modelDir = + "sherpa-onnx-qnn-25-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8-android-aarch64" + return OfflineModelConfig( + provider = "qnn", + senseVoice = OfflineSenseVoiceModelConfig( + model = "$modelDir/libmodel.so", + qnnConfig = QnnConfig( + backendLib = "libQnnHtp.so", + systemLib = "libQnnSystem.so", + contextBinary = "$modelDir/model.bin", + ), + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 9009 -> { + val modelDir = + "sherpa-onnx-qnn-28-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8-android-aarch64" + return OfflineModelConfig( + provider = "qnn", + senseVoice = OfflineSenseVoiceModelConfig( + model = "$modelDir/libmodel.so", + qnnConfig = QnnConfig( + backendLib = "libQnnHtp.so", + systemLib = "libQnnSystem.so", + contextBinary = "$modelDir/model.bin", + ), + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 9010 -> { + val modelDir = + 
"sherpa-onnx-qnn-30-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8-android-aarch64" + return OfflineModelConfig( + provider = "qnn", + senseVoice = OfflineSenseVoiceModelConfig( + model = "$modelDir/libmodel.so", + qnnConfig = QnnConfig( + backendLib = "libQnnHtp.so", + systemLib = "libQnnSystem.so", + contextBinary = "$modelDir/model.bin", + ), + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 9011 -> { + val modelDir = + "sherpa-onnx-qnn-5-seconds-zipformer-ctc-zh-2025-07-03-int8-android-aarch64" + return OfflineModelConfig( + provider = "qnn", + zipformerCtc = OfflineZipformerCtcModelConfig( + model = "$modelDir/libmodel.so", + qnnConfig = QnnConfig( + backendLib = "libQnnHtp.so", + systemLib = "libQnnSystem.so", + contextBinary = "$modelDir/model.bin", + ), + ), + tokens = "$modelDir/tokens.txt", + debug = true, + ) + } + + 9012 -> { + val modelDir = + "sherpa-onnx-qnn-8-seconds-zipformer-ctc-zh-2025-07-03-int8-android-aarch64" + return OfflineModelConfig( + provider = "qnn", + zipformerCtc = OfflineZipformerCtcModelConfig( + model = "$modelDir/libmodel.so", + qnnConfig = QnnConfig( + backendLib = "libQnnHtp.so", + systemLib = "libQnnSystem.so", + contextBinary = "$modelDir/model.bin", + ), + ), + tokens = "$modelDir/tokens.txt", + debug = true, + ) + } + + 9013 -> { + val modelDir = + "sherpa-onnx-qnn-10-seconds-zipformer-ctc-zh-2025-07-03-int8-android-aarch64" + return OfflineModelConfig( + provider = "qnn", + zipformerCtc = OfflineZipformerCtcModelConfig( + model = "$modelDir/libmodel.so", + qnnConfig = QnnConfig( + backendLib = "libQnnHtp.so", + systemLib = "libQnnSystem.so", + contextBinary = "$modelDir/model.bin", + ), + ), + tokens = "$modelDir/tokens.txt", + debug = true, + ) + } + + 9014 -> { + val modelDir = + "sherpa-onnx-qnn-13-seconds-zipformer-ctc-zh-2025-07-03-int8-android-aarch64" + return OfflineModelConfig( + provider = "qnn", + zipformerCtc = OfflineZipformerCtcModelConfig( + model = "$modelDir/libmodel.so", + qnnConfig = QnnConfig( + 
backendLib = "libQnnHtp.so", + systemLib = "libQnnSystem.so", + contextBinary = "$modelDir/model.bin", + ), + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 9015 -> { + val modelDir = + "sherpa-onnx-qnn-15-seconds-zipformer-ctc-zh-2025-07-03-int8-android-aarch64" + return OfflineModelConfig( + provider = "qnn", + zipformerCtc = OfflineZipformerCtcModelConfig( + model = "$modelDir/libmodel.so", + qnnConfig = QnnConfig( + backendLib = "libQnnHtp.so", + systemLib = "libQnnSystem.so", + contextBinary = "$modelDir/model.bin", + ), + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 9016 -> { + val modelDir = + "sherpa-onnx-qnn-18-seconds-zipformer-ctc-zh-2025-07-03-int8-android-aarch64" + return OfflineModelConfig( + provider = "qnn", + zipformerCtc = OfflineZipformerCtcModelConfig( + model = "$modelDir/libmodel.so", + qnnConfig = QnnConfig( + backendLib = "libQnnHtp.so", + systemLib = "libQnnSystem.so", + contextBinary = "$modelDir/model.bin", + ), + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 9017 -> { + val modelDir = + "sherpa-onnx-qnn-20-seconds-zipformer-ctc-zh-2025-07-03-int8-android-aarch64" + return OfflineModelConfig( + provider = "qnn", + zipformerCtc = OfflineZipformerCtcModelConfig( + model = "$modelDir/libmodel.so", + qnnConfig = QnnConfig( + backendLib = "libQnnHtp.so", + systemLib = "libQnnSystem.so", + contextBinary = "$modelDir/model.bin", + ), + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 9018 -> { + val modelDir = + "sherpa-onnx-qnn-23-seconds-zipformer-ctc-zh-2025-07-03-int8-android-aarch64" + return OfflineModelConfig( + provider = "qnn", + zipformerCtc = OfflineZipformerCtcModelConfig( + model = "$modelDir/libmodel.so", + qnnConfig = QnnConfig( + backendLib = "libQnnHtp.so", + systemLib = "libQnnSystem.so", + contextBinary = "$modelDir/model.bin", + ), + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 9019 -> { + val modelDir = + "sherpa-onnx-qnn-25-seconds-zipformer-ctc-zh-2025-07-03-int8-android-aarch64" + return 
OfflineModelConfig( + provider = "qnn", + zipformerCtc = OfflineZipformerCtcModelConfig( + model = "$modelDir/libmodel.so", + qnnConfig = QnnConfig( + backendLib = "libQnnHtp.so", + systemLib = "libQnnSystem.so", + contextBinary = "$modelDir/model.bin", + ), + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 9020 -> { + val modelDir = + "sherpa-onnx-qnn-28-seconds-zipformer-ctc-zh-2025-07-03-int8-android-aarch64" + return OfflineModelConfig( + provider = "qnn", + zipformerCtc = OfflineZipformerCtcModelConfig( + model = "$modelDir/libmodel.so", + qnnConfig = QnnConfig( + backendLib = "libQnnHtp.so", + systemLib = "libQnnSystem.so", + contextBinary = "$modelDir/model.bin", + ), + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 9021 -> { + val modelDir = + "sherpa-onnx-qnn-30-seconds-zipformer-ctc-zh-2025-07-03-int8-android-aarch64" + return OfflineModelConfig( + provider = "qnn", + zipformerCtc = OfflineZipformerCtcModelConfig( + model = "$modelDir/libmodel.so", + qnnConfig = QnnConfig( + backendLib = "libQnnHtp.so", + systemLib = "libQnnSystem.so", + contextBinary = "$modelDir/model.bin", + ), + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 9022 -> { + // for Xiaomi 17 Pro + val modelDir = + "sherpa-onnx-qnn-SM8850-binary-10-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8" + return OfflineModelConfig( + provider = "qnn", + senseVoice = OfflineSenseVoiceModelConfig( + qnnConfig = QnnConfig( + // Please copy libQnnHtp.so and libQnnSystem.so to jniLibs/arm64-v8a by yourself + backendLib = "libQnnHtp.so", + systemLib = "libQnnSystem.so", + contextBinary = "$modelDir/model.bin", + ), + ), + tokens = "$modelDir/tokens.txt", + debug = true, + ) + } + + 9023 -> { + val modelDir = "sherpa-onnx-qnn-5-seconds-paraformer-zh-2023-03-28-int8-android-aarch64" + return OfflineModelConfig( + provider = "qnn", + paraformer = OfflineParaformerModelConfig( + model = "$modelDir/libencoder.so,$modelDir/libpredictor.so,$modelDir/libdecoder.so", + qnnConfig = QnnConfig( + 
backendLib = "libQnnHtp.so", + systemLib = "libQnnSystem.so", + // The following three *.bin files are generated during the first run + // and are used to replace the corresponding *.so files in later runs + contextBinary = "$modelDir/encoder.bin,$modelDir/predictor.bin,$modelDir/decoder.bin", + ), + ), + tokens = "$modelDir/tokens.txt", + debug = true, + ) + } + + 9024 -> { + val modelDir = "sherpa-onnx-qnn-5-seconds-paraformer-zh-2025-10-07-int8-android-aarch64" + return OfflineModelConfig( + provider = "qnn", + paraformer = OfflineParaformerModelConfig( + model = "$modelDir/libencoder.so,$modelDir/libpredictor.so,$modelDir/libdecoder.so", + qnnConfig = QnnConfig( + backendLib = "libQnnHtp.so", + systemLib = "libQnnSystem.so", + // The following three *.bin files are generated during the first run + // and are used to replace the corresponding *.so files in later runs + contextBinary = "$modelDir/encoder.bin,$modelDir/predictor.bin,$modelDir/decoder.bin", + ), + ), + tokens = "$modelDir/tokens.txt", + debug = true, + ) + } + + 9025 -> { + // for Xiaomi 17 Pro + val modelDir = "sherpa-onnx-qnn-SM8850-binary-5-seconds-paraformer-zh-2023-03-28-int8" + return OfflineModelConfig( + provider = "qnn", + paraformer = OfflineParaformerModelConfig( + qnnConfig = QnnConfig( + backendLib = "libQnnHtp.so", + systemLib = "libQnnSystem.so", + contextBinary = "$modelDir/encoder.bin,$modelDir/predictor.bin,$modelDir/decoder.bin", + ), + ), + tokens = "$modelDir/tokens.txt", + debug = true, + ) + } + } + return null +}
diff --git a/app/src/main/java/com/k2fsa/sherpa/onnx/OfflineStream.kt b/app/src/main/java/com/k2fsa/sherpa/onnx/OfflineStream.kt
new file mode 100644
index 0000000..96393a7
--- /dev/null
+++ b/app/src/main/java/com/k2fsa/sherpa/onnx/OfflineStream.kt
@@ -0,0 +1,51 @@
+package com.k2fsa.sherpa.onnx
+
+/**
+ * Kotlin handle for a native offline stream owned by the `sherpa-onnx-jni` library.
+ *
+ * [ptr] is the native handle; it is set to 0 once the native object has been deleted.
+ */
+class OfflineStream(var ptr: Long) {
+    /** Forwards [samples] and their [sampleRate] to the native stream. */
+    fun acceptWaveform(samples: FloatArray, sampleRate: Int) =
+        acceptWaveform(ptr, samples, sampleRate)
+
+    /** Sets a string option on the native stream. */
+    fun setOption(key: String, value: String) = setOption(ptr, key, value)
+
+    /** Reads a string option back from the native stream. */
+    fun getOption(key: String): String = getOption(ptr, key)
+
+    /** Deletes the native object once; the `ptr != 0L` guard makes repeated calls no-ops. */
+    protected fun finalize() {
+        if (ptr != 0L) {
+            delete(ptr)
+            ptr = 0
+        }
+    }
+
+    /** Explicitly frees the native object (same code path as [finalize]). */
+    fun release() = finalize()
+
+    /** Runs [block] with this stream and always calls [release] afterwards, even if [block] throws. */
+    fun use(block: (OfflineStream) -> Unit) {
+        try {
+            block(this)
+        } finally {
+            release()
+        }
+    }
+
+    // JNI entry points implemented in the sherpa-onnx-jni native library.
+    private external fun acceptWaveform(ptr: Long, samples: FloatArray, sampleRate: Int)
+    private external fun setOption(ptr: Long, key: String, value: String)
+    private external fun getOption(ptr: Long, key: String): String
+    private external fun delete(ptr: Long)
+
+    companion object {
+        init {
+            // Loads the native library when this class is first initialised.
+            System.loadLibrary("sherpa-onnx-jni")
+        }
+    }
+}
diff --git a/app/src/main/java/com/k2fsa/sherpa/onnx/QnnConfig.kt b/app/src/main/java/com/k2fsa/sherpa/onnx/QnnConfig.kt
new file mode 100644
index 0000000..ea7aa51
--- /dev/null
+++ b/app/src/main/java/com/k2fsa/sherpa/onnx/QnnConfig.kt
@@ -0,0 +1,12 @@
+package com.k2fsa.sherpa.onnx
+
+/**
+ * Settings passed as `qnnConfig` by the model configs that use `provider = "qnn"`.
+ *
+ * NOTE(review): field meanings are inferred from names and call sites; confirm against upstream docs.
+ */
+data class QnnConfig(
+    var backendLib: String = "",
+    var contextBinary: String = "",
+    var systemLib: String = "",
+)
diff --git a/app/src/main/java/com/k2fsa/sherpa/onnx/Vad.kt b/app/src/main/java/com/k2fsa/sherpa/onnx/Vad.kt
new file mode 100644
index 0000000..8407e3a
--- /dev/null
+++ b/app/src/main/java/com/k2fsa/sherpa/onnx/Vad.kt
@@ -0,0 +1,173 @@
+// Copyright (c) 2023 Xiaomi Corporation
+package com.k2fsa.sherpa.onnx
+
+import android.content.res.AssetManager
+
+/** Settings for the Silero VAD model; [model] is the model file path (empty by default). */
+data class SileroVadModelConfig(
+    var model: String = "",
+    var threshold: Float = 0.5F,
+    var minSilenceDuration: Float = 0.25F,
+    var minSpeechDuration: Float = 0.25F,
+    var windowSize: Int = 512,
+    var maxSpeechDuration: Float = 5.0F,
+)
+
+/** Settings for the ten-vad model; same fields as [SileroVadModelConfig], but [windowSize] defaults to 256. */
+data class TenVadModelConfig(
+    var model: String = "",
+    var threshold: Float = 0.5F,
+    var minSilenceDuration: Float = 0.25F,
+    var minSpeechDuration: Float = 0.25F,
+    var windowSize: Int = 256,
+    var maxSpeechDuration: Float = 5.0F,
+)
+
+/**
+ * Top-level VAD configuration handed to the native library.
+ *
+ * NOTE(review): presumably only the sub-config whose `model` is non-empty is used; confirm upstream.
+ */
+data class VadModelConfig(
+    var sileroVadModelConfig: SileroVadModelConfig = SileroVadModelConfig(),
+    var tenVadModelConfig: TenVadModelConfig = TenVadModelConfig(),
+    var sampleRate: Int = 16000,
+    var numThreads: Int = 1,
+    var provider: String = "cpu",
+    var debug: Boolean = false,
+)
+
+/** A segment returned by [Vad.front]. NOTE(review): [start] looks like a sample index; confirm. */
+class SpeechSegment(val start: Int, val samples: FloatArray)
+
+/**
+ * Kotlin wrapper around a native voice-activity detector from `sherpa-onnx-jni`.
+ *
+ * The native object is created from APK assets when [assetManager] is non-null,
+ * otherwise through the file-based constructor.
+ */
+class Vad(
+    assetManager: AssetManager? = null,
+    var config: VadModelConfig,
+) {
+    // Native handle; 0 after the native object has been deleted.
+    private var ptr: Long
+
+    init {
+        if (assetManager != null) {
+            ptr = newFromAsset(assetManager, config)
+        } else {
+            ptr = newFromFile(config)
+        }
+    }
+
+    /** Deletes the native object once; the `ptr != 0L` guard makes repeated calls no-ops. */
+    protected fun finalize() {
+        if (ptr != 0L) {
+            delete(ptr)
+            ptr = 0
+        }
+    }
+
+    /** Explicitly frees the native object (same code path as [finalize]). */
+    fun release() = finalize()
+
+    fun compute(samples: FloatArray): Float = compute(ptr, samples)
+
+
+    fun acceptWaveform(samples: FloatArray) = acceptWaveform(ptr, samples)
+
+    fun empty(): Boolean = empty(ptr)
+    fun pop() = pop(ptr)
+
+    fun front(): SpeechSegment {
+        return front(ptr)
+    }
+
+    fun clear() = clear(ptr)
+
+    fun isSpeechDetected(): Boolean = isSpeechDetected(ptr)
+
+    fun reset() = reset(ptr)
+
+    fun flush() = flush(ptr)
+
+    // JNI entry points implemented in the sherpa-onnx-jni native library.
+    private external fun delete(ptr: Long)
+
+    private external fun newFromAsset(
+        assetManager: AssetManager,
+        config: VadModelConfig,
+    ): Long
+
+    private external fun newFromFile(
+        config: VadModelConfig,
+    ): Long
+
+    private external fun acceptWaveform(ptr: Long, samples: FloatArray)
+    private external fun compute(ptr: Long, samples: FloatArray): Float
+
+    private external fun empty(ptr: Long): Boolean
+    private external fun pop(ptr: Long)
+    private external fun clear(ptr: Long)
+    private external fun front(ptr: Long): SpeechSegment
+    private external fun isSpeechDetected(ptr: Long): Boolean
+    private external fun reset(ptr: Long)
+    private external fun flush(ptr: Long)
+
+    companion object {
+        init {
+            // Loads the native library when this class is first initialised.
+            System.loadLibrary("sherpa-onnx-jni")
+        }
+    }
+}
+
+// Please visit
+// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+// to download silero_vad.onnx
+// and put it inside the assets/
+// directory
+//
+// For ten-vad, please use
+// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
+//
+/**
+ * Returns a ready-made VAD configuration, or `null` for an unknown [type].
+ *
+ * 0 = Silero VAD (`silero_vad.onnx`), 1 = ten-vad (`ten-vad.onnx`).
+ */
+fun getVadModelConfig(type: Int): VadModelConfig? {
+    when (type) {
+        0 -> {
+            return VadModelConfig(
+                sileroVadModelConfig = SileroVadModelConfig(
+                    model = "silero_vad.onnx",
+                    threshold = 0.5F,
+                    minSilenceDuration = 0.25F,
+                    minSpeechDuration = 0.25F,
+                    windowSize = 512,
+                ),
+                sampleRate = 16000,
+                numThreads = 1,
+                provider = "cpu",
+            )
+        }
+
+        1 -> {
+            return VadModelConfig(
+                tenVadModelConfig = TenVadModelConfig(
+                    model = "ten-vad.onnx",
+                    threshold = 0.5F,
+                    minSilenceDuration = 0.25F,
+                    minSpeechDuration = 0.25F,
+                    windowSize = 256,
+                ),
+                sampleRate = 16000,
+                numThreads = 1,
+                provider = "cpu",
+            )
+        }
+    }
+    return null
+}
diff --git a/app/src/main/java/com/lila/wakeup/Config.kt b/app/src/main/java/com/lila/wakeup/Config.kt index 9bca245..2d572c1 100644 --- a/app/src/main/java/com/lila/wakeup/Config.kt +++ b/app/src/main/java/com/lila/wakeup/Config.kt @@ -47,6 +47,12 @@ object Config { /** PAUSE/RESUME Extras: 调用原因(透传日志) */ const val EXTRA_REASON = "reason" + /** + * 内部广播 Action(仅本 APP 自己接收,用于 MainActivity UI 实时显示命中)。 + * 与对外协议 [ACTION_WAKEUP] 隔离,避免外部 APP 干扰内部 UI 状态。 + */ + const val ACTION_INTERNAL_WAKEUP = "com.lila.wakeup.action.INTERNAL_WAKEUP" + // ============================================================ // 二、引擎参数 // ============================================================ @@ -57,14 +63,10 @@ object Config { /** APP 漏发 RESUME 时的兜底超时(2 分钟) */ const val PAUSE_TIMEOUT_MS = 2 * 60 * 1000L - /** KWS 主阈值("你好Lila" 主词) */ - const val KWS_THRESHOLD_PRIMARY = 0.85f - - /** KWS 次阈值(其他变体词) */ - const val KWS_THRESHOLD_SECONDARY = 0.80f - - /** 后验平滑:连续 N 帧 confidence > 阈值才算命中 */ - const val SMOOTH_FRAMES = 2 + // 注:KWS_THRESHOLD / SMOOTH_FRAMES 已移除,使用 sherpa-onnx 默认值 + // - keywordsThreshold 默认 0.25 (KeywordSpotterConfig) + // - 不做外部后验平滑(sherpa-onnx 内部 numTrailingBlanks 已平滑) + // 原因见 KwsEngine.kt /
KwsStateMachine.kt 注释 // ============================================================ // 三、AudioRecord 配置 @@ -79,24 +81,67 @@ object Config { /** 16-bit PCM */ const val ENCODING = AudioFormat.ENCODING_PCM_16BIT - /** 单帧采样点数(10ms 帧 @ 16kHz = 160 samples) */ - const val FRAME_SAMPLES = SAMPLE_RATE / 100 + /** + * 单帧采样点数(100ms 帧 @ 16kHz = 1600 samples)。 + * + * ⚠️ 必须与 sherpa-onnx 官方 demo 一致(用 100ms 帧)。 + * 之前用 10ms 帧(160 sample)第一次能识别,之后再无命中——sherpa-onnx + * 内部 chunk 累积/边界处理对小帧不友好。改用 100ms 后稳定。 + */ + const val FRAME_SAMPLES = SAMPLE_RATE / 10 // ============================================================ // 四、模型路径(assets 内) // ============================================================ /** - * 模型目录,与 sherpa-onnx 官方 demo 一致。 - * 切换模型时改这一行 + 对应的 keywords.txt。 + * 架构演进历史: + * - v0: KWS wenetspeech 3.3M (2024) —— 默认 keywords 命中率 30-40% + * - v1: KWS zh-en 双语 3M —— 英文 Lila 0% 命中 + * - v2: ASR (SenseVoice 228MB) + 拼音替换 —— 板载 mic 全部 empty(模型对短词不敏感) + * - v3: KWS Zipformer wenetspeech-3.3M + 自定义谐音 keywords.txt 流式架构(当前) + * + * v3 关键改动: + * keywords.txt 用多个谐音变体覆盖("你好丽啦/咪啦/丽拉/lai啦"等) + * 流式实时识别,不再切 segment,不再依赖 VAD/能量门 + * 模型 5MB(vs SenseVoice 228MB),推理快 10x */ - const val MODEL_DIR = "sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01" + const val KWS_MODEL_DIR = "kws" + const val KWS_ENCODER = "kws/encoder.int8.onnx" + const val KWS_DECODER = "kws/decoder.onnx" + const val KWS_JOINER = "kws/joiner.int8.onnx" + const val KWS_TOKENS = "kws/tokens.txt" + const val KWS_KEYWORDS_FILE = "kws/keywords.txt" - const val MODEL_ENCODER = "$MODEL_DIR/encoder-epoch-12-avg-2-chunk-16-left-64.onnx" - const val MODEL_DECODER = "$MODEL_DIR/decoder-epoch-12-avg-2-chunk-16-left-64.onnx" - const val MODEL_JOINER = "$MODEL_DIR/joiner-epoch-12-avg-2-chunk-16-left-64.onnx" - const val MODEL_TOKENS = "$MODEL_DIR/tokens.txt" - const val MODEL_KEYWORDS = "$MODEL_DIR/keywords.txt" + /** KWS 推理线程数 */ + const val KWS_NUM_THREADS = 2 + + /** + * 关键词命中阈值(0~1 之间),越低越容易触发。 + * 板载 mic 弱信号 + Lila 
非训练词,激进降到 0.05。 + * 误触发风险:需要观察 listening 待机时是否被环境噪声误唤醒。 + */ + const val KWS_THRESHOLD = 0.05f + + /** + * 关键词命中加分,越大越容易触发。弱信号场景拉到 3.0。 + */ + const val KWS_SCORE = 3.0f + + /** + * AudioCapture 软件预增益(在喂 KWS 之前)。 + * 板载 mic 信号偏弱,1.5x 增益让说话峰值更接近满量程,KWS 神经网络 + * 看到的特征对比度更高。clamp 到 [-32768, 32767] 防止溢出失真。 + */ + const val AUDIO_CAPTURE_GAIN = 1.5f + + /** + * 调试: 命中前后 dump 一段 PCM 到 WAV 文件,辅助调试。 + * 路径: /sdcard/Android/data/com.lila.wakeup/files/lila_kws/ + * 拉回: adb pull /sdcard/Android/data/com.lila.wakeup/files/lila_kws/ ./ + * 上线前关掉。 + */ + const val DUMP_HIT_WAV = true // ============================================================ // 五、通知栏 diff --git a/app/src/main/java/com/lila/wakeup/MainActivity.kt b/app/src/main/java/com/lila/wakeup/MainActivity.kt index 02773b1..fb6efbb 100644 --- a/app/src/main/java/com/lila/wakeup/MainActivity.kt +++ b/app/src/main/java/com/lila/wakeup/MainActivity.kt @@ -1,15 +1,22 @@ package com.lila.wakeup import android.Manifest +import android.content.BroadcastReceiver +import android.content.Context import android.content.Intent +import android.content.IntentFilter import android.content.pm.PackageManager import android.os.Build import android.os.Bundle +import android.text.method.ScrollingMovementMethod import android.util.Log import android.widget.TextView import androidx.appcompat.app.AppCompatActivity import androidx.core.app.ActivityCompat import androidx.core.content.ContextCompat +import java.text.SimpleDateFormat +import java.util.Date +import java.util.Locale /** * 状态查看 Activity(开发期辅助 + 首次启动权限申请入口)。 @@ -30,21 +37,58 @@ class MainActivity : AppCompatActivity() { } private lateinit var statusText: TextView + private var wakeupCount = 0 + private val timeFmt = SimpleDateFormat("HH:mm:ss", Locale.getDefault()) + + /** + * 内部唤醒事件接收器,与外部协议广播 [Config.ACTION_WAKEUP] 隔离。 + * 收到后追加到 [statusText] 显示,方便用户在 APP 界面直接看效果。 + */ + private val wakeupReceiver = object : BroadcastReceiver() { + override fun onReceive(context: Context, intent: 
Intent) { + if (intent.action != Config.ACTION_INTERNAL_WAKEUP) return + val keyword = intent.getStringExtra(Config.EXTRA_KEYWORD) ?: "?" + val confidence = intent.getFloatExtra(Config.EXTRA_CONFIDENCE, 0f) + val ts = intent.getLongExtra(Config.EXTRA_TIMESTAMP, System.currentTimeMillis()) + val timeStr = timeFmt.format(Date(ts)) + wakeupCount++ + statusText.append("\n[$timeStr] #$wakeupCount 命中: $keyword (conf=$confidence)") + Log.i(TAG, "UI shown wakeup #$wakeupCount keyword=$keyword") + } + } override fun onCreate(savedInstanceState: Bundle?) { super.onCreate(savedInstanceState) - // 简化:用一个 TextView 直接显示状态,不用 layout xml + // 简化:用一个可滚动的 TextView 直接显示状态 + 唤醒事件 statusText = TextView(this).apply { text = "Lila 语音唤醒服务\n\n初始化中..." textSize = 18f setPadding(60, 60, 60, 60) + movementMethod = ScrollingMovementMethod() } setContentView(statusText) + // 注册内部唤醒事件接收器(Android 13+ 必须显式声明 NOT_EXPORTED) + val filter = IntentFilter(Config.ACTION_INTERNAL_WAKEUP) + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.TIRAMISU) { + registerReceiver(wakeupReceiver, filter, RECEIVER_NOT_EXPORTED) + } else { + registerReceiver(wakeupReceiver, filter) + } + checkAndRequestPermissions() } + override fun onDestroy() { + super.onDestroy() + try { + unregisterReceiver(wakeupReceiver) + } catch (_: IllegalArgumentException) { + // 接收器从未注册,忽略 + } + } + private fun checkAndRequestPermissions() { val needRequestList = mutableListOf() diff --git a/app/src/main/java/com/lila/wakeup/WakeupForegroundService.kt b/app/src/main/java/com/lila/wakeup/WakeupForegroundService.kt index 93d1b61..b839c69 100644 --- a/app/src/main/java/com/lila/wakeup/WakeupForegroundService.kt +++ b/app/src/main/java/com/lila/wakeup/WakeupForegroundService.kt @@ -12,8 +12,8 @@ import android.os.Build import android.os.IBinder import android.util.Log import androidx.core.app.NotificationCompat +import com.lila.wakeup.kws.AsrEngine import com.lila.wakeup.kws.AudioCapture -import com.lila.wakeup.kws.KwsEngine import 
com.lila.wakeup.kws.KwsStateMachine import com.lila.wakeup.protocol.BroadcastSender import com.lila.wakeup.protocol.WakeupServiceLocator @@ -34,7 +34,7 @@ class WakeupForegroundService : Service() { private const val TAG = "KwsService.Svc" } - private lateinit var engine: KwsEngine + private lateinit var engine: AsrEngine private lateinit var stateMachine: KwsStateMachine private lateinit var audioCapture: AudioCapture private lateinit var sender: BroadcastSender @@ -48,7 +48,7 @@ class WakeupForegroundService : Service() { // 初始化引擎栈(顺序:sender → engine → state → audio) sender = BroadcastSender(applicationContext) - engine = KwsEngine(applicationContext).also { it.init() } + engine = AsrEngine(applicationContext).also { it.init() } stateMachine = KwsStateMachine( engine = engine, sender = sender,
diff --git a/app/src/main/java/com/lila/wakeup/kws/AsrEngine.kt b/app/src/main/java/com/lila/wakeup/kws/AsrEngine.kt
new file mode 100644
index 0000000..5d40a68
--- /dev/null
+++ b/app/src/main/java/com/lila/wakeup/kws/AsrEngine.kt
@@ -0,0 +1,203 @@
+package com.lila.wakeup.kws
+
+import android.content.Context
+import android.util.Log
+import com.k2fsa.sherpa.onnx.FeatureConfig
+import com.k2fsa.sherpa.onnx.KeywordSpotter
+import com.k2fsa.sherpa.onnx.KeywordSpotterConfig
+import com.k2fsa.sherpa.onnx.OnlineModelConfig
+import com.k2fsa.sherpa.onnx.OnlineStream
+import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig
+import com.lila.wakeup.Config
+import java.io.File
+import java.io.FileOutputStream
+import java.nio.ByteBuffer
+import java.nio.ByteOrder
+import java.util.concurrent.atomic.AtomicInteger
+
+/**
+ * Streaming keyword-spotter wake-up engine (v3 architecture).
+ *
+ * Flow:
+ *  1. The audio capture side passes PCM frames to [feedAudio].
+ *  2. A sherpa-onnx [KeywordSpotter] consumes them through a single [OnlineStream].
+ *  3. The spotter matches the pinyin rules listed in [Config.KWS_KEYWORDS_FILE].
+ *  4. On a hit, [onWakeup] is invoked with the matched keyword.
+ *
+ * keywords.txt line format: `<pinyin token> <pinyin token> ... @<display name>`,
+ * e.g. `n ǐ h ǎo l ì l ā @你好Lila`. Variants can be added there without code changes.
+ *
+ * This class does no locking of its own.
+ * NOTE(review): [feedAudio] presumably runs on the capture thread while [reset] and
+ * [release] come from the service; confirm capture is stopped before [release].
+ *
+ * @param context used to open the model assets and to resolve the WAV dump directory
+ * @param onWakeup hit callback, invoked synchronously on the thread that calls [feedAudio]
+ */
+class AsrEngine(
+    private val context: Context,
+    var onWakeup: ((keyword: String) -> Unit)? = null,
+) {
+    companion object {
+        private const val TAG = "KwsService.Engine"
+
+        /** Size in bytes of the canonical PCM WAV header written by [dumpHitWav]. */
+        private const val WAV_HEADER_BYTES = 44
+    }
+
+    private var spotter: KeywordSpotter? = null
+    private var stream: OnlineStream? = null
+
+    /** Sequence number used to name the dumped hit_NNN.wav files. */
+    private val dumpCounter = AtomicInteger(0)
+
+    /**
+     * Loads the model files and keywords file from the APK assets and creates the
+     * decoding stream. Until this has run, [feedAudio] and [reset] are no-ops.
+     */
+    fun init() {
+        Log.i(TAG, "[Engine] init start")
+
+        val config = KeywordSpotterConfig(
+            featConfig = FeatureConfig(sampleRate = Config.SAMPLE_RATE, featureDim = 80),
+            modelConfig = OnlineModelConfig(
+                transducer = OnlineTransducerModelConfig(
+                    encoder = Config.KWS_ENCODER,
+                    decoder = Config.KWS_DECODER,
+                    joiner = Config.KWS_JOINER,
+                ),
+                tokens = Config.KWS_TOKENS,
+                modelType = "zipformer2",
+                numThreads = Config.KWS_NUM_THREADS,
+                provider = "cpu",
+            ),
+            keywordsFile = Config.KWS_KEYWORDS_FILE,
+            keywordsScore = Config.KWS_SCORE,
+            keywordsThreshold = Config.KWS_THRESHOLD,
+            numTrailingBlanks = 2,
+        )
+        // Hold the new spotter in a local so the stream is created without `!!`.
+        val newSpotter = KeywordSpotter(assetManager = context.assets, config = config)
+        spotter = newSpotter
+        stream = newSpotter.createStream()
+
+        Log.i(TAG, "[Engine] init done (KWS Zipformer + keywords.txt loaded)")
+    }
+
+    /**
+     * Feeds one frame of 16-bit PCM and reports a hit, if any, through [onWakeup].
+     *
+     * @param pcm mono samples at [Config.SAMPLE_RATE]; the array is only read
+     */
+    fun feedAudio(pcm: ShortArray) {
+        val s = stream ?: return
+        val sp = spotter ?: return
+
+        // 16-bit PCM -> float in [-1, 1)
+        val samples = FloatArray(pcm.size) { pcm[it] / 32768f }
+        s.acceptWaveform(samples, Config.SAMPLE_RATE)
+
+        // Decode until the spotter has consumed everything buffered so far.
+        while (sp.isReady(s)) {
+            sp.decode(s)
+        }
+
+        val result = sp.getResult(s)
+        if (result.keyword.isEmpty()) return
+
+        Log.i(TAG, "[KWS] HIT keyword=\"${result.keyword}\" tokens=${result.tokens.joinToString(",")}")
+
+        // Re-arm the stream BEFORE notifying: the callback may call release() (or throw),
+        // and the native objects must not be touched after that.
+        sp.reset(s)
+        onWakeup?.invoke(result.keyword)
+
+        // Diagnostic dump of the frame that completed the hit.
+        if (Config.DUMP_HIT_WAV) {
+            dumpHitWav(pcm)
+        }
+    }
+
+    /**
+     * Clears the recognition state; intended for PAUSE/RESUME transitions.
+     */
+    fun reset() {
+        val s = stream ?: return
+        val sp = spotter ?: return
+        sp.reset(s)
+        Log.i(TAG, "[Engine] reset (stream cleared)")
+    }
+
+    /**
+     * Releases the native stream and spotter. Safe to call when nothing is initialised.
+     */
+    fun release() {
+        // Detach the fields first so later feedAudio()/reset() calls see "not initialised".
+        val oldStream = stream
+        val oldSpotter = spotter
+        stream = null
+        spotter = null
+
+        // Release each object on its own: a failure in one must not leak the other.
+        releaseQuietly { oldStream?.release() }
+        releaseQuietly { oldSpotter?.release() }
+        Log.i(TAG, "[Engine] released")
+    }
+
+    /** Runs [action], logging instead of propagating any exception it throws. */
+    private inline fun releaseQuietly(action: () -> Unit) {
+        try {
+            action()
+        } catch (e: Exception) {
+            Log.e(TAG, "release failed: ${e.message}")
+        }
+    }
+
+    // ============================================================
+    // WAV dump (diagnostics)
+    // ============================================================
+
+    /**
+     * Writes [pcm] (the single frame passed to [feedAudio] when the hit fired) to
+     * `lila_kws/hit_NNN.wav` under the app's external files dir, falling back to the
+     * internal files dir. Failures are logged and never propagate to the audio path.
+     *
+     * Retrieve with: adb pull /sdcard/Android/data/com.lila.wakeup/files/lila_kws/ ./
+     */
+    private fun dumpHitWav(pcm: ShortArray) {
+        try {
+            val baseDir = context.getExternalFilesDir(null) ?: context.filesDir
+            val dir = File(baseDir, "lila_kws")
+            if (!dir.exists()) dir.mkdirs()
+
+            val idx = dumpCounter.incrementAndGet()
+            val file = File(dir, "hit_${idx.toString().padStart(3, '0')}.wav")
+
+            val dataBytes = pcm.size * 2          // 16-bit samples
+            val byteRate = Config.SAMPLE_RATE * 2 // mono * 2 bytes per sample
+            val wav = ByteBuffer.allocate(WAV_HEADER_BYTES + dataBytes)
+                .order(ByteOrder.LITTLE_ENDIAN)
+                .apply {
+                    put("RIFF".toByteArray(Charsets.US_ASCII))
+                    putInt(WAV_HEADER_BYTES - 8 + dataBytes) // RIFF chunk size
+                    put("WAVE".toByteArray(Charsets.US_ASCII))
+                    put("fmt ".toByteArray(Charsets.US_ASCII))
+                    putInt(16)                  // fmt chunk size
+                    putShort(1)                 // audio format: PCM
+                    putShort(1)                 // channels: mono
+                    putInt(Config.SAMPLE_RATE)
+                    putInt(byteRate)
+                    putShort(2)                 // block align
+                    putShort(16)                // bits per sample
+                    put("data".toByteArray(Charsets.US_ASCII))
+                    putInt(dataBytes)
+                    for (sample in pcm) putShort(sample)
+                }
+
+            FileOutputStream(file).use { it.write(wav.array()) }
+            Log.i(TAG, "[Dump] hit WAV saved: ${file.absolutePath}")
+        } catch (e: Exception) {
+            Log.e(TAG, "[Dump] failed: ${e.message}", e)
+        }
+    }
+}
diff --git a/app/src/main/java/com/lila/wakeup/kws/AudioCapture.kt b/app/src/main/java/com/lila/wakeup/kws/AudioCapture.kt index 0efd567..3a4c294 100644 --- a/app/src/main/java/com/lila/wakeup/kws/AudioCapture.kt +++ b/app/src/main/java/com/lila/wakeup/kws/AudioCapture.kt @@ -10,13 +10,16 @@ import com.lila.wakeup.Config /** * 麦克风音频采集封装。 * - * 设计要点: - * - AudioSource 用 VOICE_RECOGNITION(自动启用 NS/AGC,省去自己降噪) - * - 16kHz / mono / 16-bit PCM(与 sherpa-onnx 模型严格对齐) - * - 10ms 帧(160 sample)便于和 VAD 对齐 - * - 暂停时完全 release AudioRecord,释放麦克风给 RTC SDK + * 设计要点: + * - AudioSource 用 VOICE_RECOGNITION:启用 Android 系统 AGC + NS(自动增益 + 降噪) + * 板载 mic 信号弱(说话峰值 16000 vs 底噪 5000,SNR ~10dB),用 MIC raw 信号 + * 经 SenseVoice 识别后输出全空。VOICE_RECOGNITION 走系统 ASR 标准路径,硬件 AGC + * 把弱信号拉到合理量程,实测 ASR 命中率显著提升。 + * - 16kHz / mono / 16-bit PCM(与 sherpa-onnx 模型严格对齐) + * - 100ms 帧(1600 sample),过小帧会导致 sherpa-onnx 内部 chunk 边界异常 + * - 暂停时完全 release AudioRecord,释放麦克风给 RTC SDK * - * 注:这是常驻读取线程,不是 ForegroundService 主线程。 + * 注:这是常驻读取线程,不是 ForegroundService 主线程。 */ class AudioCapture( /** 每读到一帧 PCM 数据时回调,参数为 16-bit PCM short[] */ @@ -40,11 +43,14 @@ class AudioCapture( val minBuf = AudioRecord.getMinBufferSize( Config.SAMPLE_RATE, Config.CHANNEL_CONFIG, Config.ENCODING ) - // 至少 100ms ring buffer,多于 minBuf 防止音频丢帧 - val bufSize = maxOf(minBuf, Config.SAMPLE_RATE * 2 * 100 / 1000) + // ring buffer 必须 >= 一帧字节数,否则 read(1600) 在 1280 字节 buffer 上读不全, + // 数据断帧导致 VAD/ASR 收到的是部分 + 历史脏数据。取 max(minBuf*2, frameBytes*4) + // 给 100ms 帧充足缓冲,同时不丢帧。 + val frameBytes = Config.FRAME_SAMPLES * 2 // 16-bit PCM = 2 字节/样本 + val bufSize = maxOf(minBuf * 2, frameBytes * 4) record = AudioRecord.Builder() - .setAudioSource(MediaRecorder.AudioSource.VOICE_RECOGNITION) + .setAudioSource(MediaRecorder.AudioSource.VOICE_RECOGNITION) // 启用系统 AGC/NS .setAudioFormat( AudioFormat.Builder() .setSampleRate(Config.SAMPLE_RATE) @@ -67,9 +73,31 @@ class AudioCapture( thread = Thread {
val frame = ShortArray(Config.FRAME_SAMPLES) + val gain = Config.AUDIO_CAPTURE_GAIN + var frameCount = 0L while (running) { val n = record?.read(frame, 0, frame.size) ?: 0 if (n > 0) { + // 软件预增益: 板载 mic 信号偏弱,放大让 KWS 看到更显著特征 + if (gain != 1.0f) { + for (i in 0 until n) { + val v = (frame[i] * gain).toInt() + frame[i] = when { + v > 32767 -> 32767 + v < -32768 -> -32768 + else -> v.toShort() + } + } + } + // 每 10 帧 (1 秒) 打印一次音量峰值 + if (++frameCount % 10 == 0L) { + var peak = 0 + for (i in 0 until n) { + val v = if (frame[i] >= 0) frame[i].toInt() else -frame[i].toInt() + if (v > peak) peak = v + } + Log.i(TAG, "[Audio] frame#$frameCount n=$n peak=$peak (gain=${gain}x)") + } onFrame(frame) } else if (n < 0) { Log.e(TAG, "AudioRecord.read error code=$n, abort") diff --git a/app/src/main/java/com/lila/wakeup/kws/KwsEngine.kt b/app/src/main/java/com/lila/wakeup/kws/KwsEngine.kt deleted file mode 100644 index 9f2f86d..0000000 --- a/app/src/main/java/com/lila/wakeup/kws/KwsEngine.kt +++ /dev/null @@ -1,119 +0,0 @@ -package com.lila.wakeup.kws - -import android.content.Context -import android.util.Log -import com.k2fsa.sherpa.onnx.KeywordSpotter -import com.k2fsa.sherpa.onnx.KeywordSpotterConfig -import com.k2fsa.sherpa.onnx.OnlineModelConfig -import com.k2fsa.sherpa.onnx.OnlineStream -import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig -import com.lila.wakeup.Config - -/** - * sherpa-onnx KWS 引擎封装。 - * - * 该类是对 sherpa-onnx 官方 [KeywordSpotter] Kotlin Wrapper 的薄封装, - * 隐藏底层 API 细节,对外暴露简洁的 [process] 接口。 - * - * 模型路径:见 [Config.MODEL_DIR],从 APK assets 加载。 - * - * ⚠️ 版本兼容:本骨架基于 sherpa-onnx v1.13.0+ 的 Kotlin API。 - * 若克隆的 sherpa-onnx 仓库 API 有差异(如类名变更),仅需调整本文件。 - * - * 业务结果数据类。 - * - * @param keyword 命中的唤醒词文本(如 "你好Lila"),未命中为空字符串 - * @param confidence 置信度([0,1]) - */ -data class KwsResult( - val keyword: String, - val confidence: Float -) { - val isHit: Boolean get() = keyword.isNotEmpty() -} - -class KwsEngine(private val context: Context) { - - companion object { - private 
const val TAG = "KwsService.Engine" - } - - private var spotter: KeywordSpotter? = null - private var stream: OnlineStream? = null - - /** - * 加载模型 + keywords.txt,初始化推理引擎。 - * 必须在 [process] 之前调用一次。 - */ - fun init() { - val cfg = KeywordSpotterConfig( - featConfig = com.k2fsa.sherpa.onnx.FeatureConfig( - sampleRate = Config.SAMPLE_RATE, - featureDim = 80 - ), - modelConfig = OnlineModelConfig( - transducer = OnlineTransducerModelConfig( - encoder = Config.MODEL_ENCODER, - decoder = Config.MODEL_DECODER, - joiner = Config.MODEL_JOINER - ), - tokens = Config.MODEL_TOKENS, - modelType = "zipformer" - ), - keywordsFile = Config.MODEL_KEYWORDS, - keywordsThreshold = Config.KWS_THRESHOLD_PRIMARY, - keywordsScore = 1.5f, - numTrailingBlanks = 2 - ) - - spotter = KeywordSpotter(assetManager = context.assets, config = cfg) - stream = spotter?.createStream() - Log.i(TAG, "[Engine] init done, keywordsFile=${Config.MODEL_KEYWORDS}") - } - - /** - * 喂一帧 PCM 数据进引擎,返回是否命中。 - * - * @param pcm 16-bit PCM short[] 数据,长度任意(一般 10ms 帧 = 160 samples) - * @return 命中则 [KwsResult.isHit] 为 true - */ - fun process(pcm: ShortArray): KwsResult { - val sp = spotter ?: return KwsResult("", 0f) - val st = stream ?: return KwsResult("", 0f) - - // sherpa-onnx 输入要求 [-1, 1] 范围 float - val floatBuf = FloatArray(pcm.size) { pcm[it] / 32768f } - st.acceptWaveform(floatBuf, Config.SAMPLE_RATE) - - while (sp.isReady(st)) { - sp.decode(st) - } - - val result = sp.getResult(st) - val keyword = result.keyword - - return if (keyword.isNotEmpty()) { - // 命中后必须 reset,否则下次不会再触发 - sp.reset(st) - Log.i(TAG, "[Engine] hit keyword=$keyword") - KwsResult(keyword, 1f) // sherpa-onnx 不直接给 confidence,通过 keyword_score 间接控制 - } else { - KwsResult("", 0f) - } - } - - /** - * 释放 native 资源。在 Service.onDestroy 中调用。 - */ - fun release() { - try { - stream?.release() - spotter?.release() - } catch (e: Exception) { - Log.e(TAG, "release failed: ${e.message}") - } - stream = null - spotter = null - Log.i(TAG, "[Engine] released") 
- } -} diff --git a/app/src/main/java/com/lila/wakeup/kws/KwsStateMachine.kt b/app/src/main/java/com/lila/wakeup/kws/KwsStateMachine.kt index d0564b2..9cadb9b 100644 --- a/app/src/main/java/com/lila/wakeup/kws/KwsStateMachine.kt +++ b/app/src/main/java/com/lila/wakeup/kws/KwsStateMachine.kt @@ -10,23 +10,39 @@ import java.util.concurrent.atomic.AtomicReference /** * KWS 状态机 —— 协议 v2.1 规则的中央控制器。 * - * 三个状态: - * - [State.Idle]: 服务启动中(极短) - * - [State.Listening]: 正在监听唤醒词,命中即发 WAKEUP 广播 - * - [State.Paused]: APP 主动暂停(含 silence_ms 二次保险窗口) + * 三个状态: + * - [State.Idle]: 服务启动中(极短) + * - [State.Listening]: 正在监听唤醒词,命中即发 WAKEUP 广播 + * - [State.Paused]: APP 主动暂停(含 silence_ms 二次保险窗口) * - * silence_ms 实现: - * - 收到 PAUSE → 立即进入 Paused,记录 silenceUntilMs - * - silenceUntilMs 期间收到 RESUME → 等到 silenceUntilMs 过后才真正 resume(防 AI 自我唤醒) + * silence_ms 实现: + * - 收到 PAUSE → 立即进入 Paused,记录 silenceUntilMs + * - silenceUntilMs 期间收到 RESUME → 等到 silenceUntilMs 过后才真正 resume(防 AI 自我唤醒) * - 命中事件在 Paused 期间忽略 * - * 兜底超时:APP 漏发 RESUME 时,2 分钟自动 resume(详见 Config.PAUSE_TIMEOUT_MS)。 + * 兜底超时:APP 漏发 RESUME 时,2 分钟自动 resume(详见 Config.PAUSE_TIMEOUT_MS)。 + * + * 后验平滑设计移除:sherpa-onnx 内部已有 numTrailingBlanks 平滑, + * 外部再叠加反而导致漏命中(命中后 reset stream,下一帧不会再 hit)。 */ class KwsStateMachine( - private val engine: KwsEngine, + private val engine: AsrEngine, private val sender: BroadcastSender, private val onTimeoutResume: () -> Unit = {} ) { + init { + // ASR 引擎在内部线程触发命中,通过回调把 keyword 转发给 sender + // 仅在 Listening 状态下转发,Paused 期间忽略命中(VAD 段已处理但状态变更) + engine.onWakeup = { keyword -> + if (state.get() is State.Listening) { + Log.i(TAG, "[KWS] HIT keyword=$keyword") + sender.sendWakeup(keyword, confidence = 1.0f) + } else { + Log.d(TAG, "[KWS] hit ignored (state != LISTENING) keyword=$keyword") + } + } + } + companion object { private const val TAG = "KwsService.State" } @@ -37,38 +53,31 @@ class KwsStateMachine( data class Paused(val silenceUntilMs: Long, val reason: String) : State() } - /** 用 Atomic 做无锁状态切换,避免读写竞态 */ + /** 
用 Atomic 做无锁状态切换,避免读写竞态 */ private val state = AtomicReference(State.Idle) private val mainHandler = Handler(Looper.getMainLooper()) private var timeoutRunnable: Runnable? = null private var deferredResumeRunnable: Runnable? = null - /** 平滑窗口:连续 N 帧都命中才真正发 WAKEUP(v2.1 后验平滑) */ - private var consecutiveHits = 0 - fun transitionToListening() { state.set(State.Listening) - consecutiveHits = 0 Log.i(TAG, "[State] -> LISTENING") } /** * 收到 KWS_PAUSE 广播时调用。 * - * @param silenceMs 静默期(毫秒),默认 [Config.DEFAULT_SILENCE_MS] - * @param reason 调用原因(仅日志) + * @param silenceMs 静默期(毫秒),默认 [Config.DEFAULT_SILENCE_MS] + * @param reason 调用原因(仅日志) */ fun onPauseReceived(silenceMs: Long, reason: String) { val until = System.currentTimeMillis() + silenceMs state.set(State.Paused(until, reason)) - consecutiveHits = 0 Log.i(TAG, "[State] -> PAUSED reason=$reason silence_ms=$silenceMs until=$until") - - // 取消上一次的 deferred resume(如果有) + // 重置 VAD,清除暂停期间累积的音频(避免恢复后重复识别旧音) + engine.reset() cancelDeferredResume() - - // 启动 2 分钟兜底超时 scheduleTimeoutResume() } @@ -76,12 +85,10 @@ class KwsStateMachine( * 收到 KWS_RESUME 广播时调用。 */ fun onResumeReceived(reason: String) { - val current = state.get() - when (current) { + when (val current = state.get()) { is State.Paused -> { val now = System.currentTimeMillis() if (now < current.silenceUntilMs) { - // silence 期间,延迟到 silence 满才真正 resume val delay = current.silenceUntilMs - now Log.i(TAG, "[State] RESUME deferred ${delay}ms (still in silence)") deferredResumeRunnable = Runnable { @@ -96,7 +103,6 @@ class KwsStateMachine( } } is State.Listening -> { - // 幂等:本就在监听态,无副作用 Log.d(TAG, "[State] RESUME ignored, already LISTENING") } is State.Idle -> { @@ -108,26 +114,12 @@ class KwsStateMachine( } /** - * AudioCapture 每帧调用一次。在 Listening 态喂引擎并检查命中。 + * AudioCapture 每帧调用一次。在 Listening 态喂 VAD, + * VAD 累积语音段后异步触发 ASR 识别,命中通过 engine.onWakeup 回调。 */ fun onAudioFrame(pcm: ShortArray) { if (state.get() !is State.Listening) return - - val result = engine.process(pcm) - if 
(result.isHit) { - consecutiveHits++ - if (consecutiveHits >= Config.SMOOTH_FRAMES) { - // 命中且过平滑 → 发 WAKEUP 广播 - Log.i(TAG, "[KWS] HIT confirmed keyword=${result.keyword} smooth=$consecutiveHits") - sender.sendWakeup(result.keyword, result.confidence) - consecutiveHits = 0 - } else { - Log.d(TAG, "[KWS] hit pending smooth=$consecutiveHits/${Config.SMOOTH_FRAMES}") - } - } else { - // 未命中重置计数 - if (consecutiveHits > 0) consecutiveHits = 0 - } + engine.feedAudio(pcm) } fun shutdown() { @@ -137,13 +129,13 @@ class KwsStateMachine( } // ============================================================ - // 内部:兜底定时器 + // 内部:兜底定时器 // ============================================================ private fun scheduleTimeoutResume() { cancelTimeoutResume() timeoutRunnable = Runnable { - Log.w(TAG, "[Tmout] PAUSE 超时(${Config.PAUSE_TIMEOUT_MS / 1000}s),强制 RESUME") + Log.w(TAG, "[Tmout] PAUSE 超时(${Config.PAUSE_TIMEOUT_MS / 1000}s),强制 RESUME") transitionToListening() onTimeoutResume() }.also { diff --git a/app/src/main/java/com/lila/wakeup/protocol/BroadcastSender.kt b/app/src/main/java/com/lila/wakeup/protocol/BroadcastSender.kt index 6dee129..b1fc12e 100644 --- a/app/src/main/java/com/lila/wakeup/protocol/BroadcastSender.kt +++ b/app/src/main/java/com/lila/wakeup/protocol/BroadcastSender.kt @@ -23,14 +23,24 @@ class BroadcastSender(private val context: Context) { fun sendWakeup(keyword: String, confidence: Float) { val timestamp = System.currentTimeMillis() - val intent = Intent(Config.ACTION_WAKEUP).apply { + // 1. 外部协议广播:发给数字人 APP(com.qy.lila) + val externalIntent = Intent(Config.ACTION_WAKEUP).apply { setPackage(Config.APP_PACKAGE) // v2.1 双向 setPackage 强制要求 putExtra(Config.EXTRA_KEYWORD, keyword) putExtra(Config.EXTRA_TIMESTAMP, timestamp) putExtra(Config.EXTRA_CONFIDENCE, confidence) } + context.sendBroadcast(externalIntent) + + // 2. 
内部广播:本 APP 的 MainActivity 接收,用于 UI 实时显示命中 + val internalIntent = Intent(Config.ACTION_INTERNAL_WAKEUP).apply { + setPackage(Config.SELF_PACKAGE) // 仅本 APP 自己接收,避免泄漏到外部 + putExtra(Config.EXTRA_KEYWORD, keyword) + putExtra(Config.EXTRA_TIMESTAMP, timestamp) + putExtra(Config.EXTRA_CONFIDENCE, confidence) + } + context.sendBroadcast(internalIntent) - context.sendBroadcast(intent) Log.i(TAG, "WAKEUP -> ${Config.APP_PACKAGE} keyword=$keyword conf=$confidence ts=$timestamp") } }