zyc 689fa8936b Integrate Volcengine realtime voice + Live2D mouth driving
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 15:39:23 +08:00

536 lines
22 KiB
Objective-C
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#import "SpeechEnginePlugin.h"
#import <AVFoundation/AVFoundation.h>
#import <math.h>
// SDK calls are compiled in only when a SpeechEngine pod header is actually
// available (SpeechEngineToB or SpeechEngineDialogToB, checked in that order).
// For simulator/desktop builds, run `SPEECH_SDK_ENABLED=0 pod install` to skip the SDK;
// the plugin then degrades to a stub whose method calls all return SIMULATOR_NOT_SUPPORTED.
#if __has_include(<SpeechEngineToB/SpeechEngine.h>)
#define SPEECH_SDK_AVAILABLE 1
#import <SpeechEngineToB/SpeechEngine.h>
#elif __has_include(<SpeechEngineDialogToB/SpeechEngine.h>)
#define SPEECH_SDK_AVAILABLE 1
#import <SpeechEngineDialogToB/SpeechEngine.h>
#else
#define SPEECH_SDK_AVAILABLE 0
#endif
// Name of the Flutter method channel on which this plugin receives method calls.
static NSString *const kMethodChannel = @"avatar_flutter_app/speech_engine";
// Name of the Flutter event channel on which this plugin streams events to Dart.
static NSString *const kEventChannel = @"avatar_flutter_app/speech_events";
// Private class extension: channel plumbing plus (only when the SDK is linked)
// the engine handle and the state used for log polling and mouth driving.
@interface SpeechEnginePlugin () <FlutterStreamHandler
#if SPEECH_SDK_AVAILABLE
, SpeechEngineDelegate
#endif
>
// Method channel created in +registerWithRegistrar:.
@property (nonatomic, strong) FlutterMethodChannel *methodChannel;
// Event channel created in +registerWithRegistrar:; this object is its stream handler.
@property (nonatomic, strong) FlutterEventChannel *eventChannel;
// Sink stored by -onListenWithArguments:eventSink: and cleared by -onCancelWithArguments:.
@property (nonatomic, copy) FlutterEventSink eventSink;
#if SPEECH_SDK_AVAILABLE
// Engine instance; created in -initEngine:result:, released in -destroyEngine:.
@property (nonatomic, strong) SpeechEngine *speechEngine;
// Set to YES on SEEngineStart and NO on SEEngineStop / destroy.
@property (nonatomic, assign) BOOL engineStarted;
// Bot name taken from the init arguments and sent with the StartEngine directive.
@property (nonatomic, copy) NSString *botName;
// Despite the name, this holds the directory passed to the SDK as its debug path
// (the Documents directory); the log file itself is looked up at poll time.
@property (nonatomic, copy) NSString *sdkLogPath;
// Byte offset in the SDK log file up to which content has already been read.
@property (nonatomic, assign) unsigned long long lastLogOffset;
// GCD timer source that periodically calls -drainSdkLog; nil when not polling.
@property (nonatomic, strong) dispatch_source_t logPoller;
// Timestamp (seconds) of the last emitted "mouth" event, used to rate-limit emission.
@property (nonatomic, assign) NSTimeInterval lastMouthEmitTs;
// Low-pass filtered mouth-open value carried between audio frames.
@property (nonatomic, assign) float smoothedMouth;
#endif
@end
@implementation SpeechEnginePlugin
#pragma mark - FlutterPlugin
/// Forwards to the SDK's one-time environment preparation when the SDK is linked;
/// otherwise only logs that the step was skipped.
+ (void)prepareEnvironment {
#if !SPEECH_SDK_AVAILABLE
    // Stub build: there is no SDK to prepare.
    NSLog(@"[SpeechPlugin] prepareEnvironment skipped (SDK not linked, simulator build)");
#else
    NSLog(@"[SpeechPlugin] prepareEnvironment");
    [SpeechEngine prepareEnvironment];
#endif
}
/// Creates the plugin instance and wires it to the method channel (as call
/// delegate) and to the event channel (as stream handler).
/// @param registrar Flutter registrar that supplies the binary messenger.
+ (void)registerWithRegistrar:(NSObject<FlutterPluginRegistrar> *)registrar {
    SpeechEnginePlugin *plugin = [[SpeechEnginePlugin alloc] init];

    // Method channel: Dart -> native calls.
    FlutterMethodChannel *methods =
        [FlutterMethodChannel methodChannelWithName:kMethodChannel
                                    binaryMessenger:[registrar messenger]];
    plugin.methodChannel = methods;
    [registrar addMethodCallDelegate:plugin channel:plugin.methodChannel];

    // Event channel: native -> Dart event stream.
    FlutterEventChannel *events =
        [FlutterEventChannel eventChannelWithName:kEventChannel
                                  binaryMessenger:[registrar messenger]];
    plugin.eventChannel = events;
    [plugin.eventChannel setStreamHandler:plugin];
}
#pragma mark - FlutterStreamHandler
/// FlutterStreamHandler: stores the sink so later events can be delivered.
/// @param arguments Unused.
/// @param events Sink to store.
/// @return Always nil (no error).
- (FlutterError *)onListenWithArguments:(id)arguments eventSink:(FlutterEventSink)events {
    [self setEventSink:events];
    return nil;
}
/// FlutterStreamHandler: drops the stored sink; events sent afterwards are discarded.
/// @param arguments Unused.
/// @return Always nil (no error).
- (FlutterError *)onCancelWithArguments:(id)arguments {
    [self setEventSink:nil];
    return nil;
}
/// Delivers an event dictionary to the stored event sink on the main thread.
/// Does nothing when no sink is currently stored.
/// @param payload Event dictionary to pass to the sink.
- (void)sendEvent:(NSDictionary *)payload {
    if (self.eventSink == nil) {
        return;
    }
    if (![NSThread isMainThread]) {
        // Hop to the main queue; the sink may have been cleared by the time the block runs.
        dispatch_async(dispatch_get_main_queue(), ^{
            FlutterEventSink sink = self.eventSink;
            if (sink != nil) {
                sink(payload);
            }
        });
        return;
    }
    self.eventSink(payload);
}
#pragma mark - MethodCall
/// Dispatches a Flutter method call to the matching engine operation.
///
/// Without the SDK linked, every call completes with a SIMULATOR_NOT_SUPPORTED error.
/// With the SDK linked, the method name selects init/start/destroy or a directive;
/// unknown names complete with FlutterMethodNotImplemented.
/// @param call Method name and arguments from Dart.
/// @param result Completion callback; invoked exactly once.
- (void)handleMethodCall:(FlutterMethodCall *)call result:(FlutterResult)result {
#if !SPEECH_SDK_AVAILABLE
    result([FlutterError errorWithCode:@"SIMULATOR_NOT_SUPPORTED"
                               message:@"火山实时语音 SDK 在 Apple Silicon 模拟器上不可用。请用真机测试,或执行 'pod install' 启用 SDK 后再构建。"
                               details:nil]);
    return;
#else
    NSString *m = call.method;
    if ([m isEqualToString:@"init"]) {
        [self initEngine:call.arguments result:result];
    } else if ([m isEqualToString:@"start"]) {
        [self startEngine:result];
    } else if ([m isEqualToString:@"startTalking"]) {
        [self sendDirective:SEDirectiveStartTalking data:nil result:result];
    } else if ([m isEqualToString:@"finishTalking"]) {
        [self sendDirective:SEDirectiveFinishTalking data:nil result:result];
    } else if ([m isEqualToString:@"pauseTalking"]) {
        [self sendDirective:SEDirectivePauseTalking data:nil result:result];
    } else if ([m isEqualToString:@"resumeTalking"]) {
        [self sendDirective:SEDirectiveResumeTalking data:nil result:result];
    } else if ([m isEqualToString:@"cancelCurrentDialog"]) {
        [self sendDirective:SEDirectiveCancelCurrentDialog data:nil result:result];
    } else if ([m isEqualToString:@"sayHello"]) {
        // The demo uses SEDirectiveEventSayHello with a {"content": "..."} payload.
        // Build the JSON with NSJSONSerialization so quotes, backslashes, newlines
        // and control characters in the content are all escaped correctly.
        id arguments = call.arguments;
        id content = [arguments isKindOfClass:[NSDictionary class]] ? arguments[@"content"] : nil;
        if (![content isKindOfClass:[NSString class]]) {
            // Missing, NSNull or non-string content is treated as an empty greeting.
            content = @"";
        }
        NSData *jsonData = [NSJSONSerialization dataWithJSONObject:@{@"content": content}
                                                           options:0
                                                             error:nil];
        NSString *json = jsonData
            ? [[NSString alloc] initWithData:jsonData encoding:NSUTF8StringEncoding]
            : nil;
        if (json.length == 0) {
            // Serialization failed (e.g. malformed Unicode); fall back to empty content.
            json = @"{\"content\":\"\"}";
        }
        [self sendDirective:SEDirectiveEventSayHello data:json result:result];
    } else if ([m isEqualToString:@"stop"]) {
        [self sendDirective:SEDirectiveStopEngine data:nil result:result];
    } else if ([m isEqualToString:@"destroy"]) {
        [self destroyEngine:result];
    } else {
        result(FlutterMethodNotImplemented);
    }
#endif
}
#if SPEECH_SDK_AVAILABLE
#pragma mark - Engine lifecycle
/// Turns one TTS audio frame into a mouth-open value and emits it to Dart.
///
/// Pipeline: RMS of the samples -> gain and non-linear mapping -> noise gate ->
/// one-pole low-pass smoothing -> "mouth" event, emitted at most about 30 times
/// per second. The smoothed value is updated on every frame, including frames
/// whose event is suppressed by the rate limit.
/// @param data Raw audio frame; read as native-endian int16 PCM samples.
///             Frames shorter than one sample are ignored.
- (void)processTtsAudio:(NSData *)data {
    NSUInteger count = data.length / sizeof(int16_t);
    if (count == 0) return;
    const int16_t *samples = data.bytes;
    double sumSq = 0.0;
    for (NSUInteger i = 0; i < count; i++) {
        double s = samples[i] / 32768.0;
        sumSq += s * s;
    }
    double rms = sqrt(sumSq / (double)count);
    // Gain plus non-linear mapping so the mouth movement is more pronounced.
    double mouth = pow(MIN(1.0, rms * 4.0), 0.6);
    if (mouth < 0.05) mouth = 0; // noise gate
    // One-pole low-pass: fast attack, slow release, to avoid jitter.
    float prev = self.smoothedMouth;
    float target = (float)mouth;
    float alpha = (target > prev) ? 0.5f : 0.2f;
    float smoothed = prev + (target - prev) * alpha;
    self.smoothedMouth = smoothed;
    // Rate limit to ~30 Hz. Use the monotonic system uptime rather than the wall
    // clock: a wall-clock adjustment backwards would otherwise suppress every
    // event until the clock caught up with the last emit timestamp.
    NSTimeInterval now = [NSProcessInfo processInfo].systemUptime;
    if (now - self.lastMouthEmitTs < 0.033) return;
    self.lastMouthEmitTs = now;
    [self sendEvent:@{@"type": @"mouth", @"value": @(smoothed)}];
}
/// Writes a plugin diagnostic line to NSLog with the "[SpeechPlugin]" prefix.
/// The message is only logged; it is not forwarded to the Flutter side.
/// @param msg Text to log.
- (void)pluginLog:(NSString *)msg {
    NSString *line = [NSString stringWithFormat:@"[SpeechPlugin] %@", msg];
    NSLog(@"%@", line);
}
/// Creates and configures the speech engine from the Dart-supplied arguments.
///
/// Completes with {ok, alreadyInited} if an engine already exists, with a
/// CREATE_FAILED / INIT_FAILED FlutterError on failure, or with {ok, version}
/// on success. On any failure the engine reference is cleared so that a later
/// `init` call retries instead of reporting `alreadyInited`.
/// @param args Expected keys (all optional strings): appId, appKey, token, dialogId,
///             botName, uid, address, uri, aecModelPath. Missing, NSNull or
///             non-string values fall back to the defaults used below.
/// @param result Completion callback; invoked exactly once.
- (void)initEngine:(NSDictionary *)args result:(FlutterResult)result {
    [self pluginLog:@"initEngine() called"];
    if (self.speechEngine) {
        [self pluginLog:@"already inited, skip"];
        result(@{@"ok": @YES, @"alreadyInited": @YES});
        return;
    }

    // Channel arguments are untyped: a Dart null arrives as NSNull, which would
    // otherwise be passed straight into setStringParam:. Accept strings only.
    NSDictionary *params = [args isKindOfClass:[NSDictionary class]] ? args : @{};
    NSString *(^stringArg)(NSString *, NSString *) = ^NSString *(NSString *key, NSString *fallback) {
        id value = params[key];
        return [value isKindOfClass:[NSString class]] ? (NSString *)value : fallback;
    };

    // Note (from the original author): do not configure AVAudioSession manually here.
    // VolcEngineRTC manages the audio session (category/mode/route/interruption)
    // internally; manually setting AVAudioSessionModeVoiceChat conflicts with RTC and
    // makes the SDK misdetect "app into background", triggering errcode=5000.
    [self pluginLog:@"step1 alloc SpeechEngine"];
    self.speechEngine = [[SpeechEngine alloc] init];
    [self pluginLog:@"step2 createEngineWithDelegate"];
    if (![self.speechEngine createEngineWithDelegate:self]) {
        [self pluginLog:@"step2 FAILED: createEngineWithDelegate returned NO"];
        self.speechEngine = nil;
        result([FlutterError errorWithCode:@"CREATE_FAILED"
                                   message:@"createEngineWithDelegate returned NO"
                                   details:nil]);
        return;
    }
    [self pluginLog:[NSString stringWithFormat:@"step3 SDK version=%@", [self.speechEngine getVersion]]];
    [self pluginLog:@"step4 setStringParam ENGINE_NAME/LOG_LEVEL/DEBUG_PATH"];
    [self.speechEngine setStringParam:SE_DIALOG_ENGINE forKey:SE_PARAMS_KEY_ENGINE_NAME_STRING];
    [self.speechEngine setStringParam:SE_LOG_LEVEL_WARN forKey:SE_PARAMS_KEY_LOG_LEVEL_STRING];
    NSString *docs = NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, NSUserDomainMask, YES).firstObject ?: @"";
    [self.speechEngine setStringParam:docs forKey:SE_PARAMS_KEY_DEBUG_PATH_STRING];
    // Only the directory is stored; per the original author the SDK generates the
    // actual log file name (with a timestamp) when it starts.
    self.sdkLogPath = docs;
    self.lastLogOffset = 0;
    [self pluginLog:[NSString stringWithFormat:@"docs=%@ (sdk 日志文件名运行时探测)", docs]];

    NSString *appId = stringArg(@"appId", @"");
    NSString *appKey = stringArg(@"appKey", @"");
    NSString *token = stringArg(@"token", @"");
    NSString *resourceId = stringArg(@"dialogId", @"");
    self.botName = stringArg(@"botName", @"豆包");
    // Only the lengths of the key and token are logged, never their values.
    [self pluginLog:[NSString stringWithFormat:@"step5 auth: appId=%@ appKeyLen=%lu tokenLen=%lu",
                     appId, (unsigned long)appKey.length, (unsigned long)token.length]];
    [self pluginLog:[NSString stringWithFormat:@"step6 dialog: resourceId=%@", resourceId]];
    [self.speechEngine setStringParam:appId forKey:SE_PARAMS_KEY_APP_ID_STRING];
    [self.speechEngine setStringParam:appKey forKey:SE_PARAMS_KEY_APP_KEY_STRING];
    [self.speechEngine setStringParam:token forKey:SE_PARAMS_KEY_APP_TOKEN_STRING];
    [self.speechEngine setStringParam:resourceId forKey:SE_PARAMS_KEY_RESOURCE_ID_STRING];
    [self.speechEngine setStringParam:stringArg(@"uid", @"avatar_flutter_app_user")
                               forKey:SE_PARAMS_KEY_UID_STRING];
    [self.speechEngine setStringParam:stringArg(@"address", @"wss://openspeech.bytedance.com")
                               forKey:SE_PARAMS_KEY_DIALOG_ADDRESS_STRING];
    [self.speechEngine setStringParam:stringArg(@"uri", @"/api/v3/realtime/dialogue")
                               forKey:SE_PARAMS_KEY_DIALOG_URI_STRING];
    [self.speechEngine setStringParam:SE_RECORDER_TYPE_RECORDER forKey:SE_PARAMS_KEY_RECORDER_TYPE_STRING];

    // AEC: enabled by default, matching the demo. When no model path is supplied,
    // the model is located automatically under Flutter's assets/voice/aec.model.
    [self.speechEngine setBoolParam:YES forKey:SE_PARAMS_KEY_ENABLE_AEC_BOOL];
    NSString *aecPath = stringArg(@"aecModelPath", @"");
    if (aecPath.length == 0) {
        // Flutter assets live under .app/Frameworks/App.framework/flutter_assets/.
        NSString *bundle = [[NSBundle mainBundle] pathForResource:@"App" ofType:@"framework"];
        if (bundle.length > 0) {
            NSString *candidate = [bundle stringByAppendingPathComponent:@"flutter_assets/assets/voice/aec.model"];
            if ([[NSFileManager defaultManager] fileExistsAtPath:candidate]) {
                aecPath = candidate;
            }
        }
        if (aecPath.length == 0) {
            // Fallback: walk the whole app bundle and take the first aec.model found.
            NSString *root = [[NSBundle mainBundle] bundlePath];
            NSDirectoryEnumerator *enumerator = [[NSFileManager defaultManager] enumeratorAtPath:root];
            for (NSString *p in enumerator) {
                if ([p hasSuffix:@"aec.model"]) {
                    aecPath = [root stringByAppendingPathComponent:p];
                    break;
                }
            }
        }
    }
    [self pluginLog:[NSString stringWithFormat:@"AEC model path=%@", aecPath]];
    if (aecPath.length > 0) {
        [self.speechEngine setStringParam:aecPath forKey:SE_PARAMS_KEY_AEC_MODEL_PATH_STRING];
    }

    // Enable the built-in player and its audio callback (the callback feeds mouth driving).
    [self.speechEngine setBoolParam:YES forKey:SE_PARAMS_KEY_DIALOG_ENABLE_PLAYER_BOOL];
    [self.speechEngine setBoolParam:YES forKey:SE_PARAMS_KEY_DIALOG_ENABLE_PLAYER_AUDIO_CALLBACK_BOOL];
    [self pluginLog:@"step7 calling initEngine ..."];
    SEEngineErrorCode ret = [self.speechEngine initEngine];
    [self pluginLog:[NSString stringWithFormat:@"step8 initEngine ret=%d (0=NoError)", (int)ret]];
    if (ret != SENoError) {
        // Release the half-initialized engine; otherwise the next `init` call would
        // hit the "already inited" shortcut and report success on a broken engine.
        [self.speechEngine destroyEngine];
        self.speechEngine = nil;
        result([FlutterError errorWithCode:@"INIT_FAILED"
                                   message:[NSString stringWithFormat:@"initEngine returned %d", (int)ret]
                                   details:nil]);
        return;
    }
    result(@{@"ok": @YES, @"version": [self.speechEngine getVersion] ?: @""});
}
/// Starts a dialog session on the already-initialized engine.
///
/// Sends SyncStopEngine first to clear any previous session, then StartEngine
/// with a {"dialog":{"bot_name":...}} payload. Completes with NOT_INITED,
/// NO_PERMISSION, START_FAILED, or {ok: YES}.
/// @param result Completion callback; invoked exactly once.
- (void)startEngine:(FlutterResult)result {
    if (!self.speechEngine) {
        [self pluginLog:@"startEngine FAILED: not inited"];
        result([FlutterError errorWithCode:@"NOT_INITED" message:@"call init() first" details:nil]);
        return;
    }
    [self pluginLog:@"step9 SyncStopEngine (清理上次会话)"];
    // Best-effort cleanup of the previous session; the return code is intentionally ignored.
    [self.speechEngine sendDirective:SEDirectiveSyncStopEngine];
    // Per the demo, StartEngine must carry data={"dialog":{"bot_name":"..."}}.
    // Serialize with NSJSONSerialization so a bot name containing quotes,
    // backslashes or control characters still produces valid JSON.
    NSString *botName = self.botName.length > 0 ? self.botName : @"豆包";
    NSData *startData = [NSJSONSerialization dataWithJSONObject:@{@"dialog": @{@"bot_name": botName}}
                                                        options:0
                                                          error:nil];
    NSString *startJson = startData
        ? [[NSString alloc] initWithData:startData encoding:NSUTF8StringEncoding]
        : nil;
    if (startJson.length == 0) {
        // Serialization failed (e.g. malformed Unicode in the name); use the default bot name.
        startJson = @"{\"dialog\":{\"bot_name\":\"豆包\"}}";
    }
    [self pluginLog:[NSString stringWithFormat:@"step10 sending StartEngine data=%@", startJson]];
    SEEngineErrorCode ret = [self.speechEngine sendDirective:SEDirectiveStartEngine data:startJson];
    [self pluginLog:[NSString stringWithFormat:@"step11 StartEngine ret=%d", (int)ret]];
    if (ret == SERecCheckEnvironmentFailed) {
        [self pluginLog:@"NO MICROPHONE PERMISSION!"];
        result([FlutterError errorWithCode:@"NO_PERMISSION" message:@"microphone not granted" details:nil]);
    } else if (ret != SENoError) {
        result([FlutterError errorWithCode:@"START_FAILED"
                                   message:[NSString stringWithFormat:@"%d", (int)ret] details:nil]);
    } else {
        [self pluginLog:@"StartEngine OK"];
        // SDK log polling is disabled to avoid log flooding. Uncomment when debugging:
        // [self startSdkLogPoller];
        result(@{@"ok": @YES});
    }
}
/// Starts a background GCD timer that calls -drainSdkLog once per second.
/// Does nothing if a poller already exists or no log directory is known.
- (void)startSdkLogPoller {
    if (self.logPoller || self.sdkLogPath.length == 0) {
        return;
    }

    dispatch_queue_t queue = dispatch_get_global_queue(QOS_CLASS_BACKGROUND, 0);
    dispatch_source_t poller = dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER, 0, 0, queue);

    // First fire after one second, then once per second, with 250 ms leeway.
    dispatch_time_t firstFire = dispatch_time(DISPATCH_TIME_NOW, NSEC_PER_SEC);
    dispatch_source_set_timer(poller, firstFire, NSEC_PER_SEC, 250 * NSEC_PER_MSEC);

    // Capture self weakly: the timer is stored on self.
    __weak typeof(self) weakSelf = self;
    dispatch_source_set_event_handler(poller, ^{
        [weakSelf drainSdkLog];
    });

    dispatch_resume(poller);
    self.logPoller = poller;
    [self pluginLog:@"sdk log poller started"];
}
/// Finds the SDK log file to read in the stored log directory.
/// Candidates are entries named "speech_sdk_*.log"; the one whose name compares
/// greatest (via -compare:) wins.
/// @return Full path of the chosen file, or nil if the directory is unknown or
///         contains no matching entry.
- (NSString *)resolveLatestSdkLog {
    // sdkLogPath actually holds the directory, not a file path.
    NSString *directory = self.sdkLogPath;
    if (directory.length == 0) {
        return nil;
    }

    NSString *newest = nil;
    NSArray *entries = [[NSFileManager defaultManager] contentsOfDirectoryAtPath:directory error:nil];
    for (NSString *entry in entries) {
        BOOL isSdkLog = [entry hasPrefix:@"speech_sdk_"] && [entry hasSuffix:@".log"];
        if (!isSdkLog) {
            continue;
        }
        if (newest == nil || [entry compare:newest] == NSOrderedDescending) {
            newest = entry;
        }
    }

    if (newest == nil) {
        return nil;
    }
    return [directory stringByAppendingPathComponent:newest];
}
/// Reads newly appended SDK log content and forwards the important lines to Dart.
///
/// Only complete lines (terminated by '\n') are consumed; a trailing partial line
/// is left in the file for the next poll, so a line or a multi-byte UTF-8
/// character is never split across two reads. Lines containing WARN / ERROR /
/// FAIL / err_code / err_msg (case-insensitive) are sent as "sdk_log_line"
/// events, at most 15 per call; further matching lines in the same chunk are dropped.
- (void)drainSdkLog {
    NSString *file = [self resolveLatestSdkLog];
    if (!file) {
        // One-time diagnostic: report what the directory contains when no log file is found.
        static BOOL diagnosed = NO;
        if (!diagnosed) {
            diagnosed = YES;
            NSArray *files = [[NSFileManager defaultManager] contentsOfDirectoryAtPath:self.sdkLogPath error:nil];
            [self sendEvent:@{
                @"type": @"sdk_log_line",
                @"line": [NSString stringWithFormat:@"docs 目录内容:%@",
                          [files componentsJoinedByString:@", "]]
            }];
        }
        return;
    }

    // The read offset belongs to one specific file. When a different (newer) log
    // file is selected, restart from the beginning instead of reusing a stale offset.
    // NOTE(review): this is process-wide state, like `diagnosed` above; it assumes
    // a single plugin instance polls at a time — confirm.
    static NSString *trackedFile = nil;
    if (![file isEqualToString:trackedFile]) {
        if (trackedFile != nil) {
            self.lastLogOffset = 0;
        }
        trackedFile = [file copy];
    }

    NSDictionary *attrs = [[NSFileManager defaultManager] attributesOfItemAtPath:file error:nil];
    if (!attrs) return;
    unsigned long long size = [attrs fileSize];
    if (size < self.lastLogOffset) {
        // The file shrank (truncated or recreated): start over from the beginning.
        self.lastLogOffset = 0;
    }
    if (size == self.lastLogOffset) return;

    NSFileHandle *fh = [NSFileHandle fileHandleForReadingAtPath:file];
    if (!fh) return;
    @try {
        [fh seekToFileOffset:self.lastLogOffset];
        NSData *data = [fh readDataToEndOfFile];

        // Consume only up to the last newline that was actually read.
        NSData *newline = [NSData dataWithBytes:"\n" length:1];
        NSRange lastNewline = [data rangeOfData:newline
                                        options:NSDataSearchBackwards
                                          range:NSMakeRange(0, data.length)];
        if (lastNewline.location != NSNotFound) {
            NSUInteger consumed = NSMaxRange(lastNewline);
            // Advance by the bytes consumed, not by the earlier stat() size: the file
            // may have grown between the two calls, which would re-emit lines.
            self.lastLogOffset += consumed;

            NSData *complete = [data subdataWithRange:NSMakeRange(0, consumed)];
            NSString *chunk = [[NSString alloc] initWithData:complete encoding:NSUTF8StringEncoding] ?: @"";
            // Only WARN/ERROR/FAIL-style lines go to the UI; info/debug stay in the file.
            NSArray *lines = [chunk componentsSeparatedByString:@"\n"];
            NSInteger emitted = 0;
            for (NSString *raw in lines) {
                NSString *line = [raw stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceCharacterSet]];
                if (line.length == 0) continue;
                BOOL important =
                    [line rangeOfString:@"WARN" options:NSCaseInsensitiveSearch].location != NSNotFound
                    || [line rangeOfString:@"ERROR" options:NSCaseInsensitiveSearch].location != NSNotFound
                    || [line rangeOfString:@"FAIL" options:NSCaseInsensitiveSearch].location != NSNotFound
                    || [line rangeOfString:@"err_code" options:NSCaseInsensitiveSearch].location != NSNotFound
                    || [line rangeOfString:@"err_msg" options:NSCaseInsensitiveSearch].location != NSNotFound;
                if (!important) continue;
                [self sendEvent:@{@"type": @"sdk_log_line", @"line": line}];
                if (++emitted >= 15) break; // cap per poll to avoid flooding the UI
            }
        }
    } @catch (NSException *exception) {
        // The legacy NSFileHandle seek/read calls raise on I/O errors; log polling is
        // a debugging aid and must not crash the app from a timer handler.
        NSLog(@"[SpeechPlugin] drainSdkLog failed: %@", exception);
    } @finally {
        [fh closeFile];
    }
}
/// Cancels and releases the log-polling timer, if one exists.
- (void)stopSdkLogPoller {
    dispatch_source_t poller = self.logPoller;
    if (poller == nil) {
        return;
    }
    dispatch_source_cancel(poller);
    self.logPoller = nil;
}
/// Sends a directive to the engine and reports the outcome to Flutter.
/// @param directive Directive to send.
/// @param data Optional payload string; when nil the payload-less variant is used.
/// @param result Completes with NOT_INITED if no engine exists, DIRECTIVE_FAILED
///               (carrying the directive and return code) on error, else {ok: YES}.
- (void)sendDirective:(SEDirective)directive data:(NSString *)data result:(FlutterResult)result {
    if (self.speechEngine == nil) {
        result([FlutterError errorWithCode:@"NOT_INITED" message:@"call init() first" details:nil]);
        return;
    }

    SEEngineErrorCode ret;
    if (data) {
        ret = [self.speechEngine sendDirective:directive data:data];
    } else {
        ret = [self.speechEngine sendDirective:directive];
    }

    if (ret == SENoError) {
        result(@{@"ok": @YES});
        return;
    }
    NSString *detail = [NSString stringWithFormat:@"directive=%d ret=%d", (int)directive, (int)ret];
    result([FlutterError errorWithCode:@"DIRECTIVE_FAILED" message:detail details:nil]);
}
/// Stops log polling, destroys the engine if one exists, and clears the started flag.
/// @param result Always completes with {ok: YES}.
- (void)destroyEngine:(FlutterResult)result {
    [self stopSdkLogPoller];

    SpeechEngine *engine = self.speechEngine;
    if (engine != nil) {
        [engine destroyEngine];
        self.speechEngine = nil;
    }

    self.engineStarted = NO;
    result(@{@"ok": @YES});
}
#pragma mark - SpeechEngineDelegate
/// SpeechEngineDelegate callback: maps SDK messages to Flutter events.
///
/// Text-bearing messages are decoded as UTF-8 and forwarded as the event's
/// "payload". Player audio frames are routed to -processTtsAudio: for mouth
/// driving, recorder audio is dropped, and unknown types are only logged.
/// @param type SDK message type.
/// @param data Message body; text for most types, raw audio for audio types. May be nil.
- (void)onMessageWithType:(SEMessageType)type andData:(NSData *)data {
    // Audio frames arrive at a high rate and are never used as text, so skip the
    // UTF-8 decode for them instead of attempting it on every PCM buffer.
    BOOL isAudioFrame = (type == SETtsAudioData
                         || type == SEDialogPlayerAudio
                         || type == SEDialogRecorderAudio);
    NSString *str = (data && !isAudioFrame)
        ? [[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding]
        : nil;
    switch (type) {
        case SEEngineStart:
            self.engineStarted = YES;
            [self sendEvent:@{@"type": @"engine_start", @"payload": str ?: @""}];
            break;
        case SEEngineStop:
            self.engineStarted = NO;
            [self sendEvent:@{@"type": @"engine_stop", @"payload": str ?: @""}];
            break;
        case SEEngineError: {
            NSLog(@"[SpeechPlugin] SEEngineError: %@", str);
            // Try to extract code + message; if parsing fails, the whole text becomes the message.
            id parsed = nil;
            if (data) parsed = [NSJSONSerialization JSONObjectWithData:data options:0 error:nil];
            NSDictionary *info = [parsed isKindOfClass:[NSDictionary class]] ? parsed : nil;
            [self sendEvent:@{
                @"type": @"engine_error",
                @"code": info[@"code"] ?: info[@"errcode"] ?: info[@"error_code"] ?: @0,
                @"message": info[@"message"] ?: info[@"msg"] ?: info[@"reason"] ?: (str ?: @""),
                @"raw": str ?: @""
            }];
            break;
        }
        // === demo-style dialog events (SpeechEngineToB) ===
        case SEDialogConnectionStarted:
            [self sendEvent:@{@"type": @"channel_joined", @"payload": str ?: @""}];
            break;
        case SEDialogConnectionFailed:
            [self sendEvent:@{@"type": @"engine_error", @"message": str ?: @"connection failed"}];
            break;
        case SEDialogConnectionFinished:
            [self sendEvent:@{@"type": @"engine_stop", @"payload": str ?: @""}];
            break;
        case SEDialogSessionStarted:
            [self sendEvent:@{@"type": @"dialog_begin", @"payload": str ?: @""}];
            break;
        case SEDialogSessionCanceled:
            [self sendEvent:@{@"type": @"dialog_cancelled"}];
            break;
        case SEDialogSessionFinished:
            [self sendEvent:@{@"type": @"dialog_end", @"payload": str ?: @""}];
            break;
        case SEDialogASRResponse:
            // Streaming user speech, e.g. {"results":[{"text":"..."}]}
            [self sendEvent:@{@"type": @"asr_partial", @"payload": str ?: @""}];
            break;
        case SEDialogASREnded:
            [self sendEvent:@{@"type": @"vad_end"}];
            break;
        case SEDialogChatResponse:
            // Streaming AI reply, e.g. {"content":"..."}
            [self sendEvent:@{@"type": @"subtitle_on", @"payload": str ?: @""}];
            break;
        case SEDialogChatEnded:
            [self sendEvent:@{@"type": @"subtitle_off"}];
            break;
        case SEDialogTTSResponse:
            // TTS text fragment
            [self sendEvent:@{@"type": @"tts_text", @"payload": str ?: @""}];
            break;
        case SEDialogTTSSentenceStart:
            [self sendEvent:@{@"type": @"ai_voice_begin"}];
            break;
        case SEDialogTTSSentenceEnd:
            [self sendEvent:@{@"type": @"tts_finish"}];
            break;
        case SEDialogTTSEnded:
            [self sendEvent:@{@"type": @"ai_voice_end"}];
            break;
        case SETtsStartPlaying:
            [self sendEvent:@{@"type": @"tts_start"}];
            break;
        case SETtsAudioData:
        case SEDialogPlayerAudio: {
            // TTS playback frame -> RMS -> rate-limited "mouth" event that drives the mouth shape.
            [self processTtsAudio:data];
            break;
        }
        case SEDialogRecorderAudio:
            // Microphone audio is not needed for mouth driving; drop it.
            break;
        case SETtsFinishPlaying:
            [self sendEvent:@{@"type": @"tts_finish"}];
            break;
        case SEVadBegin:
            [self sendEvent:@{@"type": @"vad_begin"}];
            break;
        case SEVadEnd:
            [self sendEvent:@{@"type": @"vad_end"}];
            break;
        case SEAsrPartialResult:
            [self sendEvent:@{@"type": @"asr_partial", @"payload": str ?: @""}];
            break;
        case SEFinalResult:
            [self sendEvent:@{@"type": @"asr_final", @"payload": str ?: @""}];
            break;
        case SEEngineLog:
            [self sendEvent:@{@"type": @"sdk_log", @"payload": str ?: @""}];
            break;
        default:
            // Debug aid: logged via NSLog only, not forwarded to the UI.
            NSLog(@"[SpeechPlugin] unhandled messageType=%d dataLen=%lu", (int)type, (unsigned long)(data ? data.length : 0));
            break;
    }
}
#endif // SPEECH_SDK_AVAILABLE
@end