diff --git a/main/Kconfig.projbuild b/main/Kconfig.projbuild index c49e8ec..005c3aa 100644 --- a/main/Kconfig.projbuild +++ b/main/Kconfig.projbuild @@ -12,6 +12,12 @@ config DEVICE_STATUS_REPORT_URL help URL for reporting device status to server +config STORY_API_URL + string "Story API URL" + default "http://192.168.124.8:8000/api/v1/devices/stories/" + help + 故事播放API接口地址,设备会附加 ?mac_address=XX:XX:XX:XX:XX:XX 参数请求 + choice prompt "语言选择" default LANGUAGE_ZH_CN diff --git a/main/application.cc b/main/application.cc index 6041068..a83cda8 100644 --- a/main/application.cc +++ b/main/application.cc @@ -28,6 +28,8 @@ #include #include #include +#include +#include #define TAG "Application" #define MAC_TAG "BluetoothMAC" @@ -705,7 +707,23 @@ void Application::Start() { Alert(Lang::Strings::ERROR, message.c_str(), "sad", Lang::Sounds::P3_EXCLAMATION); } }); + // 收到Bot下行消息(subv字幕等)时,立即中止HTTPS音频播放 + // 不等音频PCM到达再中止,避免故事在Bot回复期间继续播放数秒 + // 收到非字幕的Bot下行消息(ctrl/conv/tool/info等)时中止HTTPS播放 + // subv字幕消息在协议层跳过此回调,由subtitle handler处理(可区分USER/AI) + protocol_->OnBotMessage([this]() { + if (https_playback_active_.load() && !https_playback_abort_.load()) { + AbortHttpsPlayback("收到Bot响应消息"); + } + }); protocol_->OnIncomingAudio([this](std::vector&& data) { + // HTTPS播放中(含HTTP请求阶段)静默丢弃RTC PCM包 + // opus_playback_active_ 在任务启动时即设置,覆盖HTTP请求阶段 + // https_playback_active_ 在音频入队时设置,覆盖音频播放阶段 + if (https_playback_active_.load() || opus_playback_active_.load()) { + return; + } + if (websocket_protocol_ && websocket_protocol_->IsAudioChannelOpened()) { aborted_ = true; { @@ -718,7 +736,7 @@ void Application::Start() { } ResetDecoder(); ws_downlink_enabled_.store(false); - ws_playback_active_.store(false); + opus_playback_active_.store(false); websocket_protocol_->CloseAudioChannel();// 关闭WebSocket通道 Schedule([this]() { vTaskDelay(pdMS_TO_TICKS(120)); @@ -899,18 +917,27 @@ void Application::Start() { } } } - // 讲故事功能:通过WebSocket发送故事请求 + // 讲故事功能:支持HTTPS下载或WebSocket两种方式 else if (strcmp(name->valuestring, "obtain_story") == 0) { - ESP_LOGI(TAG, "收到obtain_story工具调用,通过WebSocket请求故事"); + ESP_LOGI(TAG, "收到obtain_story工具调用"); cJSON* sn = cJSON_GetObjectItem(args_obj, "story_name"); const char* story = (sn && cJSON_IsString(sn) && sn->valuestring) ? sn->valuestring : "random"; + cJSON* url_item = cJSON_GetObjectItem(args_obj, "story_url"); ESP_LOGI(TAG, "故事名称: %s", story); - // 打断当前播放 + 通过WebSocket发送故事请求(和按story按钮一样) AbortSpeaking(kAbortReasonNone); - SendStoryRequest(); - - std::string msg = "正在为你获取故事"; + std::string msg; + if (url_item && cJSON_IsString(url_item) && url_item->valuestring && strlen(url_item->valuestring) > 0) { + // HTTPS方式:直接下载JSON音频文件播放 + ESP_LOGI(TAG, "[HTTPS播放] 使用URL方式: %s", url_item->valuestring); + HttpsPlaybackFromUrl(url_item->valuestring); + msg = "正在通过HTTPS为你播放故事"; + } else { + // WebSocket方式:通过服务器推送 + ESP_LOGI(TAG, "[WS播放] 使用WebSocket方式请求音频"); + SendStoryRequest(); + msg = "正在为你获取故事"; + } cJSON* call_id_item = cJSON_GetObjectItem(call, "id"); const char* call_id = (call_id_item && cJSON_IsString(call_id_item) && call_id_item->valuestring) ? call_id_item->valuestring : ""; if (protocol_ && call_id && call_id[0] != '\0') { @@ -1112,12 +1139,21 @@ void Application::Start() { } } } - // 讲故事功能:通过WebSocket发送故事请求 + // 讲故事功能:支持HTTPS下载或WebSocket两种方式 else if (strcmp(name->valuestring, "obtain_story") == 0) { - ESP_LOGI(TAG, "收到obtain_story工具调用,通过WebSocket请求故事"); + ESP_LOGI(TAG, "收到obtain_story工具调用"); + cJSON* url_item = cJSON_GetObjectItem(args_obj, "story_url"); AbortSpeaking(kAbortReasonNone); - SendStoryRequest(); - std::string msg = "正在为你获取故事"; + std::string msg; + if (url_item && cJSON_IsString(url_item) && url_item->valuestring && strlen(url_item->valuestring) > 0) { + ESP_LOGI(TAG, "[HTTPS播放] 使用URL方式: %s", url_item->valuestring); + HttpsPlaybackFromUrl(url_item->valuestring); + msg = "正在通过HTTPS为你播放故事"; + } else { + ESP_LOGI(TAG, "[WS播放] 使用WebSocket方式请求音频"); + SendStoryRequest(); + msg = "正在为你获取故事"; + } cJSON* call_id_item = cJSON_GetObjectItem(call, "id"); const char* call_id = (call_id_item && cJSON_IsString(call_id_item) && call_id_item->valuestring) ? call_id_item->valuestring : ""; if (protocol_ && call_id && call_id[0] != '\0') { @@ -1176,6 +1212,36 @@ void Application::Start() { display->SetEmotion(emotion_str.c_str()); }); } + } else if (strcmp(type->valuestring, "subtitle") == 0) { + // 火山 RTC 字幕消息:区分用户说的话和AI回答 + auto data_arr = cJSON_GetObjectItem(root, "data"); + if (data_arr && cJSON_IsArray(data_arr)) { + for (int i = 0; i < cJSON_GetArraySize(data_arr); ++i) { + auto item = cJSON_GetArrayItem(data_arr, i); + auto text = cJSON_GetObjectItem(item, "text"); + auto user_id = cJSON_GetObjectItem(item, "userId"); + auto definite = cJSON_GetObjectItem(item, "definite"); + if (!text || !cJSON_IsString(text) || !text->valuestring[0]) continue; + + bool is_final = definite && cJSON_IsTrue(definite); + // userId 以 "bot_" 开头为AI,其余为用户 + bool is_user = true; + if (user_id && cJSON_IsString(user_id)) { + if (strncmp(user_id->valuestring, "bot_", 4) == 0) { + is_user = false; + } + } + + const char* role = is_user ? "USER" : "AI"; + ESP_LOGI(TAG, "%s %s: %s", is_final ? "📝" : "..", role, text->valuestring); + + // 用户说话时立即中止HTTPS音频播放 + // subv字幕消息在协议层跳过了on_bot_message_,由此处直接处理 + if (is_user && https_playback_active_.load() && !https_playback_abort_.load()) { + AbortHttpsPlayback("检测到用户说话(字幕)"); + } + } + } } else if (strcmp(type->valuestring, "iot") == 0) { auto commands = cJSON_GetObjectItem(root, "commands"); if (commands != NULL) { @@ -1245,12 +1311,21 @@ void Application::Start() { } } } - // 讲故事功能:通过WebSocket发送故事请求 + // 讲故事功能:支持HTTPS下载或WebSocket两种方式 else if (strcmp(name->valuestring, "obtain_story") == 0) { - ESP_LOGI(TAG, "收到obtain_story工具调用,通过WebSocket请求故事"); + ESP_LOGI(TAG, "收到obtain_story工具调用"); + cJSON* url_item = cJSON_GetObjectItem(args_obj, "story_url"); AbortSpeaking(kAbortReasonNone); - SendStoryRequest(); - std::string msg = "正在为你获取故事"; + std::string msg; + if (url_item && cJSON_IsString(url_item) && url_item->valuestring && strlen(url_item->valuestring) > 0) { + ESP_LOGI(TAG, "[HTTPS播放] 使用URL方式: %s", url_item->valuestring); + HttpsPlaybackFromUrl(url_item->valuestring); + msg = "正在通过HTTPS为你播放故事"; + } else { + ESP_LOGI(TAG, "[WS播放] 使用WebSocket方式请求音频"); + SendStoryRequest(); + msg = "正在为你获取故事"; + } cJSON* call_id_item = cJSON_GetObjectItem(root, "call_id"); const char* call_id = (call_id_item && cJSON_IsString(call_id_item) && call_id_item->valuestring) ? call_id_item->valuestring : ""; if (protocol_ && call_id && call_id[0] != '\0') { @@ -1309,12 +1384,26 @@ void Application::Start() { } } } - // 讲故事功能:通过WebSocket发送故事请求 + // 讲故事功能:支持HTTPS下载或WebSocket两种方式 else if (strcmp(name->valuestring, "obtain_story") == 0) { - ESP_LOGI(TAG, "收到obtain_story工具调用,通过WebSocket请求故事"); + ESP_LOGI(TAG, "收到obtain_story工具调用"); + cJSON* args_parsed = nullptr; + if (arguments && cJSON_IsString(arguments) && arguments->valuestring) { + args_parsed = cJSON_Parse(arguments->valuestring); + } + cJSON* url_item = args_parsed ? cJSON_GetObjectItem(args_parsed, "story_url") : nullptr; AbortSpeaking(kAbortReasonNone); - SendStoryRequest(); - std::string msg = "正在为你获取故事"; + std::string msg; + if (url_item && cJSON_IsString(url_item) && url_item->valuestring && strlen(url_item->valuestring) > 0) { + ESP_LOGI(TAG, "[HTTPS播放] 使用URL方式: %s", url_item->valuestring); + HttpsPlaybackFromUrl(url_item->valuestring); + msg = "正在通过HTTPS为你播放故事"; + } else { + ESP_LOGI(TAG, "[WS播放] 使用WebSocket方式请求音频"); + SendStoryRequest(); + msg = "正在为你获取故事"; + } + if (args_parsed) cJSON_Delete(args_parsed); cJSON* call_id_item = cJSON_GetObjectItem(root, "call_id"); const char* call_id = (call_id_item && cJSON_IsString(call_id_item) && call_id_item->valuestring) ? call_id_item->valuestring : ""; if (protocol_ && call_id && call_id[0] != '\0') { @@ -1855,16 +1944,24 @@ void Application::OnAudioOutput() { auto opus = std::move(audio_decode_queue_.front()); audio_decode_queue_.pop_front(); + // 在出队时捕获opus解码标志,避免background_task异步执行时标志已变化 + // 导致残留的Opus帧被当作PCM播放(产生杂音) + bool is_opus_frame = opus_playback_active_.load(); lock.unlock(); - background_task_->Schedule([this, codec, opus = std::move(opus)]() mutable { + background_task_->Schedule([this, codec, opus = std::move(opus), is_opus_frame]() mutable { if (aborted_) { return; } + // 跳过已中止的HTTPS opus残留帧:出队时is_opus_frame=true,但中止后opus_playback_active_=false + // 不能用https_playback_abort_判断,因为故事任务退出时会将其清为false,导致残留帧漏过 + if (is_opus_frame && !opus_playback_active_.load()) { + return; + } std::vector pcm; bool decoded = false; - bool treat_as_pcm = (protocol_ && protocol_->downlink_is_pcm() && !ws_playback_active_.load()); + bool treat_as_pcm = (protocol_ && protocol_->downlink_is_pcm() && !is_opus_frame); if (!treat_as_pcm) { decoded = opus_decoder_->Decode(std::move(opus), pcm); } @@ -2187,6 +2284,12 @@ void Application::AbortSpeaking(AbortReason reason) { ESP_LOGI(TAG, "🔴 Abort speaking - immediate stop"); aborted_ = true; + // 中止HTTPS音频播放(如果正在进行) + if (https_playback_active_.load()) { + https_playback_abort_.store(true); + ESP_LOGI(TAG, "🔴 HTTPS音频播放中止信号已发送"); + } + // 🔧 更新安全操作时间戳 last_safe_operation_.store(std::chrono::steady_clock::now()); @@ -2208,6 +2311,21 @@ void Application::AbortSpeaking(AbortReason reason) { // ⚠️ 移除WaitForCompletion避免死锁,让后台任务通过aborted_标志自然结束 ESP_LOGI(TAG, "🔴 Audio queue cleared, background tasks will stop on next iteration"); + // 重启codec输出以清空I2S DMA缓冲区中残留音频,确保扬声器立即静音 + // 移除output_enabled()守卫,确保始终执行flush + if (background_task_) { + background_task_->Schedule([this]() { + auto codec = Board::GetInstance().GetAudioCodec(); + if (codec) { + ESP_LOGI(TAG, "DMA flush: output_enabled=%d", codec->output_enabled()); + codec->EnableOutput(false); + vTaskDelay(pdMS_TO_TICKS(10)); + codec->EnableOutput(true); + ESP_LOGI(TAG, "🔇 音频输出已重置,DMA缓冲区已清空"); + } + }); + } + // 🔧 修复:始终尝试发送中止消息以打断RTC下行(不受IsSafeToOperate限制) if (protocol_) { try { @@ -2238,24 +2356,741 @@ void Application::AbortSpeaking(AbortReason reason) { is_aborting_.store(false); } -// 发送讲故事请求 webscoket协议 -void Application::SendStoryRequest() { - if (!websocket_protocol_) { - InitializeWebsocketProtocol();// 初始化WebSocket协议 - if (!websocket_protocol_) { - ESP_LOGW(TAG, "WebSocket协议初始化失败"); - return; +// 中止HTTPS音频播放:清空队列、重置解码器、清除标志、DMA flush +void Application::AbortHttpsPlayback(const char* reason) { + ESP_LOGI(TAG, "🔴 %s,中止HTTPS音频播放", reason); + https_playback_abort_.store(true); + { + std::lock_guard lock(mutex_); + if (!audio_decode_queue_.empty()) { + ESP_LOGI(TAG, "清空HTTPS音频队列,大小=%zu", audio_decode_queue_.size()); + audio_decode_queue_.clear(); } } - Schedule([this]() { - ws_downlink_enabled_.store(true); - // 确保音频通道已打开 - if (!websocket_protocol_->IsAudioChannelOpened()) { - websocket_protocol_->OpenAudioChannel();// 打开音频通道 + ResetDecoder(); + opus_playback_active_.store(false); + https_playback_active_.store(false); + ESP_LOGI(TAG, "🔴 HTTPS播放标志已清除,RTC音频通道已打开"); + // DMA flush:用独立任务立即清空I2S DMA缓冲区 + // 不能用background_task_,RTC音频lambda会持续占用它导致延迟数秒 + xTaskCreate([](void* arg) { + auto codec = Board::GetInstance().GetAudioCodec(); + if (codec) { + ESP_LOGI(TAG, "DMA flush: output_enabled=%d", codec->output_enabled()); + codec->EnableOutput(false); + vTaskDelay(pdMS_TO_TICKS(10)); + codec->EnableOutput(true); + ESP_LOGI(TAG, "🔇 音频输出已重置,DMA缓冲区已清空"); } - websocket_protocol_->SendStoryRequest();// 发送故事请求 - ESP_LOGI(TAG, "通过WebSocket发送的故事请求!"); - }); + vTaskDelete(NULL); + }, "dma_flush", 4096, NULL, 10, NULL); +} + +// 通过故事API请求并播放故事(intro标题 + body正文无缝衔接) +void Application::SendStoryRequest() { + // 防止重复启动:opus_playback_active_ 在任务启动时设置,覆盖HTTP请求阶段 + if (https_playback_active_.load() || https_playback_abort_.load() || opus_playback_active_.load()) { + ESP_LOGW(TAG, "[故事API] 已有音频正在播放或退出中,忽略本次请求"); + return; + } + + xTaskCreate([](void* arg) { + auto& app = Application::GetInstance(); + // 先设置opus和abort标志(用于重复启动守卫和OnIncomingAudio阻断RTC PCM) + // 注意:https_playback_active_ 延迟到intro音频入队前设置, + // 这样HTTP请求期间(~500ms)残留的Bot subv消息不会触发OnBotMessage中止 + app.opus_playback_active_.store(true); + app.https_playback_abort_.store(false); + + // base64 解码查找表 + static uint8_t b64_table[256] = {0}; + static bool b64_inited = false; + if (!b64_inited) { + const char* chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + for (int c = 0; chars[c]; c++) { + b64_table[(uint8_t)chars[c]] = (uint8_t)c; + } + b64_inited = true; + } + + // ========== 步骤1: 请求故事API ========== + std::string mac = SystemInfo::GetBleMacAddress(); + // 转大写 + for (auto& c : mac) { + if (c >= 'a' && c <= 'f') c -= 32; + } + + char api_url[256]; + snprintf(api_url, sizeof(api_url), "%s?mac_address=%s", + CONFIG_STORY_API_URL, mac.c_str()); + + ESP_LOGI(TAG, "[故事API] 请求: %s", api_url); + ESP_LOGI(TAG, "[故事API] 空闲堆: %lu", (unsigned long)esp_get_free_heap_size()); + + esp_http_client_config_t api_config = {}; + api_config.url = api_url; + api_config.method = HTTP_METHOD_GET; + api_config.timeout_ms = 10000; + api_config.buffer_size = 2048; + api_config.buffer_size_tx = 512; + + esp_http_client_handle_t api_client = esp_http_client_init(&api_config); + if (!api_client) { + ESP_LOGE(TAG, "[故事API] HTTP客户端初始化失败"); + app.https_playback_active_.store(false); + app.opus_playback_active_.store(false); + vTaskDelete(NULL); + return; + } + + esp_err_t err = esp_http_client_open(api_client, 0); + if (err != ESP_OK) { + ESP_LOGE(TAG, "[故事API] 连接失败: %s", esp_err_to_name(err)); + esp_http_client_cleanup(api_client); + app.https_playback_active_.store(false); + app.opus_playback_active_.store(false); + vTaskDelete(NULL); + return; + } + + esp_http_client_fetch_headers(api_client); + int api_status = esp_http_client_get_status_code(api_client); + if (api_status != 200) { + ESP_LOGE(TAG, "[故事API] 请求失败,状态码: %d", api_status); + esp_http_client_close(api_client); + esp_http_client_cleanup(api_client); + app.https_playback_active_.store(false); + app.opus_playback_active_.store(false); + vTaskDelete(NULL); + return; + } + + // 读取API响应(通常 < 10KB) + std::string api_response; + char buf[1024]; + int rlen; + while ((rlen = esp_http_client_read(api_client, buf, sizeof(buf))) > 0) { + api_response.append(buf, rlen); + } + esp_http_client_close(api_client); + esp_http_client_cleanup(api_client); + + ESP_LOGI(TAG, "[故事API] 响应: %d 字节", (int)api_response.size()); + + if (app.https_playback_abort_.load()) { + app.https_playback_active_.store(false); + app.https_playback_abort_.store(false); + app.opus_playback_active_.store(false); + ESP_LOGI(TAG, "[故事API] HTTP请求阶段被中止"); + vTaskDelete(NULL); + return; + } + + // 解析外层JSON + cJSON* root = cJSON_Parse(api_response.c_str()); + api_response.clear(); + api_response.shrink_to_fit(); + + if (!root) { + ESP_LOGE(TAG, "[故事API] JSON解析失败"); + app.https_playback_active_.store(false); + app.opus_playback_active_.store(false); + vTaskDelete(NULL); + return; + } + + cJSON* code_item = cJSON_GetObjectItem(root, "code"); + if (!code_item || code_item->valueint != 0) { + cJSON* msg_item = cJSON_GetObjectItem(root, "message"); + ESP_LOGE(TAG, "[故事API] 服务端错误: %s", + (msg_item && msg_item->valuestring) ? msg_item->valuestring : "unknown"); + cJSON_Delete(root); + app.https_playback_active_.store(false); + app.opus_playback_active_.store(false); + vTaskDelete(NULL); + return; + } + + cJSON* data = cJSON_GetObjectItem(root, "data"); + cJSON* title_item = data ? cJSON_GetObjectItem(data, "title") : nullptr; + cJSON* intro_str = data ? cJSON_GetObjectItem(data, "intro_opus_data") : nullptr; + cJSON* opus_url_item = data ? cJSON_GetObjectItem(data, "opus_url") : nullptr; + + if (!intro_str || !cJSON_IsString(intro_str) || !intro_str->valuestring || + !opus_url_item || !cJSON_IsString(opus_url_item) || !opus_url_item->valuestring) { + ESP_LOGE(TAG, "[故事API] 缺少intro_opus_data或opus_url字段"); + cJSON_Delete(root); + app.https_playback_active_.store(false); + app.opus_playback_active_.store(false); + vTaskDelete(NULL); + return; + } + + ESP_LOGI(TAG, "[故事API] 标题: %s", + (title_item && title_item->valuestring) ? title_item->valuestring : "未知"); + + // 提取字符串后释放外层JSON + std::string intro_json_str = intro_str->valuestring; + std::string opus_url = opus_url_item->valuestring; + cJSON_Delete(root); + + // ========== 步骤2: 解析 intro_opus_data ========== + cJSON* intro_root = cJSON_Parse(intro_json_str.c_str()); + intro_json_str.clear(); + intro_json_str.shrink_to_fit(); + + if (!intro_root) { + ESP_LOGE(TAG, "[故事API] intro JSON解析失败"); + app.https_playback_active_.store(false); + app.opus_playback_active_.store(false); + vTaskDelete(NULL); + return; + } + + cJSON* intro_sr = cJSON_GetObjectItem(intro_root, "sample_rate"); + cJSON* intro_fd = cJSON_GetObjectItem(intro_root, "frame_duration_ms"); + cJSON* intro_frames = cJSON_GetObjectItem(intro_root, "frames"); + + if (!intro_frames || !cJSON_IsArray(intro_frames)) { + ESP_LOGE(TAG, "[故事API] intro缺少frames数组"); + cJSON_Delete(intro_root); + app.https_playback_active_.store(false); + app.opus_playback_active_.store(false); + vTaskDelete(NULL); + return; + } + + int sample_rate = (intro_sr && cJSON_IsNumber(intro_sr)) ? intro_sr->valueint : 16000; + int frame_duration = (intro_fd && cJSON_IsNumber(intro_fd)) ? intro_fd->valueint : 60; + int intro_count = cJSON_GetArraySize(intro_frames); + + ESP_LOGI(TAG, "[故事API] intro: 采样率=%d, 帧时长=%dms, 帧数=%d (%.1f秒)", + sample_rate, frame_duration, intro_count, + intro_count * frame_duration / 1000.0f); + + app.SetDecodeSampleRate(sample_rate, frame_duration); + + // 音频即将入队,现在激活播放标志,允许OnBotMessage中止 + app.https_playback_active_.store(true); + + // ========== 步骤3: 入队 intro frames ========== + int enqueued = 0; + int errors = 0; + + for (int i = 0; i < intro_count; i++) { + if (app.https_playback_abort_.load()) break; + + cJSON* fi = cJSON_GetArrayItem(intro_frames, i); + if (!fi || !cJSON_IsString(fi) || !fi->valuestring) { errors++; continue; } + + const char* b64 = fi->valuestring; + size_t b64_len = strlen(b64); + if (b64_len == 0) { errors++; continue; } + + size_t out_len = (b64_len * 3) / 4; + if (b64_len >= 1 && b64[b64_len - 1] == '=') out_len--; + if (b64_len >= 2 && b64[b64_len - 2] == '=') out_len--; + + std::vector frame(out_len); + size_t j = 0, k = 0; + while (j < b64_len) { + uint32_t a = b64_table[(uint8_t)b64[j++]]; + uint32_t b = (j < b64_len) ? b64_table[(uint8_t)b64[j++]] : 0; + uint32_t c = (j < b64_len) ? b64_table[(uint8_t)b64[j++]] : 0; + uint32_t d = (j < b64_len) ? b64_table[(uint8_t)b64[j++]] : 0; + uint32_t triple = (a << 18) | (b << 12) | (c << 6) | d; + if (k < out_len) frame[k++] = (triple >> 16) & 0xFF; + if (k < out_len) frame[k++] = (triple >> 8) & 0xFF; + if (k < out_len) frame[k++] = triple & 0xFF; + } + + { + std::lock_guard lock(app.mutex_); + app.audio_decode_queue_.emplace_back(std::move(frame)); + } + enqueued++; + + // 队列节流 + while (!app.https_playback_abort_.load()) { + size_t qs; + { std::lock_guard lock(app.mutex_); qs = app.audio_decode_queue_.size(); } + if (qs < 50) break; + vTaskDelay(pdMS_TO_TICKS(30)); + } + } + + cJSON_Delete(intro_root); + ESP_LOGI(TAG, "[故事API] intro入队完成: %d帧, 错误: %d", enqueued, errors); + + if (app.https_playback_abort_.load()) { + ESP_LOGI(TAG, "[故事API] intro阶段被中止"); + app.https_playback_active_.store(false); + app.https_playback_abort_.store(false); + app.opus_playback_active_.store(false); + vTaskDelete(NULL); + return; + } + + // ========== 步骤4: 下载 opus_url 正文 ========== + ESP_LOGI(TAG, "[故事API] 开始下载正文: %s", opus_url.c_str()); + + esp_http_client_config_t opus_config = {}; + opus_config.url = opus_url.c_str(); + opus_config.method = HTTP_METHOD_GET; + opus_config.transport_type = HTTP_TRANSPORT_OVER_SSL; + opus_config.timeout_ms = 15000; + opus_config.buffer_size = 2048; + opus_config.buffer_size_tx = 512; +#ifdef CONFIG_MBEDTLS_CERTIFICATE_BUNDLE + opus_config.crt_bundle_attach = esp_crt_bundle_attach; +#endif + + esp_http_client_handle_t opus_client = esp_http_client_init(&opus_config); + if (!opus_client) { + ESP_LOGE(TAG, "[故事API] opus HTTP初始化失败"); + app.https_playback_active_.store(false); + app.https_playback_abort_.store(false); + app.opus_playback_active_.store(false); + vTaskDelete(NULL); + return; + } + + err = esp_http_client_open(opus_client, 0); + if (err != ESP_OK) { + ESP_LOGE(TAG, "[故事API] opus连接失败: %s", esp_err_to_name(err)); + esp_http_client_cleanup(opus_client); + app.https_playback_active_.store(false); + app.https_playback_abort_.store(false); + app.opus_playback_active_.store(false); + vTaskDelete(NULL); + return; + } + + int64_t opus_content_len = esp_http_client_fetch_headers(opus_client); + int opus_status = esp_http_client_get_status_code(opus_client); + ESP_LOGI(TAG, "[故事API] opus状态码: %d, 长度: %lld", opus_status, (long long)opus_content_len); + + if (opus_status != 200) { + ESP_LOGE(TAG, "[故事API] opus请求失败,状态码: %d", opus_status); + esp_http_client_close(opus_client); + esp_http_client_cleanup(opus_client); + app.https_playback_active_.store(false); + app.https_playback_abort_.store(false); + app.opus_playback_active_.store(false); + vTaskDelete(NULL); + return; + } + + std::string opus_json; + if (opus_content_len > 0) opus_json.reserve(opus_content_len); + int total_read = 0; + while ((rlen = esp_http_client_read(opus_client, buf, sizeof(buf))) > 0) { + if (app.https_playback_abort_.load()) break; + opus_json.append(buf, rlen); + total_read += rlen; + } + esp_http_client_close(opus_client); + esp_http_client_cleanup(opus_client); + + ESP_LOGI(TAG, "[故事API] opus下载完成: %d 字节, 堆: %lu", + total_read, (unsigned long)esp_get_free_heap_size()); + + if (app.https_playback_abort_.load()) { + ESP_LOGI(TAG, "[故事API] opus下载被中止"); + app.https_playback_active_.store(false); + app.https_playback_abort_.store(false); + app.opus_playback_active_.store(false); + vTaskDelete(NULL); + return; + } + + // ========== 步骤5: 解析并入队 body frames ========== + cJSON* opus_root = cJSON_Parse(opus_json.c_str()); + opus_json.clear(); + opus_json.shrink_to_fit(); + ESP_LOGI(TAG, "[故事API] opus JSON已释放, 堆: %lu", (unsigned long)esp_get_free_heap_size()); + + if (!opus_root) { + ESP_LOGE(TAG, "[故事API] opus JSON解析失败"); + app.https_playback_active_.store(false); + app.https_playback_abort_.store(false); + app.opus_playback_active_.store(false); + vTaskDelete(NULL); + return; + } + + cJSON* body_frames = cJSON_GetObjectItem(opus_root, "frames"); + if (!body_frames || !cJSON_IsArray(body_frames)) { + ESP_LOGE(TAG, "[故事API] opus缺少frames数组"); + cJSON_Delete(opus_root); + app.https_playback_active_.store(false); + app.https_playback_abort_.store(false); + app.opus_playback_active_.store(false); + vTaskDelete(NULL); + return; + } + + // 检查body采样率是否与intro不同 + cJSON* body_sr = cJSON_GetObjectItem(opus_root, "sample_rate"); + cJSON* body_fd = cJSON_GetObjectItem(opus_root, "frame_duration_ms"); + int body_sample_rate = (body_sr && cJSON_IsNumber(body_sr)) ? body_sr->valueint : sample_rate; + int body_frame_duration = (body_fd && cJSON_IsNumber(body_fd)) ? body_fd->valueint : frame_duration; + int body_count = cJSON_GetArraySize(body_frames); + + ESP_LOGI(TAG, "[故事API] body: 采样率=%d, 帧时长=%dms, 帧数=%d (%.1f秒)", + body_sample_rate, body_frame_duration, body_count, + body_count * body_frame_duration / 1000.0f); + + if (body_sample_rate != sample_rate || body_frame_duration != frame_duration) { + app.SetDecodeSampleRate(body_sample_rate, body_frame_duration); + } + + int body_enqueued = 0; + int body_errors = 0; + + for (int i = 0; i < body_count; i++) { + if (app.https_playback_abort_.load()) { + ESP_LOGI(TAG, "[故事API] body入队中止: %d/%d", body_enqueued, body_count); + break; + } + + cJSON* fi = cJSON_GetArrayItem(body_frames, i); + if (!fi || !cJSON_IsString(fi) || !fi->valuestring) { body_errors++; continue; } + + const char* b64 = fi->valuestring; + size_t b64_len = strlen(b64); + if (b64_len == 0) { body_errors++; continue; } + + size_t out_len = (b64_len * 3) / 4; + if (b64_len >= 1 && b64[b64_len - 1] == '=') out_len--; + if (b64_len >= 2 && b64[b64_len - 2] == '=') out_len--; + + std::vector frame(out_len); + size_t j = 0, k = 0; + while (j < b64_len) { + uint32_t a = b64_table[(uint8_t)b64[j++]]; + uint32_t b = (j < b64_len) ? b64_table[(uint8_t)b64[j++]] : 0; + uint32_t c = (j < b64_len) ? b64_table[(uint8_t)b64[j++]] : 0; + uint32_t d = (j < b64_len) ? b64_table[(uint8_t)b64[j++]] : 0; + uint32_t triple = (a << 18) | (b << 12) | (c << 6) | d; + if (k < out_len) frame[k++] = (triple >> 16) & 0xFF; + if (k < out_len) frame[k++] = (triple >> 8) & 0xFF; + if (k < out_len) frame[k++] = triple & 0xFF; + } + + { + std::lock_guard lock(app.mutex_); + app.audio_decode_queue_.emplace_back(std::move(frame)); + } + body_enqueued++; + + // 队列节流 + while (!app.https_playback_abort_.load()) { + size_t qs; + { std::lock_guard lock(app.mutex_); qs = app.audio_decode_queue_.size(); } + if (qs < 50) break; + vTaskDelay(pdMS_TO_TICKS(30)); + } + + // 每100帧打印进度 + if (body_enqueued % 100 == 0) { + size_t qs; + { std::lock_guard lock(app.mutex_); qs = app.audio_decode_queue_.size(); } + ESP_LOGI(TAG, "[故事API] body进度: %d/%d (%.0f%%), 队列: %zu, 堆: %lu", + body_enqueued, body_count, + body_enqueued * 100.0f / body_count, qs, + (unsigned long)esp_get_free_heap_size()); + } + } + + cJSON_Delete(opus_root); + ESP_LOGI(TAG, "[故事API] body入队完成: %d帧, 错误: %d", body_enqueued, body_errors); + + // ========== 步骤6: 等待播放完毕 ========== + if (!app.https_playback_abort_.load()) { + ESP_LOGI(TAG, "[故事API] 全部入队完成,等待播放完毕..."); + while (!app.https_playback_abort_.load()) { + size_t qs; + { std::lock_guard lock(app.mutex_); qs = app.audio_decode_queue_.size(); } + if (qs == 0) break; + vTaskDelay(pdMS_TO_TICKS(100)); + } + } + + bool was_aborted = app.https_playback_abort_.load(); + app.https_playback_active_.store(false); + app.https_playback_abort_.store(false); + app.opus_playback_active_.store(false); + ESP_LOGI(TAG, "[故事API] 播放结束, aborted=%d, 堆: %lu", + was_aborted, (unsigned long)esp_get_free_heap_size()); + vTaskDelete(NULL); + + }, "story_play", 10240, NULL, 5, NULL); +} + +// 通过HTTPS下载JSON并流式播放音频(故事/歌曲等) +void Application::HttpsPlaybackFromUrl(const std::string& url) { + // 防止重复启动:opus_playback_active_ 在任务启动时设置,覆盖HTTP请求阶段 + if (https_playback_active_.load() || https_playback_abort_.load() || opus_playback_active_.load()) { + ESP_LOGW(TAG, "[HTTPS播放] 已有音频正在播放或退出中,忽略本次请求"); + return; + } + + // 在独立任务中执行,避免阻塞调用线程 + std::string url_copy = url; + xTaskCreate([](void* arg) { + std::string* url_ptr = static_cast(arg); + std::string playback_url = std::move(*url_ptr); + delete url_ptr; + + auto& app = Application::GetInstance(); + // 先设置opus和abort标志(用于重复启动守卫和OnIncomingAudio阻断RTC PCM) + // https_playback_active_ 延迟到音频入队前设置,防止残留subv触发OnBotMessage + app.opus_playback_active_.store(true); + app.https_playback_abort_.store(false); + + ESP_LOGI(TAG, "[HTTPS播放] 开始下载: %s", playback_url.c_str()); + ESP_LOGI(TAG, "[HTTPS播放] 空闲堆内存: %lu 字节", (unsigned long)esp_get_free_heap_size()); + + // 配置HTTP客户端 + esp_http_client_config_t config = {}; + config.url = playback_url.c_str(); + config.method = HTTP_METHOD_GET; + config.transport_type = HTTP_TRANSPORT_OVER_SSL; + config.timeout_ms = 15000; + config.buffer_size = 2048; // 接收缓冲区(节省内存) + config.buffer_size_tx = 512; +#ifdef CONFIG_MBEDTLS_CERTIFICATE_BUNDLE + config.crt_bundle_attach = esp_crt_bundle_attach; +#endif + + esp_http_client_handle_t client = esp_http_client_init(&config); + if (!client) { + ESP_LOGE(TAG, "[HTTPS播放] HTTP客户端初始化失败"); + app.https_playback_active_.store(false); + app.opus_playback_active_.store(false); + vTaskDelete(NULL); + return; + } + + esp_err_t err = esp_http_client_open(client, 0); + if (err != ESP_OK) { + ESP_LOGE(TAG, "[HTTPS播放] HTTP连接失败: %s", esp_err_to_name(err)); + esp_http_client_cleanup(client); + app.https_playback_active_.store(false); + app.opus_playback_active_.store(false); + vTaskDelete(NULL); + return; + } + + int64_t content_length = esp_http_client_fetch_headers(client); + int status_code = esp_http_client_get_status_code(client); + ESP_LOGI(TAG, "[HTTPS播放] HTTP状态码: %d, 内容长度: %lld", status_code, (long long)content_length); + + if (status_code != 200) { + ESP_LOGE(TAG, "[HTTPS播放] HTTP请求失败,状态码: %d", status_code); + esp_http_client_close(client); + esp_http_client_cleanup(client); + app.https_playback_active_.store(false); + app.opus_playback_active_.store(false); + vTaskDelete(NULL); + return; + } + + // 流式读取整个JSON(必须完整读取才能解析frames数组) + // 但使用分块读取减少单次分配峰值 + std::string json_data; + if (content_length > 0) { + json_data.reserve(content_length); + } + char read_buf[2048]; + int read_len; + int total_read = 0; + while ((read_len = esp_http_client_read(client, read_buf, sizeof(read_buf))) > 0) { + if (app.https_playback_abort_.load()) { + ESP_LOGI(TAG, "[HTTPS播放] 下载被中止"); + break; + } + json_data.append(read_buf, read_len); + total_read += read_len; + } + + // 关闭HTTP连接,释放TLS资源 + esp_http_client_close(client); + esp_http_client_cleanup(client); + ESP_LOGI(TAG, "[HTTPS播放] 下载完成: %d 字节, 堆剩余: %lu", + total_read, (unsigned long)esp_get_free_heap_size()); + + if (app.https_playback_abort_.load()) { + ESP_LOGI(TAG, "[HTTPS播放] 播放已取消,释放资源"); + app.https_playback_active_.store(false); + app.https_playback_abort_.store(false); + app.opus_playback_active_.store(false); + vTaskDelete(NULL); + return; + } + + // 解析JSON + cJSON* root = cJSON_Parse(json_data.c_str()); + // 解析完成后立即释放原始JSON字符串 + json_data.clear(); + json_data.shrink_to_fit(); + ESP_LOGI(TAG, "[HTTPS播放] JSON字符串已释放, 堆剩余: %lu", + (unsigned long)esp_get_free_heap_size()); + + if (!root) { + ESP_LOGE(TAG, "[HTTPS播放] JSON解析失败"); + app.https_playback_active_.store(false); + app.opus_playback_active_.store(false); + vTaskDelete(NULL); + return; + } + + // 读取音频参数 + cJSON* sample_rate_item = cJSON_GetObjectItem(root, "sample_rate"); + cJSON* channels_item = cJSON_GetObjectItem(root, "channels"); + cJSON* frame_duration_item = cJSON_GetObjectItem(root, "frame_duration_ms"); + cJSON* frames_array = cJSON_GetObjectItem(root, "frames"); + + if (!frames_array || !cJSON_IsArray(frames_array)) { + ESP_LOGE(TAG, "[HTTPS播放] JSON中缺少frames数组"); + cJSON_Delete(root); + app.https_playback_active_.store(false); + app.opus_playback_active_.store(false); + vTaskDelete(NULL); + return; + } + + int sample_rate = (sample_rate_item && cJSON_IsNumber(sample_rate_item)) ? sample_rate_item->valueint : 16000; + int channels = (channels_item && cJSON_IsNumber(channels_item)) ? channels_item->valueint : 1; + int frame_duration = (frame_duration_item && cJSON_IsNumber(frame_duration_item)) ? frame_duration_item->valueint : 60; + int frame_count = cJSON_GetArraySize(frames_array); + + ESP_LOGI(TAG, "[HTTPS播放] 音频参数: 采样率=%d, 通道=%d, 帧时长=%dms, 总帧数=%d", + sample_rate, channels, frame_duration, frame_count); + ESP_LOGI(TAG, "[HTTPS播放] 预计时长: %.1f 秒", frame_count * frame_duration / 1000.0f); + + // 设置解码器采样率(复用现有Opus解码器) + app.SetDecodeSampleRate(sample_rate, frame_duration); + + // 音频即将入队,现在激活播放标志,允许OnBotMessage中止 + app.https_playback_active_.store(true); + + // 逐帧base64解码并入队播放 + int enqueued = 0; + int decode_errors = 0; + + // base64 解码查找表(C++ 兼容初始化) + static uint8_t b64_table[256] = {0}; + static bool b64_inited = false; + if (!b64_inited) { + const char* chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + for (int c = 0; chars[c]; c++) { + b64_table[(uint8_t)chars[c]] = (uint8_t)c; + } + b64_inited = true; + } + + for (int i = 0; i < frame_count; i++) { + if (app.https_playback_abort_.load()) { + ESP_LOGI(TAG, "[HTTPS播放] 播放中止,已入队 %d/%d 帧", enqueued, frame_count); + break; + } + + cJSON* frame_item = cJSON_GetArrayItem(frames_array, i); + if (!frame_item || !cJSON_IsString(frame_item) || !frame_item->valuestring) { + decode_errors++; + continue; + } + + const char* b64 = frame_item->valuestring; + size_t b64_len = strlen(b64); + if (b64_len == 0) { + decode_errors++; + continue; + } + + // base64 解码 + size_t out_len = (b64_len * 3) / 4; + if (b64_len >= 1 && b64[b64_len - 1] == '=') out_len--; + if (b64_len >= 2 && b64[b64_len - 2] == '=') out_len--; + + std::vector opus_frame(out_len); + size_t j = 0, k = 0; + while (j < b64_len) { + uint32_t sextet_a = (j < b64_len) ? b64_table[(uint8_t)b64[j++]] : 0; + uint32_t sextet_b = (j < b64_len) ? b64_table[(uint8_t)b64[j++]] : 0; + uint32_t sextet_c = (j < b64_len) ? b64_table[(uint8_t)b64[j++]] : 0; + uint32_t sextet_d = (j < b64_len) ? b64_table[(uint8_t)b64[j++]] : 0; + uint32_t triple = (sextet_a << 18) | (sextet_b << 12) | (sextet_c << 6) | sextet_d; + if (k < out_len) opus_frame[k++] = (triple >> 16) & 0xFF; + if (k < out_len) opus_frame[k++] = (triple >> 8) & 0xFF; + if (k < out_len) opus_frame[k++] = triple & 0xFF; + } + + // 入队到音频解码队列(和WebSocket入队方式完全一致) + { + std::lock_guard lock(app.mutex_); + app.audio_decode_queue_.emplace_back(std::move(opus_frame)); + } + enqueued++; + + // 控制入队速度:队列过大时等待消费,避免内存堆积 + // 每帧60ms,队列超过50帧(3秒缓冲)时等待 + while (!app.https_playback_abort_.load()) { + size_t queue_size; + { + std::lock_guard lock(app.mutex_); + queue_size = app.audio_decode_queue_.size(); + } + if (queue_size < 50) break; + vTaskDelay(pdMS_TO_TICKS(30)); // 等待消费 + } + + // 每100帧打印一次进度 + if (enqueued % 100 == 0) { + size_t queue_size; + { + std::lock_guard lock(app.mutex_); + queue_size = app.audio_decode_queue_.size(); + } + ESP_LOGI(TAG, "[HTTPS播放] 进度: %d/%d 帧 (%.0f%%), 队列: %zu, 堆: %lu", + enqueued, frame_count, enqueued * 100.0f / frame_count, + queue_size, (unsigned long)esp_get_free_heap_size()); + } + } + + // 释放cJSON解析树 + cJSON_Delete(root); + ESP_LOGI(TAG, "[HTTPS播放] JSON解析树已释放, 堆剩余: %lu", + (unsigned long)esp_get_free_heap_size()); + + if (app.https_playback_abort_.load()) { + ESP_LOGI(TAG, "[HTTPS播放] 播放被用户中止,入队 %d 帧,解码错误 %d", + enqueued, decode_errors); + } else { + ESP_LOGI(TAG, "[HTTPS播放] 全部入队完成: %d 帧,解码错误 %d,等待播放完毕...", + enqueued, decode_errors); + } + + // 等待队列播放完毕(或被中止) + while (!app.https_playback_abort_.load()) { + size_t queue_size; + { + std::lock_guard lock(app.mutex_); + queue_size = app.audio_decode_queue_.size(); + } + if (queue_size == 0) break; + vTaskDelay(pdMS_TO_TICKS(100)); + } + + app.https_playback_active_.store(false); + app.https_playback_abort_.store(false); + app.opus_playback_active_.store(false); + ESP_LOGI(TAG, "[HTTPS播放] 播放结束, 最终堆剩余: %lu", + (unsigned long)esp_get_free_heap_size()); + vTaskDelete(NULL); + }, "https_play", 8192, new std::string(url_copy), 5, NULL); } // 设置监听模式 @@ -3053,7 +3888,7 @@ void Application::InitializeWebsocketProtocol() { if (!ws_downlink_enabled_.load()) { return; } - ws_playback_active_.store(true); + opus_playback_active_.store(true); std::lock_guard lock(mutex_); size_t len = data.size(); audio_decode_queue_.emplace_back(std::move(data)); diff --git a/main/application.h b/main/application.h index 457f676..0c95c86 100644 --- a/main/application.h +++ b/main/application.h @@ -70,7 +70,9 @@ public: void Alert(const char* status, const char* message, const char* emotion = "", const std::string_view& sound = "");// 警报管理 状态、消息、情感、声音 void DismissAlert();// 关闭警报 void AbortSpeaking(AbortReason reason);// 打断语音播报 - void SendStoryRequest(); // 发送讲故事 请求 + void AbortHttpsPlayback(const char* reason);// 中止HTTPS音频播放并清空DMA + void SendStoryRequest(); // 发送讲故事 请求(WebSocket方式) + void HttpsPlaybackFromUrl(const std::string& url); // 通过HTTPS下载JSON并播放音频(故事/歌曲等) void ToggleChatState();// 切换聊天状态 void ToggleListeningState();// 切换监听状态 void StartListening();// 开始监听 @@ -145,7 +147,9 @@ private: bool realtime_chat_enabled_ = false; #endif std::atomic ws_downlink_enabled_{true};// 🌐 WebSocket下行通道是否启用 - std::atomic ws_playback_active_{false};// 🌐 WebSocket下行播放活跃标志 + std::atomic opus_playback_active_{false};// 🌐 Opus解码播放活跃标志(WS/HTTPS共用) + std::atomic https_playback_active_{false};// 🌐 HTTPS音频播放进行中标志 + std::atomic https_playback_abort_{false};// 🌐 HTTPS音频播放中止标志 bool aborted_ = false; bool voice_detected_ = false; bool audio_paused_ = false; // 音频暂停状态标志 diff --git a/main/protocols/protocol.cc b/main/protocols/protocol.cc index 283de46..6a3a419 100644 --- a/main/protocols/protocol.cc +++ b/main/protocols/protocol.cc @@ -24,6 +24,10 @@ void Protocol::OnNetworkError(std::function ca on_network_error_ = callback; } +void Protocol::OnBotMessage(std::function callback) { + on_bot_message_ = callback; +} + void Protocol::SetError(const std::string& message) { error_occurred_ = true; if (on_network_error_ != nullptr) { diff --git a/main/protocols/protocol.h b/main/protocols/protocol.h index 81747ea..7e9c635 100644 --- a/main/protocols/protocol.h +++ b/main/protocols/protocol.h @@ -49,6 +49,7 @@ public: void OnAudioChannelOpened(std::function callback); void OnAudioChannelClosed(std::function callback); void OnNetworkError(std::function callback); + void OnBotMessage(std::function callback); virtual void Start() = 0; virtual bool OpenAudioChannel() = 0; @@ -76,6 +77,7 @@ protected: std::function on_audio_channel_opened_; std::function on_audio_channel_closed_; std::function on_network_error_; + std::function on_bot_message_; int server_sample_rate_ = 24000; int server_frame_duration_ = 60; diff --git a/main/protocols/volc_rtc_protocol.cc b/main/protocols/volc_rtc_protocol.cc index aeb6ca4..49ffc23 100644 --- a/main/protocols/volc_rtc_protocol.cc +++ b/main/protocols/volc_rtc_protocol.cc @@ -579,16 +579,23 @@ void VolcRtcProtocol::DataCallback(void* context, const void* data, size_t len, if (data && len > 0) { const uint8_t* buf = static_cast(data); std::string json_text; - if (info->info.message.is_binary && len >= 8) { + // 检测二进制前缀格式: [prefix(4字节)] + [json_len(4字节大端)] + [JSON] + // 注意: SDK DataCallback中 is_binary 始终为false,不能依赖此字段 + bool is_subv = false; + if (len >= 8) { bool is_ctrl = (memcmp(buf, "ctrl", 4) == 0); bool is_conv = (memcmp(buf, "conv", 4) == 0); bool is_tool = (memcmp(buf, "tool", 4) == 0); - if (is_ctrl || is_conv || is_tool) { + is_subv = (memcmp(buf, "subv", 4) == 0); + bool is_info = (memcmp(buf, "info", 4) == 0); + if (is_ctrl || is_conv || is_tool || is_subv || is_info) { uint32_t json_len = (uint32_t)((buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | (buf[7])); if (json_len > 0 && (size_t)(8 + json_len) <= len) { json_text.assign(reinterpret_cast(buf + 8), json_len); - if (!protocol->suppress_incoming_message_log_) { - ESP_LOGI(TAG, "接收下行二进制消息(%s): %.*s", is_ctrl ? "ctrl" : (is_conv ? "conv" : "tool"), (int)json_text.size(), json_text.c_str()); + // 字幕消息不打印内容(频率高) + if (!is_subv && !protocol->suppress_incoming_message_log_) { + const char* prefix = is_ctrl ? "ctrl" : (is_conv ? "conv" : (is_tool ? "tool" : "info")); + ESP_LOGI(TAG, "接收下行消息(%s): %.*s", prefix, (int)json_text.size(), json_text.c_str()); } } } @@ -599,48 +606,52 @@ void VolcRtcProtocol::DataCallback(void* context, const void* data, size_t len, ESP_LOGI(TAG, "接收下行消息: %.*s", (int)json_text.size(), json_text.c_str()); } } - cJSON* root = cJSON_Parse(json_text.c_str()); - if (root) { - const char* sid_keys[] = {"sessionId", "session_id", "sid"}; - cJSON* sid = nullptr; - for (size_t i = 0; i < sizeof(sid_keys) / sizeof(sid_keys[0]); ++i) { - sid = cJSON_GetObjectItem(root, sid_keys[i]); - if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') { - break; + + // 非subv消息立即通知应用层中止HTTPS播放(尽早触发,不等JSON解析) + // subv字幕消息由应用层subtitle handler处理(可区分USER/AI) + if (!is_subv && protocol->on_bot_message_) { + protocol->on_bot_message_(); } - sid = nullptr; - } - if (!sid) { - const char* containers[] = {"data", "payload", "context", "session"}; - for (size_t i = 0; i < sizeof(containers) / sizeof(containers[0]); ++i) { - cJSON* obj = cJSON_GetObjectItem(root, containers[i]); - if (obj && cJSON_IsObject(obj)) { - for (size_t j = 0; j < sizeof(sid_keys) / sizeof(sid_keys[0]); ++j) { - sid = cJSON_GetObjectItem(obj, sid_keys[j]); - if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') { - break; + + cJSON* root = cJSON_Parse(json_text.c_str()); + if (root) { + // 提取 Session ID(支持多种字段名和嵌套位置) + const char* sid_keys[] = {"sessionId", "session_id", "sid"}; + cJSON* sid = nullptr; + for (size_t i = 0; i < sizeof(sid_keys) / sizeof(sid_keys[0]); ++i) { + sid = cJSON_GetObjectItem(root, sid_keys[i]); + if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') break; + sid = nullptr; + } + if (!sid) { + const char* containers[] = {"data", "payload", "context", "session"}; + for (size_t i = 0; i < sizeof(containers) / sizeof(containers[0]); ++i) { + cJSON* obj = cJSON_GetObjectItem(root, containers[i]); + if (obj && cJSON_IsObject(obj)) { + for (size_t j = 0; j < sizeof(sid_keys) / sizeof(sid_keys[0]); ++j) { + sid = cJSON_GetObjectItem(obj, sid_keys[j]); + if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') break; + } } + if (sid) break; } } - if (sid) break; + if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') { + protocol->session_id_ = sid->valuestring; + ESP_LOGI(TAG, "Session ID set: %s", protocol->session_id_.c_str()); + if (protocol->is_audio_channel_opened_ && protocol->start_listening_pending_) { + ListeningMode m = protocol->pending_listening_mode_; + protocol->start_listening_pending_ = false; + protocol->SendStartListening(m); + } + } + if (protocol->on_incoming_json_) { + protocol->on_incoming_json_(root); + } + cJSON_Delete(root); } } - if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') { - protocol->session_id_ = sid->valuestring; - ESP_LOGI(TAG, "Session ID set: %s", protocol->session_id_.c_str()); - if (protocol->is_audio_channel_opened_ && protocol->start_listening_pending_) { - ListeningMode m = protocol->pending_listening_mode_; - protocol->start_listening_pending_ = false; - protocol->SendStartListening(m); - } - } - if (protocol->on_incoming_json_) { - protocol->on_incoming_json_(root); - } - cJSON_Delete(root); } - } - } }