#include "volc_rtc_protocol.h"
// NOTE(review): the names of the angle-bracket headers were missing from the
// reviewed copy of this file. The seven standard headers below are
// reconstructed from what the code actually uses - confirm against version
// control before merging.
#include <cstring>
#include "esp_log.h"
#include "sdkconfig.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "esp_heap_caps.h"
#include "esp_system.h"
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <mutex>
#include <string>
#include <vector>
// SystemInfo is used to obtain the MAC address as a fallback device name.
#include "system_info.h"
#include "application.h"
// SNTP is initialized in WiFi board after network is up; no duplicate init here
#include "base/volc_device_manager.h"
#include "settings.h"

static const char* TAG = "VolcRtcProtocol";

namespace {

// Event-group bits shared by OpenAudioChannel() and MessageCallback().
constexpr EventBits_t kConnectedBit = 0x1;   // set on VOLC_MSG_CONNECTED
constexpr EventBits_t kUserJoinedBit = 0x2;  // set on VOLC_MSG_USER_JOINED

// Size of the "<4-byte tag><4-byte big-endian length>" header used by binary messages.
constexpr size_t kBinaryHeaderSize = 8;

/// Returns the string value of `item` if it is a non-empty cJSON string, else nullptr.
const char* NonEmptyString(const cJSON* item) {
    if (item && cJSON_IsString(item) && item->valuestring && item->valuestring[0] != '\0') {
        return item->valuestring;
    }
    return nullptr;
}

/// Looks up a session id directly inside `object` under any of the accepted key spellings.
const char* LookupSessionId(const cJSON* object) {
    static const char* const kKeys[] = {"sessionId", "session_id", "sid"};
    for (const char* key : kKeys) {
        if (const char* value = NonEmptyString(cJSON_GetObjectItem(object, key))) {
            return value;
        }
    }
    return nullptr;
}

/// Finds a session id in `root`: first at the top level, then inside the
/// "data" / "payload" / "context" / "session" sub-objects.
/// @return pointer owned by `root` (valid until cJSON_Delete(root)), or nullptr if absent.
const char* FindSessionId(const cJSON* root) {
    if (const char* value = LookupSessionId(root)) {
        return value;
    }
    static const char* const kContainers[] = {"data", "payload", "context", "session"};
    for (const char* container : kContainers) {
        const cJSON* nested = cJSON_GetObjectItem(root, container);
        if (nested && cJSON_IsObject(nested)) {
            if (const char* value = LookupSessionId(nested)) {
                return value;
            }
        }
    }
    return nullptr;
}

/// Builds a binary message: 4-byte ASCII tag + 4-byte big-endian JSON length + JSON bytes.
/// @param tag4 pointer to exactly four tag bytes (e.g. "ctrl", "func").
std::vector<uint8_t> BuildTaggedPayload(const char* tag4, const std::string& json) {
    const uint32_t len = static_cast<uint32_t>(json.size());
    std::vector<uint8_t> payload;
    payload.reserve(kBinaryHeaderSize + json.size());
    payload.insert(payload.end(), tag4, tag4 + 4);
    payload.push_back(static_cast<uint8_t>((len >> 24) & 0xFF));
    payload.push_back(static_cast<uint8_t>((len >> 16) & 0xFF));
    payload.push_back(static_cast<uint8_t>((len >> 8) & 0xFF));
    payload.push_back(static_cast<uint8_t>(len & 0xFF));
    payload.insert(payload.end(), json.begin(), json.end());
    return payload;
}

/// Creates the JSON configuration passed to volc_rtc_create().
/// @return a new cJSON object owned by the caller, or nullptr on allocation failure.
cJSON* BuildRtcConfig() {
    cJSON* config = cJSON_CreateObject();
    if (!config) {
        return nullptr;
    }

    // Audio: publish and subscribe, codec id 4 (value taken from the design document).
    if (cJSON* audio = cJSON_CreateObject()) {
        cJSON_AddBoolToObject(audio, "publish", true);
        cJSON_AddBoolToObject(audio, "subscribe", true);
        cJSON_AddNumberToObject(audio, "codec", 4);
        cJSON_AddItemToObject(config, "audio", audio);
    }

    // Video: disabled in both directions, codec id 1 (value taken from the design document).
    if (cJSON* video = cJSON_CreateObject()) {
        cJSON_AddBoolToObject(video, "publish", false);
        cJSON_AddBoolToObject(video, "subscribe", false);
        cJSON_AddNumberToObject(video, "codec", 1);
        cJSON_AddItemToObject(config, "video", video);
    }

    cJSON_AddNumberToObject(config, "log_level", 1);

    // Extra SDK parameters, kept identical to the Airhub_Rtc_h project.
    if (cJSON* params = cJSON_CreateArray()) {
        // Log to console only, never to a file.
        cJSON_AddItemToArray(params, cJSON_CreateString("{\"debug\":{\"log_to_console\":1}}"));
        // Enable the SDK-internal audio codec.
        cJSON_AddItemToArray(params, cJSON_CreateString("{\"audio\":{\"codec\":{\"internal\":{\"enable\":1}}}}"));
        // Limit concurrent access requests and ICE agents to one each.
        cJSON_AddItemToArray(params, cJSON_CreateString("{\"rtc\":{\"access\":{\"concurrent_requests\":1}}}"));
        cJSON_AddItemToArray(params, cJSON_CreateString("{\"rtc\":{\"ice\":{\"concurrent_agents\":1}}}"));
        cJSON_AddItemToObject(config, "params", params);
    }
    return config;
}

}  // namespace

/// Creates the event group used to wait for connection / user-join notifications.
VolcRtcProtocol::VolcRtcProtocol() {
    // Zero the IoT info so that device_name is a well-defined (null) pointer even
    // if Start() is never called; Start() and the destructor free() it.
    memset(&iot_info_, 0, sizeof(iot_info_));
    event_group_handle_ = xEventGroupCreate();
    if (!event_group_handle_) {
        ESP_LOGE(TAG, "Failed to create event group");
    }
}

/// Stops and destroys the RTC instance, then releases the event group and owned strings.
VolcRtcProtocol::~VolcRtcProtocol() {
    // Tear the RTC engine down first: MessageCallback() uses the event group, so
    // the group must stay valid until the engine has been stopped and destroyed.
    if (rtc_handle_) {
        volc_rtc_stop(rtc_handle_);
        volc_rtc_destroy(rtc_handle_);
        rtc_handle_ = nullptr;
    }
    if (event_group_handle_) {
        vEventGroupDelete(event_group_handle_);
        event_group_handle_ = nullptr;
    }
    // device_name is always a heap copy made by Start() (or null).
    free(iot_info_.device_name);
    iot_info_.device_name = nullptr;
    // NOTE(review): device_secret / rtc_app_id may be strdup()'d here or handed
    // out by volc_device_register(); their ownership cannot be determined from
    // this file, so they are deliberately not freed - confirm against the SDK.
}

/// Prepares the RTC configuration and IoT credentials, then spawns the
/// "volc_rtc_init" task that registers the device (if needed) and creates the
/// RTC instance. Errors are reported through SetError().
void VolcRtcProtocol::Start() {
    ESP_LOGI(TAG, "VolcRtcProtocol 开始启动...");
    esp_log_level_set(TAG, ESP_LOG_DEBUG);

    // All file-system related operations (file logging, working-directory check)
    // are intentionally disabled: the file system may not be mounted on this
    // device and touching it caused reboots.
    // TODO: Implement proper file system initialization if file logging is needed

    // If an RTC instance already exists, stop and destroy it first.
    if (rtc_handle_) {
        volc_rtc_stop(rtc_handle_);
        volc_rtc_destroy(rtc_handle_);
        rtc_handle_ = nullptr;
    }

    cJSON* config = BuildRtcConfig();
    if (!config) {
        ESP_LOGE(TAG, "RTC配置创建失败");
        SetError("Failed to create RTC config");
        return;
    }

    // Build the IoT info. Release the device name of a previous Start() before
    // the struct is wiped, otherwise the buffer would leak.
    free(iot_info_.device_name);
    memset(&iot_info_, 0, sizeof(iot_info_));
    iot_info_.instance_id = const_cast<char*>(CONFIG_VOLC_INSTANCE_ID);
    iot_info_.product_key = const_cast<char*>(CONFIG_VOLC_PRODUCT_KEY);
    iot_info_.product_secret = const_cast<char*>(CONFIG_VOLC_PRODUCT_SECRET);
    iot_info_.bot_id = const_cast<char*>(CONFIG_VOLC_BOT_ID);

    // Prefer the configured device name; fall back to the MAC address when it is empty.
    std::string device_name = CONFIG_VOLC_DEVICE_NAME;
    const bool name_from_config = !device_name.empty();
    if (!name_from_config) {
        device_name = SystemInfo::GetMacAddress();
        // Replace ':' with '_' so the name never contains colons.
        std::replace(device_name.begin(), device_name.end(), ':', '_');
    }
    // Always keep a heap copy so ownership is unambiguous (the destructor frees it).
    iot_info_.device_name = strdup(device_name.c_str());
    if (!iot_info_.device_name) {
        ESP_LOGE(TAG, "Failed to allocate device name");
        SetError("Failed to allocate device name");
        cJSON_Delete(config);
        return;
    }
    if (name_from_config) {
        ESP_LOGI(TAG, "使用配置文件中的设备名称: %s", iot_info_.device_name);
    } else {
        ESP_LOGI(TAG, "使用Wi-Fi MAC地址作为设备名称(已替换冒号为下划线): %s", iot_info_.device_name);
    }

    // Load cached credentials from NVS. They are only valid for the device name
    // they were issued for, so a name change invalidates them.
    Settings s("volc");
    auto saved_name = s.GetString("device_name", "");
    bool name_mismatch = (!saved_name.empty() && strcmp(saved_name.c_str(), iot_info_.device_name) != 0);
    std::string saved_secret;
    std::string saved_appid;
    if (name_mismatch) {
        ESP_LOGW(TAG, "检测到设备名称变更:%s -> %s,清除旧凭证", saved_name.c_str(), iot_info_.device_name);
        Settings sw("volc", true);
        sw.EraseKey("device_secret");
        sw.EraseKey("rtc_app_id");
        sw.SetString("device_name", iot_info_.device_name);
    } else {
        saved_secret = s.GetString("device_secret", "");
        saved_appid = s.GetString("rtc_app_id", "");
        if (saved_name.empty()) {
            Settings sw("volc", true);
            sw.SetString("device_name", iot_info_.device_name);
        }
    }
    if (!saved_secret.empty()) {
        iot_info_.device_secret = strdup(saved_secret.c_str());
    }
    if (!saved_appid.empty()) {
        iot_info_.rtc_app_id = strdup(saved_appid.c_str());
    }
    ESP_LOGI(TAG, "NVS凭证已加载:secret=%d appid=%d device_name=%s, free_heap=%u",
             !saved_secret.empty(), !saved_appid.empty(), iot_info_.device_name,
             (unsigned)heap_caps_get_free_size(MALLOC_CAP_DEFAULT));

    // Arguments handed to the init task; the task owns and releases both members.
    struct InitParams {
        VolcRtcProtocol* protocol;
        cJSON* config;
    };
    InitParams* init_params = new InitParams();
    init_params->protocol = this;
    init_params->config = config;

    // Device registration and RTC creation run in a dedicated task with a large
    // stack to avoid overflowing the main task's stack.
    const BaseType_t created = xTaskCreate([](void* arg) {
        InitParams* init_params = static_cast<InitParams*>(arg);
        VolcRtcProtocol* protocol = init_params->protocol;
        cJSON* config = init_params->config;

        // Register the device when the secret or the RTC app id is missing.
        if (!protocol->iot_info_.device_secret || !protocol->iot_info_.rtc_app_id) {
            char* device_secret_ptr = nullptr;
            if (volc_device_register(&protocol->iot_info_, &device_secret_ptr) != 0 || device_secret_ptr == nullptr) {
                ESP_LOGE(TAG, "设备注册失败");
                protocol->SetError("Failed to register device");
                cJSON_Delete(config);
                delete init_params;
                vTaskDelete(NULL);
                return;
            }
            protocol->iot_info_.device_secret = device_secret_ptr;
            // Persist the fresh credentials so the next boot can skip registration.
            Settings sw("volc", true);
            sw.SetString("device_secret", protocol->iot_info_.device_secret);
            if (protocol->iot_info_.rtc_app_id) {
                sw.SetString("rtc_app_id", protocol->iot_info_.rtc_app_id);
            }
            sw.SetString("device_name", protocol->iot_info_.device_name);
        }

        // Create the RTC instance; fall back to the instance id when no app id is known.
        protocol->rtc_handle_ = volc_rtc_create(
            protocol->iot_info_.rtc_app_id ? protocol->iot_info_.rtc_app_id : CONFIG_VOLC_INSTANCE_ID,
            protocol,
            config,
            &MessageCallback,
            &DataCallback
        );
        cJSON_Delete(config);
        delete init_params;

        if (!protocol->rtc_handle_) {
            ESP_LOGE(TAG, "RTC实例创建失败");
            protocol->SetError("Failed to create RTC instance");
        } else {
            protocol->iot_ready_ = true;
            ESP_LOGI(TAG, "RTC实例已准备就绪;房间加入将在监听状态后执行");
            // The websocket protocol is initialized only after RTC init succeeded.
            Application::GetInstance().InitializeWebsocketProtocol();
        }
        vTaskDelete(NULL);
    }, "volc_rtc_init", 16384, init_params, 5, NULL);

    if (created != pdPASS) {
        // The task never ran, so nobody else will release these resources.
        ESP_LOGE(TAG, "Failed to create volc_rtc_init task");
        SetError("Failed to create RTC init task");
        cJSON_Delete(config);
        delete init_params;
        return;
    }

    // Note: the RTC instance is created later, inside the task.
    ESP_LOGI(TAG, "VolcRtcProtocol初始化任务已创建");
}

/// Stores the AgentConfig parameters (the body's config and agent_config values)
/// that are forwarded to volc_rtc_start() by OpenAudioChannel().
void VolcRtcProtocol::SetAgentConfig(const std::string& params) {
    extra_params_ = params;
    ESP_LOGI(TAG, "设置Agent配置参数: %s", extra_params_.c_str());
}

/// Sends one OPUS packet to the RTC engine.
/// NOTE(review): the vector element type was missing from the reviewed copy;
/// uint8_t is inferred from the byte-oriented use of data()/size() - confirm
/// against the header.
void VolcRtcProtocol::SendAudio(const std::vector<uint8_t>& data) {
    if (!rtc_handle_ || !is_connected_ || !is_audio_channel_opened_) {
        ESP_LOGW(TAG, "无法发送音频:RTC未准备就绪");
        return;
    }

    std::lock_guard lock(rtc_mutex_);

    volc_data_info_t data_info;
    memset(&data_info, 0, sizeof(data_info));
    data_info.type = VOLC_DATA_TYPE_AUDIO;
    data_info.info.audio.data_type = VOLC_AUDIO_DATA_TYPE_OPUS;

    // Audio parameters were configured when the RTC instance was created; only
    // the payload is sent here.
    int ret = volc_rtc_send(rtc_handle_, data.data(), data.size(), &data_info);
    if (ret != 0) {
        ESP_LOGE(TAG, "发送音频失败:%d", ret);
    } else {
        opus_bytes_accum_ += data.size();
        opus_frames_accum_ += 1;
        LogUplinkStatsMaybe();
    }
}

/// Buffers PCM bytes and sends them to the RTC engine in fixed 20 ms frames.
/// Bytes that do not fill a whole frame stay buffered for the next call.
void VolcRtcProtocol::SendPcm(const std::vector<uint8_t>& data) {
    if (!rtc_handle_ || !is_connected_ || !is_audio_channel_opened_) {
        ESP_LOGW(TAG, "无法发送音频:RTC未准备就绪");
        return;
    }
    std::lock_guard lock(rtc_mutex_);
    pcm_pending_.insert(pcm_pending_.end(), data.begin(), data.end());

    // 20 ms of 8 kHz / 16-bit / mono PCM = 320 bytes. Silence is sent as well so
    // that AEC/RTC see a constant cadence.
    const size_t frame_bytes = (size_t)(8000 * 20 / 1000) * sizeof(int16_t);
    size_t offset = 0;
    while (offset + frame_bytes <= pcm_pending_.size()) {
        volc_data_info_t data_info;
        memset(&data_info, 0, sizeof(data_info));
        data_info.type = VOLC_DATA_TYPE_AUDIO;
        data_info.info.audio.data_type = VOLC_AUDIO_DATA_TYPE_PCM;
        data_info.info.audio.commit = false;
        int ret = volc_rtc_send(rtc_handle_, pcm_pending_.data() + offset, frame_bytes, &data_info);
        if (ret != 0) {
            // Keep the unsent frame buffered; it is retried on the next call.
            ESP_LOGE(TAG, "发送音频失败:%d", ret);
            break;
        }
        pcm_bytes_accum_ += frame_bytes;
        pcm_frames_accum_ += 1;
        offset += frame_bytes;
    }
    if (offset > 0) {
        pcm_pending_.erase(pcm_pending_.begin(), pcm_pending_.begin() + offset);
    }
    LogUplinkStatsMaybe();
}

/// Buffers G.711 A-law bytes and sends them to the RTC engine in 160-byte frames.
/// Bytes that do not fill a whole frame stay buffered for the next call.
void VolcRtcProtocol::SendG711A(const std::vector<uint8_t>& data) {
    if (!rtc_handle_ || !is_connected_ || !is_audio_channel_opened_) {
        ESP_LOGW(TAG, "无法发送音频:RTC未准备就绪");
        return;
    }
    std::lock_guard lock(rtc_mutex_);
    g711a_pending_.insert(g711a_pending_.end(), data.begin(), data.end());

    const size_t frame_bytes = 160;
    size_t offset = 0;
    while (offset + frame_bytes <= g711a_pending_.size()) {
        volc_data_info_t data_info;
        memset(&data_info, 0, sizeof(data_info));
        data_info.type = VOLC_DATA_TYPE_AUDIO;
        data_info.info.audio.data_type = VOLC_AUDIO_DATA_TYPE_G711A;
        data_info.info.audio.commit = true;
        int ret = volc_rtc_send(rtc_handle_, g711a_pending_.data() + offset, frame_bytes, &data_info);
        if (ret != 0) {
            // Keep the unsent frame buffered; it is retried on the next call.
            ESP_LOGE(TAG, "发送音频失败:%d", ret);
            break;
        }
        ESP_LOGI(TAG, "发送上行G711A帧: 大小=%zu", (size_t)frame_bytes);
        g711a_bytes_accum_ += frame_bytes;
        g711a_frames_accum_ += 1;
        offset += frame_bytes;
    }
    if (offset > 0) {
        g711a_pending_.erase(g711a_pending_.begin(), g711a_pending_.begin() + offset);
    }
    LogUplinkStatsMaybe();
}

/// Logs uplink/downlink audio statistics at most once every two seconds and
/// resets the accumulators afterwards.
void VolcRtcProtocol::LogUplinkStatsMaybe() {
    uint64_t now_us = esp_timer_get_time();
    if (uplink_last_log_us_ == 0) uplink_last_log_us_ = now_us;
    uint64_t diff_us = now_us - uplink_last_log_us_;
    if (diff_us < 2000000) {
        return;
    }

    // diff_us is at least 2 s here, so the division is always safe.
    uint64_t bps = ((uint64_t)(opus_bytes_accum_ + pcm_bytes_accum_ + g711a_bytes_accum_) * 8 * 1000000ULL) / diff_us;
    ESP_LOGI(TAG, "上行音频统计: PCM帧=%d 字节=%zu, G711A帧=%d 字节=%zu, 速率=%llu bps",
             pcm_frames_accum_, (size_t)pcm_bytes_accum_,
             g711a_frames_accum_, (size_t)g711a_bytes_accum_,
             (unsigned long long)bps);
    ESP_LOGI(TAG, "下行音频统计: PCM字节=%zu, OPUS字节=%zu",
             (size_t)down_pcm_bytes_accum_, (size_t)down_opus_bytes_accum_);

    opus_bytes_accum_ = 0;
    pcm_bytes_accum_ = 0;
    g711a_bytes_accum_ = 0;
    down_pcm_bytes_accum_ = 0;
    down_opus_bytes_accum_ = 0;
    opus_frames_accum_ = 0;
    pcm_frames_accum_ = 0;
    g711a_frames_accum_ = 0;
    uplink_last_log_us_ = now_us;
}

/// Joins the RTC room (if not already connected) and marks the audio channel open.
/// Waits up to 5 s for the connection and up to 3 s for the remote user; the
/// channel is opened even when the remote user has not joined yet.
/// @return false when the RTC instance is not ready, the start call fails or
///         the connection times out; true otherwise.
bool VolcRtcProtocol::OpenAudioChannel() {
    if (!rtc_handle_) {
        ESP_LOGW(TAG, "无法打开音频通道:RTC句柄未准备就绪");
        return false;
    }
    if (is_connected_) {
        return true;
    }

    if (!iot_ready_) {
        ESP_LOGE(TAG, "IoT信息未准备就绪,无法加入房间");
        ESP_LOGW(TAG, "Diag: app_id=%s device_name=%s bot_id=%s secret=%s",
                 iot_info_.rtc_app_id ? iot_info_.rtc_app_id : "(null)",
                 iot_info_.device_name ? iot_info_.device_name : "(null)",
                 CONFIG_VOLC_BOT_ID,
                 iot_info_.device_secret ? "yes" : "no");
        return false;
    }

    xEventGroupClearBits(event_group_handle_, kConnectedBit | kUserJoinedBit);
    // extra_params_ carries the optional AgentConfig parameters.
    ESP_LOGI(TAG, "Join RTC: handle=%p bot=%s iot_ready=%d free_heap=%u",
             rtc_handle_, CONFIG_VOLC_BOT_ID, (int)iot_ready_,
             (unsigned)heap_caps_get_free_size(MALLOC_CAP_DEFAULT));
    int ret = volc_rtc_start(rtc_handle_, CONFIG_VOLC_BOT_ID, &iot_info_,
                             extra_params_.empty() ? NULL : extra_params_.c_str());
    if (ret != 0) {
        ESP_LOGE(TAG, "RTC启动失败:%d", ret);
        ESP_LOGW(TAG, "Diag: start failed. Possible causes: invalid IoT creds, TLS/HTTP error, network unreachable, time not synced");
        return false;
    }

    EventBits_t bits = xEventGroupWaitBits(event_group_handle_, kConnectedBit, pdFALSE, pdFALSE, pdMS_TO_TICKS(5000));
    ESP_LOGI(TAG, "Wait connect bits=0x%x free_heap=%u", (unsigned)bits,
             (unsigned)heap_caps_get_free_size(MALLOC_CAP_DEFAULT));
    if ((bits & kConnectedBit) == 0) {
        // NOTE(review): the engine was started but is not stopped on timeout -
        // confirm whether the SDK tolerates a repeated volc_rtc_start().
        ESP_LOGE(TAG, "RTC连接超时");
        ESP_LOGW(TAG, "Diag: check Wi-Fi, SNTP time sync, IoT creds, RTC server availability");
        return false;
    }

    // Give the remote user a short grace period, but never make audio readiness
    // depend on it.
    bits = xEventGroupWaitBits(event_group_handle_, kUserJoinedBit, pdFALSE, pdFALSE, pdMS_TO_TICKS(3000));
    if ((bits & kUserJoinedBit) == 0) {
        ESP_LOGW(TAG, "RTC远程用户未加入 yet - 主动开启音频通道");
    }
    // MessageCallback() opens the channel itself on VOLC_MSG_USER_JOINED; only
    // open it here when that has not happened, so the callback fires once.
    if (!is_audio_channel_opened_) {
        server_sample_rate_ = 16000;
        server_frame_duration_ = 60;
        is_audio_channel_opened_ = true;
        first_downlink_logged_ = false;
        ESP_LOGI(TAG, "音频通道已打开");
        if (on_audio_channel_opened_) {
            on_audio_channel_opened_();
        }
    }
    return true;
}

/// Stops the RTC session (if connected), marks the channel closed and invokes
/// the channel-closed callback.
void VolcRtcProtocol::CloseAudioChannel() {
    if (!rtc_handle_) {
        return;
    }

    if (is_connected_) {
        volc_rtc_stop(rtc_handle_);
        is_connected_ = false;
    }

    ESP_LOGI(TAG, "音频通道已关闭");
    is_audio_channel_opened_ = false;
    if (on_audio_channel_closed_) {
        on_audio_channel_closed_();
    }
}

/// Returns whether the audio channel is currently marked open.
bool VolcRtcProtocol::IsAudioChannelOpened() const {
    return is_audio_channel_opened_;
}

/// RTC message callback: tracks connection / user-join state and forwards JSON
/// carried by status messages to on_incoming_json_.
/// @param context the VolcRtcProtocol instance passed to volc_rtc_create().
void VolcRtcProtocol::MessageCallback(void* context, volc_msg_t* message) {
    VolcRtcProtocol* protocol = static_cast<VolcRtcProtocol*>(context);
    if (!protocol || !message) {
        return;
    }

    // Parses `text` as JSON, captures a session id if one is present, flushes a
    // queued StartListening and hands the document to on_incoming_json_.
    const auto dispatch_json = [protocol](const char* text) {
        cJSON* root = cJSON_Parse(text);
        if (!root) {
            return;
        }
        if (const char* sid = FindSessionId(root)) {
            protocol->session_id_ = sid;
            ESP_LOGI(TAG, "Session ID set: %s", protocol->session_id_.c_str());
            if (protocol->is_audio_channel_opened_ && protocol->start_listening_pending_) {
                ListeningMode mode = protocol->pending_listening_mode_;
                protocol->start_listening_pending_ = false;
                protocol->SendStartListening(mode);
            }
        }
        if (protocol->on_incoming_json_) {
            protocol->on_incoming_json_(root);
        }
        cJSON_Delete(root);
    };

    switch (message->code) {
        case VOLC_MSG_CONNECTED:
            protocol->is_connected_ = true;
            xEventGroupSetBits(protocol->event_group_handle_, kConnectedBit);
            protocol->server_sample_rate_ = 16000;
            protocol->server_frame_duration_ = 60;
            ESP_LOGI(TAG, "RTC连接成功");
            break;
        case VOLC_MSG_DISCONNECTED:
            protocol->is_connected_ = false;
            protocol->is_audio_channel_opened_ = false;
            xEventGroupClearBits(protocol->event_group_handle_, kConnectedBit | kUserJoinedBit);
            ESP_LOGI(TAG, "RTC断开连接");
            break;
        case VOLC_MSG_USER_JOINED:
            // Only open the channel (and fire the callback) if it is not open yet.
            if (!protocol->is_audio_channel_opened_) {
                protocol->is_audio_channel_opened_ = true;
                xEventGroupSetBits(protocol->event_group_handle_, kUserJoinedBit);
                ESP_LOGI(TAG, "RTC远程用户加入");
                // Set default decoder parameters before audio starts
                protocol->server_sample_rate_ = 16000;
                protocol->server_frame_duration_ = 60;
                if (protocol->on_audio_channel_opened_) {
                    protocol->on_audio_channel_opened_();
                }
            } else {
                // Channel already open: just record the event.
                xEventGroupSetBits(protocol->event_group_handle_, kUserJoinedBit);
                ESP_LOGD(TAG, "RTC远程用户加入,音频通道已打开");
            }
            break;
        case VOLC_MSG_KEY_FRAME_REQ:
            // Key-frame request; carries no payload of interest.
            ESP_LOGI(TAG, "接收RTC关键帧请求");
            break;
        case VOLC_MSG_TARGET_BITRATE_CHANGED:
            // Target bitrate change (data.target_bitrate); intentionally ignored.
            break;
        case VOLC_MSG_CONV_STATUS:
            ESP_LOGI(TAG, "RTC会话状态:%lu", message->data.conv_status);
            // NOTE(review): if volc_msg_t::data is a union, data.msg aliases
            // data.conv_status here and must not be read as a pointer - confirm
            // against the SDK header.
            if (message->data.msg && message->data.msg[0] != '\0') {
                ESP_LOGI(TAG, "RTC会话状态消息内容: %s", message->data.msg);
                dispatch_json(message->data.msg);
            }
            break;
        default:
            ESP_LOGI(TAG, "接收RTC消息:%d", message->code);
            if (message->data.msg && message->data.msg[0] != '\0') {
                ESP_LOGI(TAG, "RTC消息内容: %s", message->data.msg);
                dispatch_json(message->data.msg);
            }
            break;
    }
}

/// RTC data callback: routes downlink audio to ProcessAudioData() and downlink
/// messages (plain JSON or "ctrl"/"conv"/"tool" framed binary) to on_incoming_json_.
/// @param context the VolcRtcProtocol instance passed to volc_rtc_create().
void VolcRtcProtocol::DataCallback(void* context, const void* data, size_t len, volc_data_info_t* info) {
    VolcRtcProtocol* protocol = static_cast<VolcRtcProtocol*>(context);
    if (!protocol || !info) {
        return;
    }

    if (info->type == VOLC_DATA_TYPE_AUDIO) {
        // Decoder parameters follow the format of the packet that just arrived.
        protocol->downlink_is_pcm_ = (info->info.audio.data_type == VOLC_AUDIO_DATA_TYPE_PCM);
        if (protocol->downlink_is_pcm_) {
            protocol->down_pcm_bytes_accum_ += len;
            protocol->server_sample_rate_ = 8000;
            protocol->server_frame_duration_ = 20;
        } else {
            protocol->down_opus_bytes_accum_ += len;
            protocol->server_sample_rate_ = 16000;
            protocol->server_frame_duration_ = 60;
        }
        if (!protocol->first_downlink_logged_) {
            ESP_LOGI(TAG, "接收下行音频首包: 类型=%s 大小=%d",
                     protocol->downlink_is_pcm_ ? "PCM" : "OPUS", (int)len);
            protocol->first_downlink_logged_ = true;
        }
        protocol->ProcessAudioData(data, len);
        return;
    }

    if (info->type != VOLC_DATA_TYPE_MESSAGE || !data || len == 0) {
        return;
    }

    const uint8_t* buf = static_cast<const uint8_t*>(data);
    std::string json_text;
    if (info->info.message.is_binary && len >= kBinaryHeaderSize) {
        bool is_ctrl = (memcmp(buf, "ctrl", 4) == 0);
        bool is_conv = (memcmp(buf, "conv", 4) == 0);
        bool is_tool = (memcmp(buf, "tool", 4) == 0);
        if (is_ctrl || is_conv || is_tool) {
            // Big-endian length; widen each byte before shifting so no signed
            // int is shifted into its sign bit.
            const uint32_t json_len = (static_cast<uint32_t>(buf[4]) << 24) |
                                      (static_cast<uint32_t>(buf[5]) << 16) |
                                      (static_cast<uint32_t>(buf[6]) << 8) |
                                      static_cast<uint32_t>(buf[7]);
            // Compare against the remaining bytes instead of computing
            // 8 + json_len, which wraps around for lengths near UINT32_MAX and
            // would let an oversized length pass the check.
            if (json_len > 0 && json_len <= len - kBinaryHeaderSize) {
                json_text.assign(reinterpret_cast<const char*>(buf + kBinaryHeaderSize), json_len);
                if (!protocol->suppress_incoming_message_log_) {
                    ESP_LOGI(TAG, "接收下行二进制消息(%s): %.*s",
                             is_ctrl ? "ctrl" : (is_conv ? "conv" : "tool"),
                             (int)json_text.size(), json_text.c_str());
                }
            }
        }
    }
    if (json_text.empty()) {
        // Not a recognised framed message: treat the whole buffer as text.
        json_text.assign(reinterpret_cast<const char*>(data), len);
        if (!protocol->suppress_incoming_message_log_) {
            ESP_LOGI(TAG, "接收下行消息: %.*s", (int)json_text.size(), json_text.c_str());
        }
    }

    cJSON* root = cJSON_Parse(json_text.c_str());
    if (!root) {
        return;
    }
    if (const char* sid = FindSessionId(root)) {
        protocol->session_id_ = sid;
        ESP_LOGI(TAG, "Session ID set: %s", protocol->session_id_.c_str());
        // A StartListening queued while no session existed can be sent now.
        if (protocol->is_audio_channel_opened_ && protocol->start_listening_pending_) {
            ListeningMode mode = protocol->pending_listening_mode_;
            protocol->start_listening_pending_ = false;
            protocol->SendStartListening(mode);
        }
    }
    if (protocol->on_incoming_json_) {
        protocol->on_incoming_json_(root);
    }
    cJSON_Delete(root);
}

/// Parses a JSON message from the server and forwards it to on_incoming_json_.
void VolcRtcProtocol::ParseServerMessage(const char* message) {
    ESP_LOGI(TAG, "接收服务器消息:%s", message);

    cJSON* root = cJSON_Parse(message);
    if (!root) {
        ESP_LOGE(TAG, "解析服务器消息失败");
        return;
    }

    if (on_incoming_json_) {
        on_incoming_json_(root);
    }

    cJSON_Delete(root);
}

/// Hands a downlink audio packet to on_incoming_audio_.
/// The bytes are copied into a new vector, so the callback may keep them after
/// this function returns.
void VolcRtcProtocol::ProcessAudioData(const void* data, int size) {
    if (!on_incoming_audio_) {
        return;
    }
    // A negative size or a null buffer with a positive size would build an
    // invalid iterator range.
    if (size < 0 || (!data && size > 0)) {
        return;
    }

    ESP_LOGD(TAG, "接收音频数据,大小:%d 字节", size);

    const uint8_t* bytes = static_cast<const uint8_t*>(data);
    on_incoming_audio_(std::vector<uint8_t>(bytes, bytes + size));
}

/// Sends a plain text message over the RTC data channel.
void VolcRtcProtocol::SendText(const std::string& text) {
    if (!rtc_handle_ || !is_connected_) {
        ESP_LOGW(TAG, "不能发送文本消息:RTC未准备好");
        return;
    }

    std::lock_guard lock(rtc_mutex_);

    volc_data_info_t data_info;
    memset(&data_info, 0, sizeof(data_info));
    data_info.type = VOLC_DATA_TYPE_MESSAGE;

    int ret = volc_rtc_send(rtc_handle_, text.data(), text.size(), &data_info);
    if (ret != 0) {
        ESP_LOGE(TAG, "发送文本消息失败:%d", ret);
    } else {
        ESP_LOGD(TAG, "发送文本消息: %s", text.c_str());
    }
}

/// Sends StartListening, or queues it until a session id is known and the RTC
/// connection is up (the callbacks flush the queued request).
void VolcRtcProtocol::SendStartListening(ListeningMode mode) {
    if (session_id_.empty() || !is_connected_) {
        start_listening_pending_ = true;
        pending_listening_mode_ = mode;
        ESP_LOGI(TAG, "延迟发送StartListening,等待会话就绪");
        return;
    }
    Protocol::SendStartListening(mode);
}

/// Sends a control command as a binary message: "ctrl" + big-endian length + JSON.
void VolcRtcProtocol::SendCtrl(const std::string& json) {
    if (!rtc_handle_ || !is_connected_) {
        ESP_LOGW(TAG, "不能发送ctrl二进制消息:RTC未准备好");
        return;
    }

    // Build the payload before taking the lock to keep the critical section short.
    const std::vector<uint8_t> payload = BuildTaggedPayload("ctrl", json);

    std::lock_guard lock(rtc_mutex_);

    volc_data_info_t data_info;
    memset(&data_info, 0, sizeof(data_info));
    data_info.type = VOLC_DATA_TYPE_MESSAGE;
    data_info.info.message.is_binary = true;

    int ret = volc_rtc_send(rtc_handle_, payload.data(), (int)payload.size(), &data_info);
    if (ret != 0) {
        ESP_LOGE(TAG, "发送ctrl二进制消息失败:%d", ret);
    } else {
        ESP_LOGD(TAG, "发送ctrl二进制消息: %.*s", (int)json.size(), json.c_str());
    }
}

/// Sends a function-call message as a binary message: "func" + big-endian length + JSON.
void VolcRtcProtocol::SendFunc(const std::string& json) {
    if (!rtc_handle_ || !is_connected_) {
        ESP_LOGW(TAG, "不能发送func二进制消息:RTC未准备好");
        return;
    }

    // Build the payload before taking the lock to keep the critical section short.
    const std::vector<uint8_t> payload = BuildTaggedPayload("func", json);

    std::lock_guard lock(rtc_mutex_);

    volc_data_info_t data_info;
    memset(&data_info, 0, sizeof(data_info));
    data_info.type = VOLC_DATA_TYPE_MESSAGE;
    data_info.info.message.is_binary = true;

    int ret = volc_rtc_send(rtc_handle_, payload.data(), (int)payload.size(), &data_info);
    if (ret != 0) {
        ESP_LOGE(TAG, "发送func二进制消息失败:%d", ret);
    } else {
        ESP_LOGD(TAG, "发送func二进制消息: %.*s", (int)json.size(), json.c_str());
    }
}

/// Sends a tool-call result as {"ToolCallID": ..., "Content": ...} via SendFunc().
/// Falls back to the base-class implementation when the JSON cannot be built.
void VolcRtcProtocol::SendFunctionResult(const std::string& tool_call_id, const std::string& content) {
    cJSON* obj = cJSON_CreateObject();
    if (!obj) {
        ESP_LOGE(TAG, "创建函数结果JSON失败,回退为文本");
        Protocol::SendFunctionResult(tool_call_id, content);
        return;
    }
    cJSON_AddStringToObject(obj, "ToolCallID", tool_call_id.c_str());
    cJSON_AddStringToObject(obj, "Content", content.c_str());
    char* printed = cJSON_PrintUnformatted(obj);
    std::string json = printed ? printed : std::string();
    if (printed) cJSON_free(printed);
    cJSON_Delete(obj);
    if (json.empty()) {
        ESP_LOGW(TAG, "函数结果JSON为空,回退为文本");
        Protocol::SendFunctionResult(tool_call_id, content);
        return;
    }
    SendFunc(json);
}

/// Sends text to the LLM wrapped as an "ExternalTextToLLM" control command
/// (InterruptMode 1), so that it enters the LLM and triggers TTS.
/// Falls back to the base-class implementation when the JSON cannot be built.
void VolcRtcProtocol::SendTextMessage(const std::string& text) {
    cJSON* root = cJSON_CreateObject();
    if (!root) {
        ESP_LOGE(TAG, "创建JSON失败,回退为文本消息");
        Protocol::SendTextMessage(text);
        return;
    }
    cJSON_AddStringToObject(root, "Command", "ExternalTextToLLM");
    cJSON_AddStringToObject(root, "Message", text.c_str());
    cJSON_AddNumberToObject(root, "InterruptMode", 1);
    char* printed = cJSON_PrintUnformatted(root);
    std::string json = printed ? printed : std::string();
    if (printed) cJSON_free(printed);
    cJSON_Delete(root);
    if (json.empty()) {
        ESP_LOGW(TAG, "生成的JSON为空,回退为文本消息");
        Protocol::SendTextMessage(text);
        return;
    }
    SendCtrl(json);
}

/// Asks the RTC engine to interrupt the current speech via volc_rtc_interrupt().
/// `reason` is not used by this implementation.
void VolcRtcProtocol::SendAbortSpeaking(AbortReason reason) {
    if (!rtc_handle_ || !is_connected_ || !is_audio_channel_opened_) {
        ESP_LOGW(TAG, "不能发送中止通话请求:RTC未准备好");
        return;
    }

    std::lock_guard lock(rtc_mutex_);

    ESP_LOGI(TAG, "通过Volc RTC中断发送中止通话请求!");

    int ret = volc_rtc_interrupt(rtc_handle_);
    if (ret != 0) {
        ESP_LOGE(TAG, "通过Volc RTC中断发送打断请求失败:%d", ret);
    } else {
        ESP_LOGI(TAG, "通过Volc RTC中断发送打断请求成功!");
    }
}