Pendant_Rtc_Toy/main/protocols/volc_rtc_protocol.cc
Rdzleo 8111515277 修复 Pendant 衍生项目无痛移植问题
实机验证通过后,按 Kapi 无屏底座路线补齐 Pendant RTC 吊坠项目的迁移修复。

1. BLE 配网与资源隔离
- sdkconfig.defaults 开启 BT 优先 PSRAM 分配,并将 LWIP socket 上限提升到 20
- sdkconfig.defaults.esp32s3 允许 BSS/NOINIT 放入 PSRAM,释放内部 SRAM 给 BLE/WiFi/RTC
- 配网模式 codec 使用 StartOutputOnly(),跳过麦克风 RX DMA 和 ES7210 输入链路
- ResetWifiConfiguration() 改为独立 wifi_reset task,避免在 iot_button/esp_timer 回调中阻塞延时
- WifiBoard 增加 IsWifiConfigMode(),供启动阶段判断是否走配网资源隔离路径

2. 音频底噪与 DMA 残留修复
- AudioCodec 增加 StartOutputOnly(),支持仅启动扬声器输出
- RTC 音频通道打开后灌入 200ms silence PCM,覆盖 I2S DMA 残留数据
- 软退出进入待命前重启 codec output 并再次灌静音,减少待命音/欢迎语前杂音
- box_audio_codec 在无硬件回采时使用 channel_mask=0,避免 I2S slot mask 被错误污染

3. 软件 loopback AEC
- 引入 esp_aec 底层同步 API,使用 DAC 输出复制构建 ref ring
- 上行 mic PCM 与延迟 ref 做同步消回声,适配无屏无硬件回采的 Pendant 形态
- AEC 采用 lazy init,减少启动阶段对 WiFi/BLE 内部 SRAM 的压力
- ref 静音时直接 passthrough,避免 AI 静音后误压制用户语音
- 在 player_pipeline_write 和 codec->OutputData 两条下行路径都追加 ref hook

4. RTC 连接稳定性与软退出
- VolcRtcProtocol 增加 LeaveRoom(bool notify_closed),支持 stop 房间但保留 rtc_handle
- hibernate 路径使用 LeaveRoom(false),避免关闭回调顺手关掉 codec output 导致待命音无声
- LeaveRoom/ForceRebuildEngine 重置 downlink_is_pcm_ 和首包标志,避免本地 Opus 音效被当 PCM 播成杂音
- OpenAudioChannel 连续失败 3 次后 ForceRebuildEngine,清理 RTC SDK 内部异常状态
- 加入 DIAG-RTC socket/heap/PSRAM/RSSI 日志,便于定位 ICE socket 和内存问题

5. Dialog watchdog 与 BOOT 唤醒
- Dialog watchdog 到期不再写 reboot_dlg_idle 后 esp_restart
- 新增 EnterIdleHibernate():软退房、清空残留音频队列、关闭麦克风、播放待命音后静默
- 新增 WakeFromHibernate():BOOT 唤醒后复用 RTC engine 并通过 ToggleChatState() 重连 RTC
- BOOT 单击优先判断 IsHibernating(),异步唤醒,避免走普通按键状态机
- hibernate 期间禁止 PowerSaveTimer 进入 Light Sleep,保护 I2C/codec 总线

6. 文档与衍生项目沉淀
- 更新石头光源属性检测方案文档
- 将 Pendant 实测通过的软退出、AEC、BLE 配网隔离经验同步到衍生项目移植规则
2026-05-29 13:36:36 +08:00

938 lines
43 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "volc_rtc_protocol.h"
#include <wifi_station.h>
#include "esp_log.h"
#include "sdkconfig.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "esp_heap_caps.h"
#include "esp_system.h"
#include <cJSON.h>
#include <cstring>
#include <inttypes.h>
#include <sys/stat.h>
#include <errno.h>
#include <unistd.h>
// 新增包含 system_info.h 头文件以使用 SystemInfo 类
#include "system_info.h"
#include "application.h"
// SNTP is initialized in WiFi board after network is up; no duplicate init here
#include "base/volc_device_manager.h"
#include "settings.h"
static const char* TAG = "VolcRtcProtocol";
#ifndef DIAG_RTC_BIND_ENABLE
#define DIAG_RTC_BIND_ENABLE 1
#endif
#if DIAG_RTC_BIND_ENABLE
#include "esp_wifi.h"
#include "lwip/sockets.h"
/// Diagnostic helper: walks the descriptor range
/// [LWIP_SOCKET_OFFSET, LWIP_SOCKET_OFFSET + CONFIG_LWIP_MAX_SOCKETS) and counts
/// every descriptor for which fstat() succeeds.
///
/// @return Number of descriptors in that range that fstat() accepted.
static int diag_count_used_sockets(void) {
    const int first_fd = LWIP_SOCKET_OFFSET;
    const int end_fd = LWIP_SOCKET_OFFSET + CONFIG_LWIP_MAX_SOCKETS;
    int open_count = 0;
    int fd = first_fd;
    while (fd < end_fd) {
        struct stat info;
        const bool is_open = (fstat(fd, &info) == 0);
        if (is_open) {
            ++open_count;
        }
        ++fd;
    }
    return open_count;
}
#endif
/// Creates the event group used to wait for the RTC "connected" (0x1) and
/// "remote user joined" (0x2) notifications raised from MessageCallback.
VolcRtcProtocol::VolcRtcProtocol() {
    event_group_handle_ = xEventGroupCreate();
    if (event_group_handle_ == nullptr) {
        // Creation can fail when the heap is exhausted; make that visible instead of
        // failing later inside an event-group call with no hint of the cause.
        ESP_LOGE(TAG, "Failed to create RTC event group (out of memory?)");
    }
}
/// Releases the RTC engine, the event group and the heap-allocated device name.
VolcRtcProtocol::~VolcRtcProtocol() {
    // Tear the RTC engine down first: MessageCallback sets/clears bits on
    // event_group_handle_, so the group must outlive the engine.
    if (rtc_handle_) {
        volc_rtc_stop(rtc_handle_);
        volc_rtc_destroy(rtc_handle_);
        rtc_handle_ = nullptr;
    }
    if (event_group_handle_) {
        vEventGroupDelete(event_group_handle_);
        event_group_handle_ = nullptr;
    }
    // Start() either points device_name at the CONFIG_VOLC_DEVICE_NAME literal or at a
    // malloc'ed copy of the BLE MAC (only when the configured name is empty). Compare by
    // content instead of by literal address: whether two identical string literals share
    // an address is unspecified, and free() on a literal would corrupt the heap.
    if (iot_info_.device_name && strcmp(iot_info_.device_name, CONFIG_VOLC_DEVICE_NAME) != 0) {
        free(iot_info_.device_name);
        iot_info_.device_name = nullptr;
    }
    // NOTE(review): device_secret / rtc_app_id may hold strdup'ed strings from Start();
    // they are not released here because part of their ownership comes from the SDK
    // registration call — confirm before adding free() for them.
}
void VolcRtcProtocol::Start() {
ESP_LOGI(TAG, "VolcRtcProtocol 开始启动...");// VolcRtcProtocol 开始启动...
esp_log_level_set(TAG, ESP_LOG_DEBUG);
// 注释掉所有文件系统相关操作,避免设备重启
// 这些操作需要文件系统支持,但当前设备可能没有正确挂载文件系统
// ESP_LOGI(TAG, "跳过文件系统操作以防止设备重启");// 跳过文件系统操作以防止设备重启
// TODO: Implement proper file system initialization if file logging is needed
// 禁用获取当前工作目录的操作,避免文件系统访问
// TODO: Re-enable if filesystem is properly initialized
// ESP_LOGI(TAG, "当前工作目录检查已禁用,以防止文件系统访问");// 当前工作目录检查已禁用,以防止文件系统访问
// 如果已有RTC实例先停止并销毁
if (rtc_handle_) {
volc_rtc_stop(rtc_handle_);
volc_rtc_destroy(rtc_handle_);
rtc_handle_ = nullptr;
}
// 创建火山RTC配置
cJSON* config = cJSON_CreateObject();
if (!config) {
ESP_LOGE(TAG, "RTC配置创建失败");// RTC配置创建失败
SetError("Failed to create RTC config");
return;
}
// 添加必要的RTC配置项
cJSON* audio_config = cJSON_CreateObject();
if (audio_config) {
cJSON_AddBoolToObject(audio_config, "publish", true);
cJSON_AddBoolToObject(audio_config, "subscribe", true);
cJSON_AddNumberToObject(audio_config, "codec", 4); // 设置音频编解码器为4根据设计文档
cJSON_AddItemToObject(config, "audio", audio_config);// 添加音频配置到RTC配置
}
cJSON* video_config = cJSON_CreateObject();
if (video_config) {
cJSON_AddBoolToObject(video_config, "publish", false);
cJSON_AddBoolToObject(video_config, "subscribe", false);
cJSON_AddNumberToObject(video_config, "codec", 1); // 设置视频编解码器为1根据设计文档
cJSON_AddItemToObject(config, "video", video_config);
}
cJSON_AddNumberToObject(config, "log_level", 1); // 设置日志级别
// 添加参数数组,与 Airhub_Rtc_h 项目保持一致
cJSON* params = cJSON_CreateArray();
if (params) {
// 只输出日志到控制台,不输出到文件
cJSON_AddItemToArray(params, cJSON_CreateString("{\"debug\":{\"log_to_console\":1}}"));// 添加日志到控制台配置
cJSON_AddItemToArray(params, cJSON_CreateString("{\"audio\":{\"codec\":{\"internal\":{\"enable\":1}}}}"));// 添加音频编解码器内部配置,启用 SDK 内部编解码
cJSON_AddItemToArray(params, cJSON_CreateString("{\"rtc\":{\"access\":{\"concurrent_requests\":1}}}"));// 添加RTC并发请求配置
cJSON_AddItemToArray(params, cJSON_CreateString("{\"rtc\":{\"ice\":{\"concurrent_agents\":1}}}"));// 添加RTC并发ICE代理配置
cJSON_AddItemToObject(config, "params", params);
}
// 创建IoT信息并优先从NVS加载
memset(&iot_info_, 0, sizeof(iot_info_));
iot_info_.instance_id = (char*)CONFIG_VOLC_INSTANCE_ID;
iot_info_.product_key = (char*)CONFIG_VOLC_PRODUCT_KEY;
iot_info_.product_secret = (char*)CONFIG_VOLC_PRODUCT_SECRET;
iot_info_.bot_id = (char*)CONFIG_VOLC_BOT_ID;
// 优先使用配置文件中的设备名称如果为空则使用MAC地址
if (CONFIG_VOLC_DEVICE_NAME && strlen(CONFIG_VOLC_DEVICE_NAME) > 0) {
// 使用配置文件中的设备名称
iot_info_.device_name = (char*)CONFIG_VOLC_DEVICE_NAME;
ESP_LOGI(TAG, "使用配置文件中的设备名称: %s", iot_info_.device_name);
} else {
// 配置文件中的设备名称为空使用蓝牙MAC地址作为设备名称
std::string mac_address = SystemInfo::GetBleMacAddress();
// MAC地址中替换冒号为下划线,避免文件名中包含冒号
std::replace(mac_address.begin(), mac_address.end(), ':', '_');
char* mac_buffer = (char*)malloc(mac_address.length() + 1);
strcpy(mac_buffer, mac_address.c_str());
iot_info_.device_name = mac_buffer;
ESP_LOGI(TAG, "使用蓝牙MAC地址作为设备名称(已替换冒号为下划线): %s", iot_info_.device_name);
}
Settings s("volc");
auto saved_name = s.GetString("device_name", "");
bool name_mismatch = (!saved_name.empty() && strcmp(saved_name.c_str(), iot_info_.device_name) != 0);
std::string saved_secret;
std::string saved_appid;
if (name_mismatch) {
ESP_LOGW(TAG, "检测到设备名称变更:%s -> %s清除旧凭证", saved_name.c_str(), iot_info_.device_name);
Settings sw("volc", true);
sw.EraseKey("device_secret");
sw.EraseKey("rtc_app_id");
sw.SetString("device_name", iot_info_.device_name);
} else {
saved_secret = s.GetString("device_secret", "");
saved_appid = s.GetString("rtc_app_id", "");
if (saved_name.empty()) {
Settings sw("volc", true);
sw.SetString("device_name", iot_info_.device_name);
}
}
if (!saved_secret.empty()) {
iot_info_.device_secret = strdup(saved_secret.c_str());
}
if (!saved_appid.empty()) {
iot_info_.rtc_app_id = strdup(saved_appid.c_str());
}
ESP_LOGI(TAG, "NVS凭证已加载secret=%d appid=%d device_name=%s, free_heap=%u",
!saved_secret.empty(), !saved_appid.empty(), iot_info_.device_name,
(unsigned)heap_caps_get_free_size(MALLOC_CAP_DEFAULT));
// 创建一个结构体来传递参数给任务
struct InitParams {
VolcRtcProtocol* protocol;
cJSON* config;
};
InitParams* init_params = new InitParams();
init_params->protocol = this;
init_params->config = config;
// 将设备注册和RTC创建操作移到单独的任务中执行避免main任务栈溢出
xTaskCreate([](void* arg) {
InitParams* init_params = static_cast<InitParams*>(arg);
VolcRtcProtocol* protocol = init_params->protocol;
cJSON* config = init_params->config;
// 如果没有设备密钥或RTC应用ID进行设备注册
if (!protocol->iot_info_.device_secret || !protocol->iot_info_.rtc_app_id) {
char* device_secret_ptr = nullptr;
if (volc_device_register(&protocol->iot_info_, &device_secret_ptr) != 0 || device_secret_ptr == nullptr) {
ESP_LOGE(TAG, "设备注册失败");// 设备注册失败
protocol->SetError("Failed to register device");
cJSON_Delete(config);
delete init_params;
vTaskDelete(NULL);
return;
}
protocol->iot_info_.device_secret = device_secret_ptr;
Settings sw("volc", true);
sw.SetString("device_secret", protocol->iot_info_.device_secret);
if (protocol->iot_info_.rtc_app_id) {
sw.SetString("rtc_app_id", protocol->iot_info_.rtc_app_id);
}
sw.SetString("device_name", protocol->iot_info_.device_name);
}
// 创建RTC实例
protocol->rtc_handle_ = volc_rtc_create(
protocol->iot_info_.rtc_app_id ? protocol->iot_info_.rtc_app_id : CONFIG_VOLC_INSTANCE_ID,
protocol,
config,
&MessageCallback,
&DataCallback
);
cJSON_Delete(config);
delete init_params;
if (!protocol->rtc_handle_) {
ESP_LOGE(TAG, "RTC实例创建失败");// RTC实例创建失败
protocol->SetError("Failed to create RTC instance");
} else {
protocol->iot_ready_ = true;
ESP_LOGI(TAG, "RTC实例已准备就绪房间加入将在监听状态后执行");// RTC实例已准备就绪房间加入将在监听状态后执行
Application::GetInstance().InitializeWebsocketProtocol();// RTC初始化成功后初始化Websocket协议
}
vTaskDelete(NULL);
}, "volc_rtc_init", 16384, init_params, 5, NULL);
// 注意此处不再立即创建RTC实例而是将其推迟到任务中执行
ESP_LOGI(TAG, "VolcRtcProtocol初始化任务已创建");// VolcRtcProtocol初始化任务已创建
}
/// Stores the extra AgentConfig parameters (the `config` and `agent_config` parts of the
/// request body). OpenAudioChannel() passes them to volc_rtc_start() when non-empty.
///
/// @param params Serialized agent configuration; copied into extra_params_.
void VolcRtcProtocol::SetAgentConfig(const std::string& params) {
    extra_params_.assign(params);
    ESP_LOGI(TAG, "设置Agent配置参数: %s", extra_params_.c_str());
}
// 🔊 发送音频数据到RTC
void VolcRtcProtocol::SendAudio(const std::vector<uint8_t>& data) {
if (!rtc_handle_ || !is_connected_ || !is_audio_channel_opened_) {
ESP_LOGW(TAG, "无法发送音频RTC未准备就绪");// 无法发送音频RTC未准备就绪
return;
}
std::lock_guard<std::mutex> lock(rtc_mutex_);
volc_data_info_t data_info;
memset(&data_info, 0, sizeof(data_info));
data_info.type = VOLC_DATA_TYPE_AUDIO; // 音频数据类型
data_info.info.audio.data_type = VOLC_AUDIO_DATA_TYPE_OPUS; // 格式OPUS
// 音频参数应该在RTC初始化时已经设置好这里只需要发送数据
int ret = volc_rtc_send(rtc_handle_, data.data(), data.size(), &data_info);
if (ret != 0) {
ESP_LOGE(TAG, "发送音频失败:%d", ret);// 发送音频失败
} else {
opus_bytes_accum_ += data.size();
opus_frames_accum_ += 1;
LogUplinkStatsMaybe();
}
}
// 🔊 发送PCM音频数据到RTC
void VolcRtcProtocol::SendPcm(const std::vector<uint8_t>& data) {
if (!rtc_handle_ || !is_connected_ || !is_audio_channel_opened_) {
ESP_LOGW(TAG, "无法发送音频RTC未准备就绪");
return;
}
std::lock_guard<std::mutex> lock(rtc_mutex_);
pcm_pending_.insert(pcm_pending_.end(), data.begin(), data.end());
// 以 20ms 固定帧打包 PCM8k/16bit/mono即 320 字节;静音段也持续发送以满足 AEC/RTC 的恒定节拍
const size_t frame_bytes = (size_t)(8000 * 20 / 1000) * sizeof(int16_t);
size_t offset = 0;
while (offset + frame_bytes <= pcm_pending_.size()) {
volc_data_info_t data_info;
memset(&data_info, 0, sizeof(data_info));
data_info.type = VOLC_DATA_TYPE_AUDIO;
data_info.info.audio.data_type = VOLC_AUDIO_DATA_TYPE_PCM;
data_info.info.audio.commit = false;
int ret = volc_rtc_send(rtc_handle_, pcm_pending_.data() + offset, frame_bytes, &data_info);
if (ret != 0) {
ESP_LOGE(TAG, "发送音频失败:%d", ret);
break;
} else {
pcm_bytes_accum_ += frame_bytes;
pcm_frames_accum_ += 1;
}
offset += frame_bytes;
}
if (offset > 0) {
pcm_pending_.erase(pcm_pending_.begin(), pcm_pending_.begin() + offset);
}
LogUplinkStatsMaybe();
}
// 🔊 发送G711A音频数据到RTC
void VolcRtcProtocol::SendG711A(const std::vector<uint8_t>& data) {
if (!rtc_handle_ || !is_connected_ || !is_audio_channel_opened_) {
ESP_LOGW(TAG, "无法发送音频RTC未准备就绪");
return;
}
std::lock_guard<std::mutex> lock(rtc_mutex_);
g711a_pending_.insert(g711a_pending_.end(), data.begin(), data.end());
const size_t frame_bytes = 160;
size_t offset = 0;
while (offset + frame_bytes <= g711a_pending_.size()) {
volc_data_info_t data_info;
memset(&data_info, 0, sizeof(data_info));
data_info.type = VOLC_DATA_TYPE_AUDIO;
data_info.info.audio.data_type = VOLC_AUDIO_DATA_TYPE_G711A;
data_info.info.audio.commit = true;
int ret = volc_rtc_send(rtc_handle_, g711a_pending_.data() + offset, frame_bytes, &data_info);
if (ret != 0) {
ESP_LOGE(TAG, "发送音频失败:%d", ret);
break;
} else {
ESP_LOGI(TAG, "发送上行G711A帧: 大小=%zu", (size_t)frame_bytes);
g711a_bytes_accum_ += frame_bytes;
g711a_frames_accum_ += 1;
}
offset += frame_bytes;
}
if (offset > 0) {
g711a_pending_.erase(g711a_pending_.begin(), g711a_pending_.begin() + offset);
}
LogUplinkStatsMaybe();
}
/// Logs uplink/downlink audio statistics at most once every two seconds.
///
/// When at least 2,000,000 us have passed since the last report, the accumulated
/// uplink bitrate (Opus + PCM + G.711A bytes) and the downlink byte counters are
/// logged, and every counter is reset. Otherwise the call does nothing beyond
/// initializing the reference timestamp on first use.
void VolcRtcProtocol::LogUplinkStatsMaybe() {
    const uint64_t now_us = esp_timer_get_time();
    if (uplink_last_log_us_ == 0) {
        uplink_last_log_us_ = now_us;
    }

    const uint64_t elapsed_us = now_us - uplink_last_log_us_;
    if (elapsed_us < 2000000) {
        return;
    }

    // elapsed_us is at least 2,000,000 here, so the division cannot be by zero.
    const uint64_t uplink_bytes = (uint64_t)(opus_bytes_accum_ + pcm_bytes_accum_ + g711a_bytes_accum_);
    const uint64_t bps = (uplink_bytes * 8 * 1000000ULL) / elapsed_us;

    ESP_LOGI(TAG, "上行音频统计: PCM帧=%d 字节=%zu, G711A帧=%d 字节=%zu, 速率=%llu bps",
             pcm_frames_accum_, (size_t)pcm_bytes_accum_, g711a_frames_accum_, (size_t)g711a_bytes_accum_, (unsigned long long)bps);
    ESP_LOGI(TAG, "下行音频统计: PCM字节=%zu, OPUS字节=%zu",
             (size_t)down_pcm_bytes_accum_, (size_t)down_opus_bytes_accum_);

    // Start a fresh measurement window.
    opus_bytes_accum_ = 0;
    pcm_bytes_accum_ = 0;
    g711a_bytes_accum_ = 0;
    down_pcm_bytes_accum_ = 0;
    down_opus_bytes_accum_ = 0;
    opus_frames_accum_ = 0;
    pcm_frames_accum_ = 0;
    g711a_frames_accum_ = 0;
    uplink_last_log_us_ = now_us;
}
// 🔊 打开音频通道
bool VolcRtcProtocol::OpenAudioChannel() {
if (!rtc_handle_) {
ESP_LOGW(TAG, "无法打开音频通道RTC句柄未准备就绪触发重建");
Start();
return false;
}
if (!is_connected_) {
if (!iot_ready_) {
ESP_LOGE(TAG, "IoT信息未准备就绪无法加入房间");// IoT信息未准备就绪无法加入房间
ESP_LOGW(TAG, "Diag: app_id=%s device_name=%s bot_id=%s secret=%s", iot_info_.rtc_app_id ? iot_info_.rtc_app_id : "(null)", iot_info_.device_name ? iot_info_.device_name : "(null)", CONFIG_VOLC_BOT_ID, iot_info_.device_secret ? "yes" : "no");
return false;
}
xEventGroupClearBits(event_group_handle_, 0x1 | 0x2);
// 新增extra_params 用于传递额外的AgentConfig配置参数
ESP_LOGI(TAG, "Join RTC: handle=%p bot=%s iot_ready=%d free_heap=%u", rtc_handle_, CONFIG_VOLC_BOT_ID, (int)iot_ready_, (unsigned)heap_caps_get_free_size(MALLOC_CAP_DEFAULT));
#if DIAG_RTC_BIND_ENABLE
{
int sockets_used = diag_count_used_sockets();
wifi_ap_record_t ap_info = {};
int rssi = (esp_wifi_sta_get_ap_info(&ap_info) == ESP_OK) ? ap_info.rssi : -127;
ESP_LOGW("DIAG-RTC", "Pre-Join: sockets=%d/%d heap=%u psram=%u rssi=%d",
sockets_used, CONFIG_LWIP_MAX_SOCKETS,
(unsigned)heap_caps_get_free_size(MALLOC_CAP_DEFAULT),
(unsigned)heap_caps_get_free_size(MALLOC_CAP_SPIRAM),
rssi);
}
#endif
int ret = volc_rtc_start(rtc_handle_, CONFIG_VOLC_BOT_ID, &iot_info_, extra_params_.empty() ? NULL : extra_params_.c_str());
if (ret != 0) {
ESP_LOGE(TAG, "RTC启动失败%d", ret);// RTC启动失败%d
ESP_LOGW(TAG, "Diag: start failed. Possible causes: invalid IoT creds, TLS/HTTP error, network unreachable, time not synced");// 诊断启动失败可能原因无效的IoT凭证、TLS/HTTP错误、网络不可达、时间未同步
return false;
}
EventBits_t bits = xEventGroupWaitBits(event_group_handle_, 0x1, pdFALSE, pdFALSE, pdMS_TO_TICKS(5000));
ESP_LOGI(TAG, "Wait connect bits=0x%x free_heap=%u", (unsigned)bits, (unsigned)heap_caps_get_free_size(MALLOC_CAP_DEFAULT));
if ((bits & 0x1) == 0) {
ESP_LOGE(TAG, "RTC连接超时");// RTC连接超时
ESP_LOGW(TAG, "Diag: check Wi-Fi, SNTP time sync, IoT creds, RTC server availability");// 诊断检查Wi-Fi、SNTP时间同步、IoT凭证、RTC服务器可用性
#if DIAG_RTC_BIND_ENABLE
ESP_LOGW("DIAG-RTC", "Post-Fail: sockets=%d/%d heap=%u psram=%u errno=%d(%s)",
diag_count_used_sockets(), CONFIG_LWIP_MAX_SOCKETS,
(unsigned)heap_caps_get_free_size(MALLOC_CAP_DEFAULT),
(unsigned)heap_caps_get_free_size(MALLOC_CAP_SPIRAM),
errno, strerror(errno));
#endif
return false;
}
// Do not block audio readiness on remote user join; enable subscribe immediately
bits = xEventGroupWaitBits(event_group_handle_, 0x2, pdFALSE, pdFALSE, pdMS_TO_TICKS(3000));
if ((bits & 0x2) == 0) {
ESP_LOGW(TAG, "RTC远程用户未加入 yet - 主动开启音频通道");// RTC远程用户未加入 yet - 主动开启音频通道
// 远程用户未加入时,需要手动设置状态
server_sample_rate_ = 16000;
server_frame_duration_ = 60;
is_audio_channel_opened_ = true;
first_downlink_logged_ = false;
ESP_LOGI(TAG, "音频通道已打开");// 音频通道已打开
if (on_audio_channel_opened_) {
on_audio_channel_opened_();
}
} else {
// 远程用户已加入时不要重复打印日志因为MessageCallback中已经处理
// 但需要确保状态正确设置
if (!is_audio_channel_opened_) {
server_sample_rate_ = 16000;
server_frame_duration_ = 60;
is_audio_channel_opened_ = true;
first_downlink_logged_ = false;
ESP_LOGI(TAG, "音频通道已打开");// 音频通道已打开
if (on_audio_channel_opened_) {
on_audio_channel_opened_();
}
}
}
}
return true;
}
// 🔊 关闭音频通道
void VolcRtcProtocol::CloseAudioChannel() {
if (!rtc_handle_) {
return;
}
if (is_connected_) {
volc_rtc_stop(rtc_handle_);// 关闭RTC音频通道
is_connected_ = false;// 标记音频通道已关闭
}
ESP_LOGI(TAG, "音频通道已关闭");// 音频通道已关闭
is_audio_channel_opened_ = false;// 标记音频通道已关闭
if (on_audio_channel_closed_) {
on_audio_channel_closed_();// 调用音频通道关闭回调
}
}
/// Leaves the RTC room while keeping rtc_handle_ alive so it can be reused later.
///
/// Stops the RTC session when connected, clears the channel-open flag and resets the
/// downlink format state. The closed callback is only invoked when requested.
///
/// @param notify_closed When true, on_audio_channel_closed_ is called (if set).
void VolcRtcProtocol::LeaveRoom(bool notify_closed) {
    const bool has_engine = static_cast<bool>(rtc_handle_);
    if (has_engine && is_connected_) {
        volc_rtc_stop(rtc_handle_);
        is_connected_ = false;
    }
    if (has_engine) {
        ESP_LOGI(TAG, "✓ 已 stop RTC 房间(保留 handle 供唤醒复用, notify_closed=%d", (int)notify_closed);
    }

    is_audio_channel_opened_ = false;
    // Local standby sounds played after leaving are Opus; clear the RTC PCM downlink
    // flag so they are not played back as PCM noise.
    downlink_is_pcm_ = false;
    first_downlink_logged_ = false;

    if (!notify_closed) {
        return;
    }
    if (on_audio_channel_closed_) {
        on_audio_channel_closed_();
    }
}
/// Destroys the RTC engine so the next OpenAudioChannel() has to rebuild it.
///
/// Stops the session when connected, destroys and clears rtc_handle_, resets the
/// channel/downlink flags, then blocks the calling task for 2 s. Socket/heap
/// diagnostics are logged before and after when DIAG_RTC_BIND_ENABLE is set.
void VolcRtcProtocol::ForceRebuildEngine() {
    ESP_LOGW(TAG, "🔄 ForceRebuildEngine: 销毁 RTC engine 以清理 SDK 状态");
#if DIAG_RTC_BIND_ENABLE
    ESP_LOGW("DIAG-RTC", "Pre-Rebuild: sockets=%d/%d heap=%u",
             diag_count_used_sockets(), CONFIG_LWIP_MAX_SOCKETS,
             (unsigned)heap_caps_get_free_size(MALLOC_CAP_DEFAULT));
#endif

    const bool has_engine = static_cast<bool>(rtc_handle_);
    if (has_engine && is_connected_) {
        volc_rtc_stop(rtc_handle_);
        is_connected_ = false;
    }
    if (has_engine) {
        volc_rtc_destroy(rtc_handle_);
        rtc_handle_ = nullptr;
    }

    is_audio_channel_opened_ = false;
    downlink_is_pcm_ = false;
    first_downlink_logged_ = false;

    // Fixed 2 s pause after tearing the engine down, before the post-wait diagnostics.
    vTaskDelay(pdMS_TO_TICKS(2000));
#if DIAG_RTC_BIND_ENABLE
    ESP_LOGW("DIAG-RTC", "Post-Rebuild-Wait: sockets=%d/%d heap=%u",
             diag_count_used_sockets(), CONFIG_LWIP_MAX_SOCKETS,
             (unsigned)heap_caps_get_free_size(MALLOC_CAP_DEFAULT));
#endif
}
/// Reports whether the audio channel is currently flagged as open.
///
/// @return The current value of is_audio_channel_opened_.
bool VolcRtcProtocol::IsAudioChannelOpened() const {
    const bool opened = is_audio_channel_opened_;
    return opened;
}
void VolcRtcProtocol::MessageCallback(void* context, volc_msg_t* message) {
VolcRtcProtocol* protocol = static_cast<VolcRtcProtocol*>(context);
// 目前只处理简单的连接状态消息
switch (message->code) {
case VOLC_MSG_CONNECTED:
protocol->is_connected_ = true;
xEventGroupSetBits(protocol->event_group_handle_, 0x1);
protocol->server_sample_rate_ = 16000;
protocol->server_frame_duration_ = 60;
ESP_LOGI(TAG, "RTC连接成功");// RTC连接成功
//Application::GetInstance().InitializeWebsocketProtocol();// RTC连接成功后初始化Websocket协议
break;
case VOLC_MSG_DISCONNECTED:
protocol->is_connected_ = false;
protocol->is_audio_channel_opened_ = false;
xEventGroupClearBits(protocol->event_group_handle_, 0x1 | 0x2);
ESP_LOGI(TAG, "RTC断开连接");// RTC断开连接
break;
case VOLC_MSG_USER_JOINED:
// 只有在音频通道尚未打开的情况下才设置状态和调用回调
if (!protocol->is_audio_channel_opened_) {
protocol->is_audio_channel_opened_ = true;
xEventGroupSetBits(protocol->event_group_handle_, 0x2);
ESP_LOGI(TAG, "RTC远程用户加入");// RTC远程用户加入
// Set default decoder parameters before audio starts
protocol->server_sample_rate_ = 16000;
protocol->server_frame_duration_ = 60;
// 调用音频通道打开回调
if (protocol->on_audio_channel_opened_) {
protocol->on_audio_channel_opened_();
}
} else {
// 音频通道已经打开,只更新事件标志
xEventGroupSetBits(protocol->event_group_handle_, 0x2);
ESP_LOGD(TAG, "RTC远程用户加入音频通道已打开");// 调试信息,不重复打印
}
break;
case VOLC_MSG_KEY_FRAME_REQ:
// 关键帧请求消息不需要处理msg字段
ESP_LOGI(TAG, "接收RTC关键帧请求");// 接收RTC关键帧请求
break;
case VOLC_MSG_TARGET_BITRATE_CHANGED:
// 目标码率变化消息使用target_bitrate字段
// ESP_LOGI(TAG, "RTC target bitrate changed: %lu bps", message->data.target_bitrate);
break;
case VOLC_MSG_CONV_STATUS:
// 会话状态消息使用conv_status字段
ESP_LOGI(TAG, "RTC会话状态%lu", message->data.conv_status);
if (message && message->data.msg && message->data.msg[0] != '\0') {
std::string text(message->data.msg);
ESP_LOGI(TAG, "RTC会话状态消息内容: %s", text.c_str());
cJSON* root = cJSON_Parse(text.c_str());
if (root) {
const char* sid_keys[] = {"sessionId", "session_id", "sid"};
cJSON* sid = nullptr;
for (size_t i = 0; i < sizeof(sid_keys) / sizeof(sid_keys[0]); ++i) {
sid = cJSON_GetObjectItem(root, sid_keys[i]);
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
break;
}
sid = nullptr;
}
if (!sid) {
const char* containers[] = {"data", "payload", "context", "session"};
for (size_t i = 0; i < sizeof(containers) / sizeof(containers[0]); ++i) {
cJSON* obj = cJSON_GetObjectItem(root, containers[i]);
if (obj && cJSON_IsObject(obj)) {
for (size_t j = 0; j < sizeof(sid_keys) / sizeof(sid_keys[0]); ++j) {
sid = cJSON_GetObjectItem(obj, sid_keys[j]);
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
break;
}
}
}
if (sid) break;
}
}
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
protocol->session_id_ = sid->valuestring;
ESP_LOGI(TAG, "Session ID set: %s", protocol->session_id_.c_str());
if (protocol->is_audio_channel_opened_ && protocol->start_listening_pending_) {
ListeningMode m = protocol->pending_listening_mode_;
protocol->start_listening_pending_ = false;
protocol->SendStartListening(m);
}
}
if (protocol->on_incoming_json_) {
protocol->on_incoming_json_(root);
}
cJSON_Delete(root);
}
}
break;
default:
ESP_LOGI(TAG, "接收RTC消息%d", message->code);// 接收RTC消息%d
if (message && message->data.msg && message->data.msg[0] != '\0') {
std::string text(message->data.msg);
ESP_LOGI(TAG, "RTC消息内容: %s", text.c_str());
cJSON* root = cJSON_Parse(text.c_str());
if (root) {
const char* sid_keys[] = {"sessionId", "session_id", "sid"};
cJSON* sid = nullptr;
for (size_t i = 0; i < sizeof(sid_keys) / sizeof(sid_keys[0]); ++i) {
sid = cJSON_GetObjectItem(root, sid_keys[i]);
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
break;
}
sid = nullptr;
}
if (!sid) {
const char* containers[] = {"data", "payload", "context", "session"};
for (size_t i = 0; i < sizeof(containers) / sizeof(containers[0]); ++i) {
cJSON* obj = cJSON_GetObjectItem(root, containers[i]);
if (obj && cJSON_IsObject(obj)) {
for (size_t j = 0; j < sizeof(sid_keys) / sizeof(sid_keys[0]); ++j) {
sid = cJSON_GetObjectItem(obj, sid_keys[j]);
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
break;
}
}
}
if (sid) break;
}
}
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
protocol->session_id_ = sid->valuestring;
ESP_LOGI(TAG, "Session ID set: %s", protocol->session_id_.c_str());
if (protocol->is_audio_channel_opened_ && protocol->start_listening_pending_) {
ListeningMode m = protocol->pending_listening_mode_;
protocol->start_listening_pending_ = false;
protocol->SendStartListening(m);
}
}
if (protocol->on_incoming_json_) {
protocol->on_incoming_json_(root);// 调用回调函数处理JSON消息
}
cJSON_Delete(root);// 删除JSON根对象释放内存
}
}
break;
}
}
// 处理RTC音频数据
void VolcRtcProtocol::DataCallback(void* context, const void* data, size_t len, volc_data_info_t* info) {
VolcRtcProtocol* protocol = static_cast<VolcRtcProtocol*>(context);
// ESP_LOGI(TAG, "RTC data: type=%d len=%u free_heap=%u", info->type, (unsigned)len, (unsigned)heap_caps_get_free_size(MALLOC_CAP_DEFAULT));
if (info->type == VOLC_DATA_TYPE_AUDIO) {
if (info) {
protocol->downlink_is_pcm_ = (info->info.audio.data_type == VOLC_AUDIO_DATA_TYPE_PCM);
if (protocol->downlink_is_pcm_) {
protocol->down_pcm_bytes_accum_ += len;
protocol->server_sample_rate_ = 8000;
protocol->server_frame_duration_ = 20;
} else {
protocol->down_opus_bytes_accum_ += len;
protocol->server_sample_rate_ = 16000;
protocol->server_frame_duration_ = 60;
}
if (!protocol->first_downlink_logged_) {
ESP_LOGI(TAG, "接收下行音频首包: 类型=%s 大小=%d", protocol->downlink_is_pcm_ ? "PCM" : "OPUS", (int)len);// 接收下行音频首包: 类型=%s 大小=%d
protocol->first_downlink_logged_ = true;// 标记已记录首包
}
}
protocol->ProcessAudioData(data, len);// 处理音频数据
} else if (info->type == VOLC_DATA_TYPE_MESSAGE) {
if (data && len > 0) {
const uint8_t* buf = static_cast<const uint8_t*>(data);
std::string json_text;
// 检测二进制前缀格式: [prefix(4字节)] + [json_len(4字节大端)] + [JSON]
// 注意: SDK DataCallback中 is_binary 始终为false不能依赖此字段
bool is_subv = false;
if (len >= 8) {
bool is_ctrl = (memcmp(buf, "ctrl", 4) == 0);
bool is_conv = (memcmp(buf, "conv", 4) == 0);
bool is_tool = (memcmp(buf, "tool", 4) == 0);
is_subv = (memcmp(buf, "subv", 4) == 0);
bool is_info = (memcmp(buf, "info", 4) == 0);
if (is_ctrl || is_conv || is_tool || is_subv || is_info) {
uint32_t json_len = (uint32_t)((buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | (buf[7]));
if (json_len > 0 && (size_t)(8 + json_len) <= len) {
json_text.assign(reinterpret_cast<const char*>(buf + 8), json_len);
// 字幕消息不打印内容(频率高)
if (!is_subv && !protocol->suppress_incoming_message_log_) {
const char* prefix = is_ctrl ? "ctrl" : (is_conv ? "conv" : (is_tool ? "tool" : "info"));
ESP_LOGI(TAG, "接收下行消息(%s): %.*s", prefix, (int)json_text.size(), json_text.c_str());
}
}
}
}
if (json_text.empty()) {
json_text.assign(reinterpret_cast<const char*>(data), len);
if (!protocol->suppress_incoming_message_log_) {
ESP_LOGI(TAG, "接收下行消息: %.*s", (int)json_text.size(), json_text.c_str());
}
}
// 非subv消息立即通知应用层中止HTTPS播放尽早触发不等JSON解析
// subv字幕消息由应用层subtitle handler处理可区分USER/AI
if (!is_subv && protocol->on_bot_message_) {
protocol->on_bot_message_();
}
cJSON* root = cJSON_Parse(json_text.c_str());
if (root) {
// 提取 Session ID支持多种字段名和嵌套位置
const char* sid_keys[] = {"sessionId", "session_id", "sid"};
cJSON* sid = nullptr;
for (size_t i = 0; i < sizeof(sid_keys) / sizeof(sid_keys[0]); ++i) {
sid = cJSON_GetObjectItem(root, sid_keys[i]);
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') break;
sid = nullptr;
}
if (!sid) {
const char* containers[] = {"data", "payload", "context", "session"};
for (size_t i = 0; i < sizeof(containers) / sizeof(containers[0]); ++i) {
cJSON* obj = cJSON_GetObjectItem(root, containers[i]);
if (obj && cJSON_IsObject(obj)) {
for (size_t j = 0; j < sizeof(sid_keys) / sizeof(sid_keys[0]); ++j) {
sid = cJSON_GetObjectItem(obj, sid_keys[j]);
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') break;
}
}
if (sid) break;
}
}
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
protocol->session_id_ = sid->valuestring;
ESP_LOGI(TAG, "Session ID set: %s", protocol->session_id_.c_str());
if (protocol->is_audio_channel_opened_ && protocol->start_listening_pending_) {
ListeningMode m = protocol->pending_listening_mode_;
protocol->start_listening_pending_ = false;
protocol->SendStartListening(m);
}
}
if (protocol->on_incoming_json_) {
protocol->on_incoming_json_(root);
}
cJSON_Delete(root);
}
}
}
}
// 解析服务器发送的JSON消息
void VolcRtcProtocol::ParseServerMessage(const char* message) {
ESP_LOGI(TAG, "接收服务器消息:%s", message);// 接收服务器消息:%s
cJSON* root = cJSON_Parse(message);
if (!root) {
ESP_LOGE(TAG, "解析服务器消息失败");// 解析服务器消息失败
return;
}
if (on_incoming_json_) {
on_incoming_json_(root);
}
cJSON_Delete(root);
}
void VolcRtcProtocol::ProcessAudioData(const void* data, int size) {
if (!on_incoming_audio_) {
return;
}
ESP_LOGD(TAG, "接收音频数据,大小:%d 字节", size);// 接收音频数据,大小:%d 字节
// 直接使用原始数据指针,避免内存分配
// 如果on_incoming_audio_需要持久化数据它应该自己负责复制
on_incoming_audio_(std::vector<uint8_t>(static_cast<const uint8_t*>(data), static_cast<const uint8_t*>(data) + size));
}
void VolcRtcProtocol::SendText(const std::string& text) {
if (!rtc_handle_ || !is_connected_) {
ESP_LOGW(TAG, "不能发送文本消息RTC未准备好");// 不能发送文本消息RTC未准备好
return;
}
std::lock_guard<std::mutex> lock(rtc_mutex_);
volc_data_info_t data_info;
memset(&data_info, 0, sizeof(data_info));
data_info.type = VOLC_DATA_TYPE_MESSAGE; // 文本数据类型
int ret = volc_rtc_send(rtc_handle_, text.data(), text.size(), &data_info);
if (ret != 0) {
ESP_LOGE(TAG, "发送文本消息失败:%d", ret);// 发送文本消息失败:%d
} else {
ESP_LOGD(TAG, "发送文本消息: %s", text.c_str());// 发送文本消息:%s
}
}
/// Sends a StartListening request, or defers it until the session is ready.
///
/// When no session id is known yet or the RTC session is not connected, the request
/// is stored (start_listening_pending_ / pending_listening_mode_); the callbacks send
/// it once a session id arrives. Otherwise the base-class implementation sends it now.
///
/// @param mode Listening mode to request.
void VolcRtcProtocol::SendStartListening(ListeningMode mode) {
    const bool session_ready = !session_id_.empty() && is_connected_;
    if (session_ready) {
        Protocol::SendStartListening(mode);
        return;
    }

    start_listening_pending_ = true;
    pending_listening_mode_ = mode;
    ESP_LOGI(TAG, "延迟发送StartListening等待会话就绪");
}
// 🔊 发送控制指令到RTC
void VolcRtcProtocol::SendCtrl(const std::string& json) {
if (!rtc_handle_ || !is_connected_) {
ESP_LOGW(TAG, "不能发送ctrl二进制消息RTC未准备好");// 不能发送ctrl二进制消息RTC未准备好
return;
}
std::lock_guard<std::mutex> lock(rtc_mutex_);// 🔊 发送控制指令到RTC时加锁保护RTC句柄
// 构建二进制消息:"ctrl" + 4字节大端长度 + JSON负载
const char magic[4] = {'c','t','r','l'};
const uint32_t len = (uint32_t)json.size();
std::vector<uint8_t> payload;
payload.reserve(4 + 4 + len);
payload.insert(payload.end(), magic, magic + 4);
payload.push_back((uint8_t)((len >> 24) & 0xFF));
payload.push_back((uint8_t)((len >> 16) & 0xFF));
payload.push_back((uint8_t)((len >> 8) & 0xFF));
payload.push_back((uint8_t)(len & 0xFF));
payload.insert(payload.end(), json.begin(), json.end());
volc_data_info_t data_info;
memset(&data_info, 0, sizeof(data_info));
data_info.type = VOLC_DATA_TYPE_MESSAGE;
data_info.info.message.is_binary = true;
int ret = volc_rtc_send(rtc_handle_, payload.data(), (int)payload.size(), &data_info);
if (ret != 0) {
ESP_LOGE(TAG, "发送ctrl二进制消息失败%d", ret);
} else {
ESP_LOGD(TAG, "发送ctrl二进制消息: %.*s", (int)json.size(), json.c_str());
}
}
// 🔊 发送函数调用指令到RTC
void VolcRtcProtocol::SendFunc(const std::string& json) {
if (!rtc_handle_ || !is_connected_) {
ESP_LOGW(TAG, "不能发送func二进制消息RTC未准备好");// 不能发送func二进制消息RTC未准备好
return;
}
std::lock_guard<std::mutex> lock(rtc_mutex_);// 🔊 发送函数调用指令到RTC时加锁保护RTC句柄
const char magic[4] = {'f','u','n','c'};
const uint32_t len = (uint32_t)json.size();
std::vector<uint8_t> payload;
payload.reserve(4 + 4 + len);
payload.insert(payload.end(), magic, magic + 4);
payload.push_back((uint8_t)((len >> 24) & 0xFF));
payload.push_back((uint8_t)((len >> 16) & 0xFF));
payload.push_back((uint8_t)((len >> 8) & 0xFF));
payload.push_back((uint8_t)(len & 0xFF));
payload.insert(payload.end(), json.begin(), json.end());
volc_data_info_t data_info;
memset(&data_info, 0, sizeof(data_info));
data_info.type = VOLC_DATA_TYPE_MESSAGE;
data_info.info.message.is_binary = true;
int ret = volc_rtc_send(rtc_handle_, payload.data(), (int)payload.size(), &data_info);
if (ret != 0) {
ESP_LOGE(TAG, "发送func二进制消息失败%d", ret);
} else {
ESP_LOGD(TAG, "发送func二进制消息: %.*s", (int)json.size(), json.c_str());
}
}
// 🔊 发送函数调用结果到RTC
void VolcRtcProtocol::SendFunctionResult(const std::string& tool_call_id, const std::string& content) {
cJSON* obj = cJSON_CreateObject();
if (!obj) {
ESP_LOGE(TAG, "创建函数结果JSON失败回退为文本");// 创建函数结果JSON失败回退为文本
Protocol::SendFunctionResult(tool_call_id, content);
return;
}
cJSON_AddStringToObject(obj, "ToolCallID", tool_call_id.c_str());// 添加函数调用ID到JSON
cJSON_AddStringToObject(obj, "Content", content.c_str());// 添加函数调用结果到JSON
char* printed = cJSON_PrintUnformatted(obj);
std::string json = printed ? printed : std::string();
if (printed) cJSON_free(printed);
cJSON_Delete(obj);
if (json.empty()) {
ESP_LOGW(TAG, "函数结果JSON为空回退为文本");
Protocol::SendFunctionResult(tool_call_id, content);
return;
}
SendFunc(json);
}
// 🔊 发送文本消息到RTC (传入大模型上下文信息)
void VolcRtcProtocol::SendTextMessage(const std::string& text) {
// 按官方方案封装ExternalTextToLLM确保进入LLM并触发TTS
cJSON* root = cJSON_CreateObject();
if (!root) {
ESP_LOGE(TAG, "创建JSON失败回退为文本消息");
Protocol::SendTextMessage(text);
return;
}
cJSON_AddStringToObject(root, "Command", "ExternalTextToLLM");
cJSON_AddStringToObject(root, "Message", text.c_str());
cJSON_AddNumberToObject(root, "InterruptMode", 1);
char* printed = cJSON_PrintUnformatted(root);
std::string json = printed ? printed : std::string();
if (printed) cJSON_free(printed);
cJSON_Delete(root);
if (json.empty()) {
ESP_LOGW(TAG, "生成的JSON为空回退为文本消息");
Protocol::SendTextMessage(text);
return;
}
SendCtrl(json);
}
/// Asks the RTC engine to interrupt the current speech via volc_rtc_interrupt().
///
/// Dropped with a warning unless the RTC handle exists, the session is connected and
/// the audio channel is open.
///
/// @param reason Abort reason; not used by this implementation.
void VolcRtcProtocol::SendAbortSpeaking(AbortReason reason) {
    const bool ready = rtc_handle_ && is_connected_ && is_audio_channel_opened_;
    if (!ready) {
        ESP_LOGW(TAG, "不能发送中止通话请求RTC未准备好");
        return;
    }

    std::lock_guard<std::mutex> guard(rtc_mutex_);  // serializes use of the RTC handle
    ESP_LOGI(TAG, "通过Volc RTC中断发送中止通话请求");

    const int status = volc_rtc_interrupt(rtc_handle_);
    if (status == 0) {
        ESP_LOGI(TAG, "通过Volc RTC中断发送打断请求成功");
        return;
    }
    ESP_LOGE(TAG, "通过Volc RTC中断发送打断请求失败%d", status);
}