toy-Kapi_Rtc/main/protocols/volc_rtc_protocol.cc
2026-01-20 16:55:17 +08:00

843 lines
39 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "volc_rtc_protocol.h"
#include <wifi_station.h>
#include "esp_log.h"
#include "sdkconfig.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "esp_heap_caps.h"
#include "esp_system.h"
#include <cJSON.h>
#include <cstring>
#include <inttypes.h>
#include <sys/stat.h>
#include <errno.h>
#include <unistd.h>
// 新增包含 system_info.h 头文件以使用 SystemInfo 类
#include "system_info.h"
#include "application.h"
// SNTP is initialized in WiFi board after network is up; no duplicate init here
#include "base/volc_device_manager.h"
#include "settings.h"
// Log tag passed to the ESP_LOG* macros and esp_log_level_set() in this file.
static const char* TAG = "VolcRtcProtocol";
// Constructs the protocol object and creates the FreeRTOS event group whose
// bits 0x1 / 0x2 are later set by MessageCallback and awaited by
// OpenAudioChannel.
VolcRtcProtocol::VolcRtcProtocol() {
    this->event_group_handle_ = xEventGroupCreate();
}
// Releases the RTC engine, the event group and the heap-allocated device name.
VolcRtcProtocol::~VolcRtcProtocol() {
    // Tear down the RTC engine first. MessageCallback sets/clears bits on
    // event_group_handle_, so the event group must stay valid for as long as
    // the engine could still invoke callbacks.
    // NOTE(review): whether volc_rtc_stop() synchronously fires callbacks is
    // not visible from this file - the ordering is defensive.
    if (rtc_handle_) {
        volc_rtc_stop(rtc_handle_);
        volc_rtc_destroy(rtc_handle_);
        rtc_handle_ = nullptr;
    }
    if (event_group_handle_) {
        vEventGroupDelete(event_group_handle_);
        event_group_handle_ = nullptr;
    }
    // Start() heap-allocates device_name only when CONFIG_VOLC_DEVICE_NAME is
    // empty (MAC-address fallback); otherwise it points at the string literal.
    // Test that same condition instead of comparing against the literal's
    // address: whether two occurrences of a literal share storage is
    // unspecified, so the pointer comparison could free a literal.
    if (iot_info_.device_name && strlen(CONFIG_VOLC_DEVICE_NAME) == 0) {
        free(iot_info_.device_name);
        iot_info_.device_name = nullptr;
    }
    // NOTE(review): device_secret / rtc_app_id may have been strdup()'d in
    // Start() or handed over by volc_device_register(); their ownership is not
    // clear from this file, so they are intentionally not freed here.
}
void VolcRtcProtocol::Start() {
ESP_LOGI(TAG, "VolcRtcProtocol 开始启动...");// VolcRtcProtocol 开始启动...
esp_log_level_set(TAG, ESP_LOG_DEBUG);
// 注释掉所有文件系统相关操作,避免设备重启
// 这些操作需要文件系统支持,但当前设备可能没有正确挂载文件系统
// ESP_LOGI(TAG, "跳过文件系统操作以防止设备重启");// 跳过文件系统操作以防止设备重启
// TODO: Implement proper file system initialization if file logging is needed
// 禁用获取当前工作目录的操作,避免文件系统访问
// TODO: Re-enable if filesystem is properly initialized
// ESP_LOGI(TAG, "当前工作目录检查已禁用,以防止文件系统访问");// 当前工作目录检查已禁用,以防止文件系统访问
// 如果已有RTC实例先停止并销毁
if (rtc_handle_) {
volc_rtc_stop(rtc_handle_);
volc_rtc_destroy(rtc_handle_);
rtc_handle_ = nullptr;
}
// 创建火山RTC配置
cJSON* config = cJSON_CreateObject();
if (!config) {
ESP_LOGE(TAG, "RTC配置创建失败");// RTC配置创建失败
SetError("Failed to create RTC config");
return;
}
// 添加必要的RTC配置项
cJSON* audio_config = cJSON_CreateObject();
if (audio_config) {
cJSON_AddBoolToObject(audio_config, "publish", true);
cJSON_AddBoolToObject(audio_config, "subscribe", true);
cJSON_AddNumberToObject(audio_config, "codec", 4); // 设置音频编解码器为4根据设计文档
cJSON_AddItemToObject(config, "audio", audio_config);// 添加音频配置到RTC配置
}
cJSON* video_config = cJSON_CreateObject();
if (video_config) {
cJSON_AddBoolToObject(video_config, "publish", false);
cJSON_AddBoolToObject(video_config, "subscribe", false);
cJSON_AddNumberToObject(video_config, "codec", 1); // 设置视频编解码器为1根据设计文档
cJSON_AddItemToObject(config, "video", video_config);
}
cJSON_AddNumberToObject(config, "log_level", 1); // 设置日志级别
// 添加参数数组,与 Airhub_Rtc_h 项目保持一致
cJSON* params = cJSON_CreateArray();
if (params) {
// 只输出日志到控制台,不输出到文件
cJSON_AddItemToArray(params, cJSON_CreateString("{\"debug\":{\"log_to_console\":1}}"));// 添加日志到控制台配置
cJSON_AddItemToArray(params, cJSON_CreateString("{\"audio\":{\"codec\":{\"internal\":{\"enable\":1}}}}"));// 添加音频编解码器内部配置,启用 SDK 内部编解码
cJSON_AddItemToArray(params, cJSON_CreateString("{\"rtc\":{\"access\":{\"concurrent_requests\":1}}}"));// 添加RTC并发请求配置
cJSON_AddItemToArray(params, cJSON_CreateString("{\"rtc\":{\"ice\":{\"concurrent_agents\":1}}}"));// 添加RTC并发ICE代理配置
cJSON_AddItemToObject(config, "params", params);
}
// 创建IoT信息并优先从NVS加载
memset(&iot_info_, 0, sizeof(iot_info_));
iot_info_.instance_id = (char*)CONFIG_VOLC_INSTANCE_ID;
iot_info_.product_key = (char*)CONFIG_VOLC_PRODUCT_KEY;
iot_info_.product_secret = (char*)CONFIG_VOLC_PRODUCT_SECRET;
iot_info_.bot_id = (char*)CONFIG_VOLC_BOT_ID;
// 优先使用配置文件中的设备名称如果为空则使用MAC地址
if (CONFIG_VOLC_DEVICE_NAME && strlen(CONFIG_VOLC_DEVICE_NAME) > 0) {
// 使用配置文件中的设备名称
iot_info_.device_name = (char*)CONFIG_VOLC_DEVICE_NAME;
ESP_LOGI(TAG, "使用配置文件中的设备名称: %s", iot_info_.device_name);
} else {
// 配置文件中的设备名称为空使用MAC地址作为设备名称
std::string mac_address = SystemInfo::GetMacAddress();
// MAC地址中替换冒号为下划线,避免文件名中包含冒号
std::replace(mac_address.begin(), mac_address.end(), ':', '_');
char* mac_buffer = (char*)malloc(mac_address.length() + 1);
strcpy(mac_buffer, mac_address.c_str());
iot_info_.device_name = mac_buffer;
ESP_LOGI(TAG, "使用Wi-Fi MAC地址作为设备名称(已替换冒号为下划线): %s", iot_info_.device_name);
}
Settings s("volc");
auto saved_name = s.GetString("device_name", "");
bool name_mismatch = (!saved_name.empty() && strcmp(saved_name.c_str(), iot_info_.device_name) != 0);
std::string saved_secret;
std::string saved_appid;
if (name_mismatch) {
ESP_LOGW(TAG, "检测到设备名称变更:%s -> %s清除旧凭证", saved_name.c_str(), iot_info_.device_name);
Settings sw("volc", true);
sw.EraseKey("device_secret");
sw.EraseKey("rtc_app_id");
sw.SetString("device_name", iot_info_.device_name);
} else {
saved_secret = s.GetString("device_secret", "");
saved_appid = s.GetString("rtc_app_id", "");
if (saved_name.empty()) {
Settings sw("volc", true);
sw.SetString("device_name", iot_info_.device_name);
}
}
if (!saved_secret.empty()) {
iot_info_.device_secret = strdup(saved_secret.c_str());
}
if (!saved_appid.empty()) {
iot_info_.rtc_app_id = strdup(saved_appid.c_str());
}
ESP_LOGI(TAG, "NVS凭证已加载secret=%d appid=%d device_name=%s, free_heap=%u",
!saved_secret.empty(), !saved_appid.empty(), iot_info_.device_name,
(unsigned)heap_caps_get_free_size(MALLOC_CAP_DEFAULT));
// 创建一个结构体来传递参数给任务
struct InitParams {
VolcRtcProtocol* protocol;
cJSON* config;
};
InitParams* init_params = new InitParams();
init_params->protocol = this;
init_params->config = config;
// 将设备注册和RTC创建操作移到单独的任务中执行避免main任务栈溢出
xTaskCreate([](void* arg) {
InitParams* init_params = static_cast<InitParams*>(arg);
VolcRtcProtocol* protocol = init_params->protocol;
cJSON* config = init_params->config;
// 如果没有设备密钥或RTC应用ID进行设备注册
if (!protocol->iot_info_.device_secret || !protocol->iot_info_.rtc_app_id) {
char* device_secret_ptr = nullptr;
if (volc_device_register(&protocol->iot_info_, &device_secret_ptr) != 0 || device_secret_ptr == nullptr) {
ESP_LOGE(TAG, "设备注册失败");// 设备注册失败
protocol->SetError("Failed to register device");
cJSON_Delete(config);
delete init_params;
vTaskDelete(NULL);
return;
}
protocol->iot_info_.device_secret = device_secret_ptr;
Settings sw("volc", true);
sw.SetString("device_secret", protocol->iot_info_.device_secret);
if (protocol->iot_info_.rtc_app_id) {
sw.SetString("rtc_app_id", protocol->iot_info_.rtc_app_id);
}
sw.SetString("device_name", protocol->iot_info_.device_name);
}
// 创建RTC实例
protocol->rtc_handle_ = volc_rtc_create(
protocol->iot_info_.rtc_app_id ? protocol->iot_info_.rtc_app_id : CONFIG_VOLC_INSTANCE_ID,
protocol,
config,
&MessageCallback,
&DataCallback
);
cJSON_Delete(config);
delete init_params;
if (!protocol->rtc_handle_) {
ESP_LOGE(TAG, "RTC实例创建失败");// RTC实例创建失败
protocol->SetError("Failed to create RTC instance");
} else {
protocol->iot_ready_ = true;
ESP_LOGI(TAG, "RTC实例已准备就绪房间加入将在监听状态后执行");// RTC实例已准备就绪房间加入将在监听状态后执行
Application::GetInstance().InitializeWebsocketProtocol();// RTC初始化成功后初始化Websocket协议
}
vTaskDelete(NULL);
}, "volc_rtc_init", 16384, init_params, 5, NULL);
// 注意此处不再立即创建RTC实例而是将其推迟到任务中执行
ESP_LOGI(TAG, "VolcRtcProtocol初始化任务已创建");// VolcRtcProtocol初始化任务已创建
}
// Stores the AgentConfig parameter string (the `config` and `agent_config`
// parts of the request body); OpenAudioChannel passes it to volc_rtc_start().
void VolcRtcProtocol::SetAgentConfig(const std::string& params) {
    extra_params_.assign(params);
    ESP_LOGI(TAG, "设置Agent配置参数: %s", extra_params_.c_str());
}
// 🔊 发送音频数据到RTC
void VolcRtcProtocol::SendAudio(const std::vector<uint8_t>& data) {
if (!rtc_handle_ || !is_connected_ || !is_audio_channel_opened_) {
ESP_LOGW(TAG, "无法发送音频RTC未准备就绪");// 无法发送音频RTC未准备就绪
return;
}
std::lock_guard<std::mutex> lock(rtc_mutex_);
volc_data_info_t data_info;
memset(&data_info, 0, sizeof(data_info));
data_info.type = VOLC_DATA_TYPE_AUDIO; // 音频数据类型
data_info.info.audio.data_type = VOLC_AUDIO_DATA_TYPE_OPUS; // 格式OPUS
// 音频参数应该在RTC初始化时已经设置好这里只需要发送数据
int ret = volc_rtc_send(rtc_handle_, data.data(), data.size(), &data_info);
if (ret != 0) {
ESP_LOGE(TAG, "发送音频失败:%d", ret);// 发送音频失败
} else {
opus_bytes_accum_ += data.size();
opus_frames_accum_ += 1;
LogUplinkStatsMaybe();
}
}
// 🔊 发送PCM音频数据到RTC
void VolcRtcProtocol::SendPcm(const std::vector<uint8_t>& data) {
if (!rtc_handle_ || !is_connected_ || !is_audio_channel_opened_) {
ESP_LOGW(TAG, "无法发送音频RTC未准备就绪");
return;
}
std::lock_guard<std::mutex> lock(rtc_mutex_);
pcm_pending_.insert(pcm_pending_.end(), data.begin(), data.end());
// 以 20ms 固定帧打包 PCM8k/16bit/mono即 320 字节;静音段也持续发送以满足 AEC/RTC 的恒定节拍
const size_t frame_bytes = (size_t)(8000 * 20 / 1000) * sizeof(int16_t);
size_t offset = 0;
while (offset + frame_bytes <= pcm_pending_.size()) {
volc_data_info_t data_info;
memset(&data_info, 0, sizeof(data_info));
data_info.type = VOLC_DATA_TYPE_AUDIO;
data_info.info.audio.data_type = VOLC_AUDIO_DATA_TYPE_PCM;
data_info.info.audio.commit = false;
int ret = volc_rtc_send(rtc_handle_, pcm_pending_.data() + offset, frame_bytes, &data_info);
if (ret != 0) {
ESP_LOGE(TAG, "发送音频失败:%d", ret);
break;
} else {
pcm_bytes_accum_ += frame_bytes;
pcm_frames_accum_ += 1;
}
offset += frame_bytes;
}
if (offset > 0) {
pcm_pending_.erase(pcm_pending_.begin(), pcm_pending_.begin() + offset);
}
LogUplinkStatsMaybe();
}
// 🔊 发送G711A音频数据到RTC
void VolcRtcProtocol::SendG711A(const std::vector<uint8_t>& data) {
if (!rtc_handle_ || !is_connected_ || !is_audio_channel_opened_) {
ESP_LOGW(TAG, "无法发送音频RTC未准备就绪");
return;
}
std::lock_guard<std::mutex> lock(rtc_mutex_);
g711a_pending_.insert(g711a_pending_.end(), data.begin(), data.end());
const size_t frame_bytes = 160;
size_t offset = 0;
while (offset + frame_bytes <= g711a_pending_.size()) {
volc_data_info_t data_info;
memset(&data_info, 0, sizeof(data_info));
data_info.type = VOLC_DATA_TYPE_AUDIO;
data_info.info.audio.data_type = VOLC_AUDIO_DATA_TYPE_G711A;
data_info.info.audio.commit = true;
int ret = volc_rtc_send(rtc_handle_, g711a_pending_.data() + offset, frame_bytes, &data_info);
if (ret != 0) {
ESP_LOGE(TAG, "发送音频失败:%d", ret);
break;
} else {
ESP_LOGI(TAG, "发送上行G711A帧: 大小=%zu", (size_t)frame_bytes);
g711a_bytes_accum_ += frame_bytes;
g711a_frames_accum_ += 1;
}
offset += frame_bytes;
}
if (offset > 0) {
g711a_pending_.erase(g711a_pending_.begin(), g711a_pending_.begin() + offset);
}
LogUplinkStatsMaybe();
}
// Logs uplink/downlink audio statistics at most once every two seconds and
// then resets all accumulators. The first call only records the start time.
void VolcRtcProtocol::LogUplinkStatsMaybe() {
    const uint64_t now_us = esp_timer_get_time();
    if (uplink_last_log_us_ == 0) {
        uplink_last_log_us_ = now_us;
    }

    const uint64_t elapsed_us = now_us - uplink_last_log_us_;
    if (elapsed_us < 2000000) {
        return;
    }

    // elapsed_us is at least 2,000,000 here, so the division is always safe.
    const uint64_t uplink_bytes = (uint64_t)(opus_bytes_accum_ + pcm_bytes_accum_ + g711a_bytes_accum_);
    const uint64_t bps = (uplink_bytes * 8 * 1000000ULL) / elapsed_us;

    ESP_LOGI(TAG, "上行音频统计: PCM帧=%d 字节=%zu, G711A帧=%d 字节=%zu, 速率=%llu bps",
             pcm_frames_accum_, (size_t)pcm_bytes_accum_, g711a_frames_accum_, (size_t)g711a_bytes_accum_, (unsigned long long)bps);
    ESP_LOGI(TAG, "下行音频统计: PCM字节=%zu, OPUS字节=%zu",
             (size_t)down_pcm_bytes_accum_, (size_t)down_opus_bytes_accum_);

    // Start a new measurement window.
    opus_bytes_accum_ = 0;
    pcm_bytes_accum_ = 0;
    g711a_bytes_accum_ = 0;
    down_pcm_bytes_accum_ = 0;
    down_opus_bytes_accum_ = 0;
    opus_frames_accum_ = 0;
    pcm_frames_accum_ = 0;
    g711a_frames_accum_ = 0;
    uplink_last_log_us_ = now_us;
}
// 🔊 打开音频通道
bool VolcRtcProtocol::OpenAudioChannel() {
if (!rtc_handle_) {
ESP_LOGW(TAG, "无法打开音频通道RTC句柄未准备就绪");// 无法打开音频通道RTC句柄未准备就绪
return false;
}
if (!is_connected_) {
if (!iot_ready_) {
ESP_LOGE(TAG, "IoT信息未准备就绪无法加入房间");// IoT信息未准备就绪无法加入房间
ESP_LOGW(TAG, "Diag: app_id=%s device_name=%s bot_id=%s secret=%s", iot_info_.rtc_app_id ? iot_info_.rtc_app_id : "(null)", iot_info_.device_name ? iot_info_.device_name : "(null)", CONFIG_VOLC_BOT_ID, iot_info_.device_secret ? "yes" : "no");
return false;
}
xEventGroupClearBits(event_group_handle_, 0x1 | 0x2);
// 新增extra_params 用于传递额外的AgentConfig配置参数
ESP_LOGI(TAG, "Join RTC: handle=%p bot=%s iot_ready=%d free_heap=%u", rtc_handle_, CONFIG_VOLC_BOT_ID, (int)iot_ready_, (unsigned)heap_caps_get_free_size(MALLOC_CAP_DEFAULT));
int ret = volc_rtc_start(rtc_handle_, CONFIG_VOLC_BOT_ID, &iot_info_, extra_params_.empty() ? NULL : extra_params_.c_str());
if (ret != 0) {
ESP_LOGE(TAG, "RTC启动失败%d", ret);// RTC启动失败%d
ESP_LOGW(TAG, "Diag: start failed. Possible causes: invalid IoT creds, TLS/HTTP error, network unreachable, time not synced");// 诊断启动失败可能原因无效的IoT凭证、TLS/HTTP错误、网络不可达、时间未同步
return false;
}
EventBits_t bits = xEventGroupWaitBits(event_group_handle_, 0x1, pdFALSE, pdFALSE, pdMS_TO_TICKS(5000));
ESP_LOGI(TAG, "Wait connect bits=0x%x free_heap=%u", (unsigned)bits, (unsigned)heap_caps_get_free_size(MALLOC_CAP_DEFAULT));
if ((bits & 0x1) == 0) {
ESP_LOGE(TAG, "RTC连接超时");// RTC连接超时
ESP_LOGW(TAG, "Diag: check Wi-Fi, SNTP time sync, IoT creds, RTC server availability");// 诊断检查Wi-Fi、SNTP时间同步、IoT凭证、RTC服务器可用性
return false;
}
// Do not block audio readiness on remote user join; enable subscribe immediately
bits = xEventGroupWaitBits(event_group_handle_, 0x2, pdFALSE, pdFALSE, pdMS_TO_TICKS(3000));
if ((bits & 0x2) == 0) {
ESP_LOGW(TAG, "RTC远程用户未加入 yet - 主动开启音频通道");// RTC远程用户未加入 yet - 主动开启音频通道
// 远程用户未加入时,需要手动设置状态
server_sample_rate_ = 16000;
server_frame_duration_ = 60;
is_audio_channel_opened_ = true;
first_downlink_logged_ = false;
ESP_LOGI(TAG, "音频通道已打开");// 音频通道已打开
if (on_audio_channel_opened_) {
on_audio_channel_opened_();
}
} else {
// 远程用户已加入时不要重复打印日志因为MessageCallback中已经处理
// 但需要确保状态正确设置
if (!is_audio_channel_opened_) {
server_sample_rate_ = 16000;
server_frame_duration_ = 60;
is_audio_channel_opened_ = true;
first_downlink_logged_ = false;
ESP_LOGI(TAG, "音频通道已打开");// 音频通道已打开
if (on_audio_channel_opened_) {
on_audio_channel_opened_();
}
}
}
}
return true;
}
// Closes the audio channel: stops the RTC session if it is connected, clears
// the channel-open flag and fires the channel-closed callback. Does nothing
// when there is no RTC handle.
void VolcRtcProtocol::CloseAudioChannel() {
    if (rtc_handle_) {
        if (is_connected_) {
            volc_rtc_stop(rtc_handle_);
            is_connected_ = false;
        }

        ESP_LOGI(TAG, "音频通道已关闭");
        is_audio_channel_opened_ = false;

        if (on_audio_channel_closed_) {
            on_audio_channel_closed_();
        }
    }
}
// Reports whether the audio channel is currently marked as open.
bool VolcRtcProtocol::IsAudioChannelOpened() const {
    const bool opened = is_audio_channel_opened_;
    return opened;
}
void VolcRtcProtocol::MessageCallback(void* context, volc_msg_t* message) {
VolcRtcProtocol* protocol = static_cast<VolcRtcProtocol*>(context);
// 目前只处理简单的连接状态消息
switch (message->code) {
case VOLC_MSG_CONNECTED:
protocol->is_connected_ = true;
xEventGroupSetBits(protocol->event_group_handle_, 0x1);
protocol->server_sample_rate_ = 16000;
protocol->server_frame_duration_ = 60;
ESP_LOGI(TAG, "RTC连接成功");// RTC连接成功
//Application::GetInstance().InitializeWebsocketProtocol();// RTC连接成功后初始化Websocket协议
break;
case VOLC_MSG_DISCONNECTED:
protocol->is_connected_ = false;
protocol->is_audio_channel_opened_ = false;
xEventGroupClearBits(protocol->event_group_handle_, 0x1 | 0x2);
ESP_LOGI(TAG, "RTC断开连接");// RTC断开连接
break;
case VOLC_MSG_USER_JOINED:
// 只有在音频通道尚未打开的情况下才设置状态和调用回调
if (!protocol->is_audio_channel_opened_) {
protocol->is_audio_channel_opened_ = true;
xEventGroupSetBits(protocol->event_group_handle_, 0x2);
ESP_LOGI(TAG, "RTC远程用户加入");// RTC远程用户加入
// Set default decoder parameters before audio starts
protocol->server_sample_rate_ = 16000;
protocol->server_frame_duration_ = 60;
// 调用音频通道打开回调
if (protocol->on_audio_channel_opened_) {
protocol->on_audio_channel_opened_();
}
} else {
// 音频通道已经打开,只更新事件标志
xEventGroupSetBits(protocol->event_group_handle_, 0x2);
ESP_LOGD(TAG, "RTC远程用户加入音频通道已打开");// 调试信息,不重复打印
}
break;
case VOLC_MSG_KEY_FRAME_REQ:
// 关键帧请求消息不需要处理msg字段
ESP_LOGI(TAG, "接收RTC关键帧请求");// 接收RTC关键帧请求
break;
case VOLC_MSG_TARGET_BITRATE_CHANGED:
// 目标码率变化消息使用target_bitrate字段
// ESP_LOGI(TAG, "RTC target bitrate changed: %lu bps", message->data.target_bitrate);
break;
case VOLC_MSG_CONV_STATUS:
// 会话状态消息使用conv_status字段
ESP_LOGI(TAG, "RTC会话状态%lu", message->data.conv_status);
if (message && message->data.msg && message->data.msg[0] != '\0') {
std::string text(message->data.msg);
ESP_LOGI(TAG, "RTC会话状态消息内容: %s", text.c_str());
cJSON* root = cJSON_Parse(text.c_str());
if (root) {
const char* sid_keys[] = {"sessionId", "session_id", "sid"};
cJSON* sid = nullptr;
for (size_t i = 0; i < sizeof(sid_keys) / sizeof(sid_keys[0]); ++i) {
sid = cJSON_GetObjectItem(root, sid_keys[i]);
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
break;
}
sid = nullptr;
}
if (!sid) {
const char* containers[] = {"data", "payload", "context", "session"};
for (size_t i = 0; i < sizeof(containers) / sizeof(containers[0]); ++i) {
cJSON* obj = cJSON_GetObjectItem(root, containers[i]);
if (obj && cJSON_IsObject(obj)) {
for (size_t j = 0; j < sizeof(sid_keys) / sizeof(sid_keys[0]); ++j) {
sid = cJSON_GetObjectItem(obj, sid_keys[j]);
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
break;
}
}
}
if (sid) break;
}
}
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
protocol->session_id_ = sid->valuestring;
ESP_LOGI(TAG, "Session ID set: %s", protocol->session_id_.c_str());
if (protocol->is_audio_channel_opened_ && protocol->start_listening_pending_) {
ListeningMode m = protocol->pending_listening_mode_;
protocol->start_listening_pending_ = false;
protocol->SendStartListening(m);
}
}
if (protocol->on_incoming_json_) {
protocol->on_incoming_json_(root);
}
cJSON_Delete(root);
}
}
break;
default:
ESP_LOGI(TAG, "接收RTC消息%d", message->code);// 接收RTC消息%d
if (message && message->data.msg && message->data.msg[0] != '\0') {
std::string text(message->data.msg);
ESP_LOGI(TAG, "RTC消息内容: %s", text.c_str());
cJSON* root = cJSON_Parse(text.c_str());
if (root) {
const char* sid_keys[] = {"sessionId", "session_id", "sid"};
cJSON* sid = nullptr;
for (size_t i = 0; i < sizeof(sid_keys) / sizeof(sid_keys[0]); ++i) {
sid = cJSON_GetObjectItem(root, sid_keys[i]);
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
break;
}
sid = nullptr;
}
if (!sid) {
const char* containers[] = {"data", "payload", "context", "session"};
for (size_t i = 0; i < sizeof(containers) / sizeof(containers[0]); ++i) {
cJSON* obj = cJSON_GetObjectItem(root, containers[i]);
if (obj && cJSON_IsObject(obj)) {
for (size_t j = 0; j < sizeof(sid_keys) / sizeof(sid_keys[0]); ++j) {
sid = cJSON_GetObjectItem(obj, sid_keys[j]);
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
break;
}
}
}
if (sid) break;
}
}
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
protocol->session_id_ = sid->valuestring;
ESP_LOGI(TAG, "Session ID set: %s", protocol->session_id_.c_str());
if (protocol->is_audio_channel_opened_ && protocol->start_listening_pending_) {
ListeningMode m = protocol->pending_listening_mode_;
protocol->start_listening_pending_ = false;
protocol->SendStartListening(m);
}
}
if (protocol->on_incoming_json_) {
protocol->on_incoming_json_(root);// 调用回调函数处理JSON消息
}
cJSON_Delete(root);// 删除JSON根对象释放内存
}
}
break;
}
}
// 处理RTC音频数据
void VolcRtcProtocol::DataCallback(void* context, const void* data, size_t len, volc_data_info_t* info) {
VolcRtcProtocol* protocol = static_cast<VolcRtcProtocol*>(context);
// ESP_LOGI(TAG, "RTC data: type=%d len=%u free_heap=%u", info->type, (unsigned)len, (unsigned)heap_caps_get_free_size(MALLOC_CAP_DEFAULT));
if (info->type == VOLC_DATA_TYPE_AUDIO) {
if (info) {
protocol->downlink_is_pcm_ = (info->info.audio.data_type == VOLC_AUDIO_DATA_TYPE_PCM);
if (protocol->downlink_is_pcm_) {
protocol->down_pcm_bytes_accum_ += len;
protocol->server_sample_rate_ = 8000;
protocol->server_frame_duration_ = 20;
} else {
protocol->down_opus_bytes_accum_ += len;
protocol->server_sample_rate_ = 16000;
protocol->server_frame_duration_ = 60;
}
if (!protocol->first_downlink_logged_) {
ESP_LOGI(TAG, "接收下行音频首包: 类型=%s 大小=%d", protocol->downlink_is_pcm_ ? "PCM" : "OPUS", (int)len);// 接收下行音频首包: 类型=%s 大小=%d
protocol->first_downlink_logged_ = true;// 标记已记录首包
}
}
protocol->ProcessAudioData(data, len);// 处理音频数据
} else if (info->type == VOLC_DATA_TYPE_MESSAGE) {
if (data && len > 0) {
const uint8_t* buf = static_cast<const uint8_t*>(data);
std::string json_text;
if (info->info.message.is_binary && len >= 8) {
bool is_ctrl = (memcmp(buf, "ctrl", 4) == 0);
bool is_conv = (memcmp(buf, "conv", 4) == 0);
bool is_tool = (memcmp(buf, "tool", 4) == 0);
if (is_ctrl || is_conv || is_tool) {
uint32_t json_len = (uint32_t)((buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | (buf[7]));
if (json_len > 0 && (size_t)(8 + json_len) <= len) {
json_text.assign(reinterpret_cast<const char*>(buf + 8), json_len);
if (!protocol->suppress_incoming_message_log_) {
ESP_LOGI(TAG, "接收下行二进制消息(%s): %.*s", is_ctrl ? "ctrl" : (is_conv ? "conv" : "tool"), (int)json_text.size(), json_text.c_str());
}
}
}
}
if (json_text.empty()) {
json_text.assign(reinterpret_cast<const char*>(data), len);
if (!protocol->suppress_incoming_message_log_) {
ESP_LOGI(TAG, "接收下行消息: %.*s", (int)json_text.size(), json_text.c_str());
}
}
cJSON* root = cJSON_Parse(json_text.c_str());
if (root) {
const char* sid_keys[] = {"sessionId", "session_id", "sid"};
cJSON* sid = nullptr;
for (size_t i = 0; i < sizeof(sid_keys) / sizeof(sid_keys[0]); ++i) {
sid = cJSON_GetObjectItem(root, sid_keys[i]);
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
break;
}
sid = nullptr;
}
if (!sid) {
const char* containers[] = {"data", "payload", "context", "session"};
for (size_t i = 0; i < sizeof(containers) / sizeof(containers[0]); ++i) {
cJSON* obj = cJSON_GetObjectItem(root, containers[i]);
if (obj && cJSON_IsObject(obj)) {
for (size_t j = 0; j < sizeof(sid_keys) / sizeof(sid_keys[0]); ++j) {
sid = cJSON_GetObjectItem(obj, sid_keys[j]);
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
break;
}
}
}
if (sid) break;
}
}
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
protocol->session_id_ = sid->valuestring;
ESP_LOGI(TAG, "Session ID set: %s", protocol->session_id_.c_str());
if (protocol->is_audio_channel_opened_ && protocol->start_listening_pending_) {
ListeningMode m = protocol->pending_listening_mode_;
protocol->start_listening_pending_ = false;
protocol->SendStartListening(m);
}
}
if (protocol->on_incoming_json_) {
protocol->on_incoming_json_(root);
}
cJSON_Delete(root);
}
}
}
}
// 解析服务器发送的JSON消息
void VolcRtcProtocol::ParseServerMessage(const char* message) {
ESP_LOGI(TAG, "接收服务器消息:%s", message);// 接收服务器消息:%s
cJSON* root = cJSON_Parse(message);
if (!root) {
ESP_LOGE(TAG, "解析服务器消息失败");// 解析服务器消息失败
return;
}
if (on_incoming_json_) {
on_incoming_json_(root);
}
cJSON_Delete(root);
}
void VolcRtcProtocol::ProcessAudioData(const void* data, int size) {
if (!on_incoming_audio_) {
return;
}
ESP_LOGD(TAG, "接收音频数据,大小:%d 字节", size);// 接收音频数据,大小:%d 字节
// 直接使用原始数据指针,避免内存分配
// 如果on_incoming_audio_需要持久化数据它应该自己负责复制
on_incoming_audio_(std::vector<uint8_t>(static_cast<const uint8_t*>(data), static_cast<const uint8_t*>(data) + size));
}
void VolcRtcProtocol::SendText(const std::string& text) {
if (!rtc_handle_ || !is_connected_) {
ESP_LOGW(TAG, "不能发送文本消息RTC未准备好");// 不能发送文本消息RTC未准备好
return;
}
std::lock_guard<std::mutex> lock(rtc_mutex_);
volc_data_info_t data_info;
memset(&data_info, 0, sizeof(data_info));
data_info.type = VOLC_DATA_TYPE_MESSAGE; // 文本数据类型
int ret = volc_rtc_send(rtc_handle_, text.data(), text.size(), &data_info);
if (ret != 0) {
ESP_LOGE(TAG, "发送文本消息失败:%d", ret);// 发送文本消息失败:%d
} else {
ESP_LOGD(TAG, "发送文本消息: %s", text.c_str());// 发送文本消息:%s
}
}
// Sends the start-listening message, or queues it when the session is not
// ready. "Ready" means a session id is known and the RTC connection is up; a
// queued request is flushed by the callbacks once a session id arrives.
void VolcRtcProtocol::SendStartListening(ListeningMode mode) {
    const bool session_ready = !session_id_.empty() && is_connected_;
    if (session_ready) {
        // Delegate to the base-class implementation.
        Protocol::SendStartListening(mode);
        return;
    }

    start_listening_pending_ = true;
    pending_listening_mode_ = mode;
    ESP_LOGI(TAG, "延迟发送StartListening等待会话就绪");
}
// 🔊 发送控制指令到RTC
void VolcRtcProtocol::SendCtrl(const std::string& json) {
if (!rtc_handle_ || !is_connected_) {
ESP_LOGW(TAG, "不能发送ctrl二进制消息RTC未准备好");// 不能发送ctrl二进制消息RTC未准备好
return;
}
std::lock_guard<std::mutex> lock(rtc_mutex_);// 🔊 发送控制指令到RTC时加锁保护RTC句柄
// 构建二进制消息:"ctrl" + 4字节大端长度 + JSON负载
const char magic[4] = {'c','t','r','l'};
const uint32_t len = (uint32_t)json.size();
std::vector<uint8_t> payload;
payload.reserve(4 + 4 + len);
payload.insert(payload.end(), magic, magic + 4);
payload.push_back((uint8_t)((len >> 24) & 0xFF));
payload.push_back((uint8_t)((len >> 16) & 0xFF));
payload.push_back((uint8_t)((len >> 8) & 0xFF));
payload.push_back((uint8_t)(len & 0xFF));
payload.insert(payload.end(), json.begin(), json.end());
volc_data_info_t data_info;
memset(&data_info, 0, sizeof(data_info));
data_info.type = VOLC_DATA_TYPE_MESSAGE;
data_info.info.message.is_binary = true;
int ret = volc_rtc_send(rtc_handle_, payload.data(), (int)payload.size(), &data_info);
if (ret != 0) {
ESP_LOGE(TAG, "发送ctrl二进制消息失败%d", ret);
} else {
ESP_LOGD(TAG, "发送ctrl二进制消息: %.*s", (int)json.size(), json.c_str());
}
}
// 🔊 发送函数调用指令到RTC
void VolcRtcProtocol::SendFunc(const std::string& json) {
if (!rtc_handle_ || !is_connected_) {
ESP_LOGW(TAG, "不能发送func二进制消息RTC未准备好");// 不能发送func二进制消息RTC未准备好
return;
}
std::lock_guard<std::mutex> lock(rtc_mutex_);// 🔊 发送函数调用指令到RTC时加锁保护RTC句柄
const char magic[4] = {'f','u','n','c'};
const uint32_t len = (uint32_t)json.size();
std::vector<uint8_t> payload;
payload.reserve(4 + 4 + len);
payload.insert(payload.end(), magic, magic + 4);
payload.push_back((uint8_t)((len >> 24) & 0xFF));
payload.push_back((uint8_t)((len >> 16) & 0xFF));
payload.push_back((uint8_t)((len >> 8) & 0xFF));
payload.push_back((uint8_t)(len & 0xFF));
payload.insert(payload.end(), json.begin(), json.end());
volc_data_info_t data_info;
memset(&data_info, 0, sizeof(data_info));
data_info.type = VOLC_DATA_TYPE_MESSAGE;
data_info.info.message.is_binary = true;
int ret = volc_rtc_send(rtc_handle_, payload.data(), (int)payload.size(), &data_info);
if (ret != 0) {
ESP_LOGE(TAG, "发送func二进制消息失败%d", ret);
} else {
ESP_LOGD(TAG, "发送func二进制消息: %.*s", (int)json.size(), json.c_str());
}
}
// 🔊 发送函数调用结果到RTC
void VolcRtcProtocol::SendFunctionResult(const std::string& tool_call_id, const std::string& content) {
cJSON* obj = cJSON_CreateObject();
if (!obj) {
ESP_LOGE(TAG, "创建函数结果JSON失败回退为文本");// 创建函数结果JSON失败回退为文本
Protocol::SendFunctionResult(tool_call_id, content);
return;
}
cJSON_AddStringToObject(obj, "ToolCallID", tool_call_id.c_str());// 添加函数调用ID到JSON
cJSON_AddStringToObject(obj, "Content", content.c_str());// 添加函数调用结果到JSON
char* printed = cJSON_PrintUnformatted(obj);
std::string json = printed ? printed : std::string();
if (printed) cJSON_free(printed);
cJSON_Delete(obj);
if (json.empty()) {
ESP_LOGW(TAG, "函数结果JSON为空回退为文本");
Protocol::SendFunctionResult(tool_call_id, content);
return;
}
SendFunc(json);
}
// 🔊 发送文本消息到RTC (传入大模型上下文信息)
void VolcRtcProtocol::SendTextMessage(const std::string& text) {
// 按官方方案封装ExternalTextToLLM确保进入LLM并触发TTS
cJSON* root = cJSON_CreateObject();
if (!root) {
ESP_LOGE(TAG, "创建JSON失败回退为文本消息");
Protocol::SendTextMessage(text);
return;
}
cJSON_AddStringToObject(root, "Command", "ExternalTextToLLM");
cJSON_AddStringToObject(root, "Message", text.c_str());
cJSON_AddNumberToObject(root, "InterruptMode", 1);
char* printed = cJSON_PrintUnformatted(root);
std::string json = printed ? printed : std::string();
if (printed) cJSON_free(printed);
cJSON_Delete(root);
if (json.empty()) {
ESP_LOGW(TAG, "生成的JSON为空回退为文本消息");
Protocol::SendTextMessage(text);
return;
}
SendCtrl(json);
}
// Asks the RTC engine to interrupt the current speech via
// volc_rtc_interrupt(). `reason` is not used by this implementation. Requires
// an RTC handle, an established connection and an open audio channel.
void VolcRtcProtocol::SendAbortSpeaking(AbortReason reason) {
    (void)reason;

    const bool ready = rtc_handle_ && is_connected_ && is_audio_channel_opened_;
    if (!ready) {
        ESP_LOGW(TAG, "不能发送中止通话请求RTC未准备好");
        return;
    }

    // Protects the RTC handle for the duration of the interrupt call.
    std::lock_guard<std::mutex> guard(rtc_mutex_);
    ESP_LOGI(TAG, "通过Volc RTC中断发送中止通话请求");

    const int rc = volc_rtc_interrupt(rtc_handle_);
    if (rc == 0) {
        ESP_LOGI(TAG, "通过Volc RTC中断发送打断请求成功");
    } else {
        ESP_LOGE(TAG, "通过Volc RTC中断发送打断请求失败%d", rc);
    }
}