843 lines
39 KiB
C++
843 lines
39 KiB
C++
#include "volc_rtc_protocol.h"
|
||
#include <wifi_station.h>
|
||
#include "esp_log.h"
|
||
#include "sdkconfig.h"
|
||
#include "freertos/FreeRTOS.h"
|
||
#include "freertos/task.h"
|
||
#include "esp_heap_caps.h"
|
||
#include "esp_system.h"
|
||
#include <cJSON.h>
|
||
#include <cstring>
|
||
#include <inttypes.h>
|
||
#include <sys/stat.h>
|
||
#include <errno.h>
|
||
#include <unistd.h>
|
||
// 新增包含 system_info.h 头文件以使用 SystemInfo 类
|
||
#include "system_info.h"
|
||
#include "application.h"
|
||
// SNTP is initialized in WiFi board after network is up; no duplicate init here
|
||
#include "base/volc_device_manager.h"
|
||
#include "settings.h"
|
||
|
||
static const char* TAG = "VolcRtcProtocol";
|
||
|
||
/// Constructs the protocol object and creates the FreeRTOS event group that
/// OpenAudioChannel() and MessageCallback() use to signal connection progress.
VolcRtcProtocol::VolcRtcProtocol() {
    event_group_handle_ = xEventGroupCreate();
    if (event_group_handle_ == nullptr) {
        // Creation only fails when the heap is exhausted; make that visible
        // instead of failing later inside the event-group API.
        ESP_LOGE(TAG, "Failed to create event group");
    }
}
|
||
|
||
/// Releases the RTC instance, the event group and the heap-allocated device name.
VolcRtcProtocol::~VolcRtcProtocol() {
    // Tear down the RTC instance first: MessageCallback() sets/clears bits on
    // event_group_handle_, so the event group has to outlive the RTC instance
    // (which may still deliver callbacks while it is being stopped).
    if (rtc_handle_) {
        volc_rtc_stop(rtc_handle_);
        volc_rtc_destroy(rtc_handle_);
        rtc_handle_ = nullptr;
    }
    if (event_group_handle_) {
        vEventGroupDelete(event_group_handle_);
        event_group_handle_ = nullptr;
    }
    // Start() only heap-allocates device_name when CONFIG_VOLC_DEVICE_NAME is
    // empty (MAC-address fallback); otherwise it points at the string literal.
    // Decide ownership from the configuration instead of comparing a pointer
    // against a string literal, whose address identity is unspecified.
    if (iot_info_.device_name && strlen(CONFIG_VOLC_DEVICE_NAME) == 0) {
        free(iot_info_.device_name);
        iot_info_.device_name = nullptr;
    }
}
|
||
|
||
void VolcRtcProtocol::Start() {
|
||
ESP_LOGI(TAG, "VolcRtcProtocol 开始启动...");// VolcRtcProtocol 开始启动...
|
||
esp_log_level_set(TAG, ESP_LOG_DEBUG);
|
||
|
||
// 注释掉所有文件系统相关操作,避免设备重启
|
||
// 这些操作需要文件系统支持,但当前设备可能没有正确挂载文件系统
|
||
// ESP_LOGI(TAG, "跳过文件系统操作以防止设备重启");// 跳过文件系统操作以防止设备重启
|
||
// TODO: Implement proper file system initialization if file logging is needed
|
||
|
||
// 禁用获取当前工作目录的操作,避免文件系统访问
|
||
// TODO: Re-enable if filesystem is properly initialized
|
||
// ESP_LOGI(TAG, "当前工作目录检查已禁用,以防止文件系统访问");// 当前工作目录检查已禁用,以防止文件系统访问
|
||
|
||
// 如果已有RTC实例,先停止并销毁
|
||
if (rtc_handle_) {
|
||
volc_rtc_stop(rtc_handle_);
|
||
volc_rtc_destroy(rtc_handle_);
|
||
rtc_handle_ = nullptr;
|
||
}
|
||
|
||
// 创建火山RTC配置
|
||
cJSON* config = cJSON_CreateObject();
|
||
if (!config) {
|
||
ESP_LOGE(TAG, "RTC配置创建失败");// RTC配置创建失败
|
||
SetError("Failed to create RTC config");
|
||
return;
|
||
}
|
||
|
||
// 添加必要的RTC配置项
|
||
cJSON* audio_config = cJSON_CreateObject();
|
||
if (audio_config) {
|
||
cJSON_AddBoolToObject(audio_config, "publish", true);
|
||
cJSON_AddBoolToObject(audio_config, "subscribe", true);
|
||
cJSON_AddNumberToObject(audio_config, "codec", 4); // 设置音频编解码器为4(根据设计文档)
|
||
cJSON_AddItemToObject(config, "audio", audio_config);// 添加音频配置到RTC配置
|
||
}
|
||
|
||
cJSON* video_config = cJSON_CreateObject();
|
||
if (video_config) {
|
||
cJSON_AddBoolToObject(video_config, "publish", false);
|
||
cJSON_AddBoolToObject(video_config, "subscribe", false);
|
||
cJSON_AddNumberToObject(video_config, "codec", 1); // 设置视频编解码器为1(根据设计文档)
|
||
cJSON_AddItemToObject(config, "video", video_config);
|
||
}
|
||
|
||
cJSON_AddNumberToObject(config, "log_level", 1); // 设置日志级别
|
||
|
||
// 添加参数数组,与 Airhub_Rtc_h 项目保持一致
|
||
cJSON* params = cJSON_CreateArray();
|
||
if (params) {
|
||
// 只输出日志到控制台,不输出到文件
|
||
cJSON_AddItemToArray(params, cJSON_CreateString("{\"debug\":{\"log_to_console\":1}}"));// 添加日志到控制台配置
|
||
cJSON_AddItemToArray(params, cJSON_CreateString("{\"audio\":{\"codec\":{\"internal\":{\"enable\":1}}}}"));// 添加音频编解码器内部配置,启用 SDK 内部编解码
|
||
cJSON_AddItemToArray(params, cJSON_CreateString("{\"rtc\":{\"access\":{\"concurrent_requests\":1}}}"));// 添加RTC并发请求配置
|
||
cJSON_AddItemToArray(params, cJSON_CreateString("{\"rtc\":{\"ice\":{\"concurrent_agents\":1}}}"));// 添加RTC并发ICE代理配置
|
||
cJSON_AddItemToObject(config, "params", params);
|
||
}
|
||
|
||
// 创建IoT信息并优先从NVS加载
|
||
memset(&iot_info_, 0, sizeof(iot_info_));
|
||
iot_info_.instance_id = (char*)CONFIG_VOLC_INSTANCE_ID;
|
||
iot_info_.product_key = (char*)CONFIG_VOLC_PRODUCT_KEY;
|
||
iot_info_.product_secret = (char*)CONFIG_VOLC_PRODUCT_SECRET;
|
||
iot_info_.bot_id = (char*)CONFIG_VOLC_BOT_ID;
|
||
|
||
// 优先使用配置文件中的设备名称,如果为空则使用MAC地址
|
||
if (CONFIG_VOLC_DEVICE_NAME && strlen(CONFIG_VOLC_DEVICE_NAME) > 0) {
|
||
// 使用配置文件中的设备名称
|
||
iot_info_.device_name = (char*)CONFIG_VOLC_DEVICE_NAME;
|
||
ESP_LOGI(TAG, "使用配置文件中的设备名称: %s", iot_info_.device_name);
|
||
} else {
|
||
// 配置文件中的设备名称为空,使用MAC地址作为设备名称
|
||
std::string mac_address = SystemInfo::GetMacAddress();
|
||
// MAC地址中替换冒号为下划线,避免文件名中包含冒号
|
||
std::replace(mac_address.begin(), mac_address.end(), ':', '_');
|
||
char* mac_buffer = (char*)malloc(mac_address.length() + 1);
|
||
strcpy(mac_buffer, mac_address.c_str());
|
||
iot_info_.device_name = mac_buffer;
|
||
ESP_LOGI(TAG, "使用Wi-Fi MAC地址作为设备名称(已替换冒号为下划线): %s", iot_info_.device_name);
|
||
}
|
||
|
||
Settings s("volc");
|
||
auto saved_name = s.GetString("device_name", "");
|
||
bool name_mismatch = (!saved_name.empty() && strcmp(saved_name.c_str(), iot_info_.device_name) != 0);
|
||
std::string saved_secret;
|
||
std::string saved_appid;
|
||
if (name_mismatch) {
|
||
ESP_LOGW(TAG, "检测到设备名称变更:%s -> %s,清除旧凭证", saved_name.c_str(), iot_info_.device_name);
|
||
Settings sw("volc", true);
|
||
sw.EraseKey("device_secret");
|
||
sw.EraseKey("rtc_app_id");
|
||
sw.SetString("device_name", iot_info_.device_name);
|
||
} else {
|
||
saved_secret = s.GetString("device_secret", "");
|
||
saved_appid = s.GetString("rtc_app_id", "");
|
||
if (saved_name.empty()) {
|
||
Settings sw("volc", true);
|
||
sw.SetString("device_name", iot_info_.device_name);
|
||
}
|
||
}
|
||
if (!saved_secret.empty()) {
|
||
iot_info_.device_secret = strdup(saved_secret.c_str());
|
||
}
|
||
if (!saved_appid.empty()) {
|
||
iot_info_.rtc_app_id = strdup(saved_appid.c_str());
|
||
}
|
||
ESP_LOGI(TAG, "NVS凭证已加载:secret=%d appid=%d device_name=%s, free_heap=%u",
|
||
!saved_secret.empty(), !saved_appid.empty(), iot_info_.device_name,
|
||
(unsigned)heap_caps_get_free_size(MALLOC_CAP_DEFAULT));
|
||
|
||
// 创建一个结构体来传递参数给任务
|
||
struct InitParams {
|
||
VolcRtcProtocol* protocol;
|
||
cJSON* config;
|
||
};
|
||
|
||
InitParams* init_params = new InitParams();
|
||
init_params->protocol = this;
|
||
init_params->config = config;
|
||
|
||
// 将设备注册和RTC创建操作移到单独的任务中执行,避免main任务栈溢出
|
||
xTaskCreate([](void* arg) {
|
||
InitParams* init_params = static_cast<InitParams*>(arg);
|
||
VolcRtcProtocol* protocol = init_params->protocol;
|
||
cJSON* config = init_params->config;
|
||
|
||
// 如果没有设备密钥或RTC应用ID,进行设备注册
|
||
if (!protocol->iot_info_.device_secret || !protocol->iot_info_.rtc_app_id) {
|
||
char* device_secret_ptr = nullptr;
|
||
if (volc_device_register(&protocol->iot_info_, &device_secret_ptr) != 0 || device_secret_ptr == nullptr) {
|
||
ESP_LOGE(TAG, "设备注册失败");// 设备注册失败
|
||
protocol->SetError("Failed to register device");
|
||
cJSON_Delete(config);
|
||
delete init_params;
|
||
vTaskDelete(NULL);
|
||
return;
|
||
}
|
||
protocol->iot_info_.device_secret = device_secret_ptr;
|
||
Settings sw("volc", true);
|
||
sw.SetString("device_secret", protocol->iot_info_.device_secret);
|
||
if (protocol->iot_info_.rtc_app_id) {
|
||
sw.SetString("rtc_app_id", protocol->iot_info_.rtc_app_id);
|
||
}
|
||
sw.SetString("device_name", protocol->iot_info_.device_name);
|
||
}
|
||
|
||
// 创建RTC实例
|
||
protocol->rtc_handle_ = volc_rtc_create(
|
||
protocol->iot_info_.rtc_app_id ? protocol->iot_info_.rtc_app_id : CONFIG_VOLC_INSTANCE_ID,
|
||
protocol,
|
||
config,
|
||
&MessageCallback,
|
||
&DataCallback
|
||
);
|
||
cJSON_Delete(config);
|
||
delete init_params;
|
||
|
||
if (!protocol->rtc_handle_) {
|
||
ESP_LOGE(TAG, "RTC实例创建失败");// RTC实例创建失败
|
||
protocol->SetError("Failed to create RTC instance");
|
||
} else {
|
||
protocol->iot_ready_ = true;
|
||
ESP_LOGI(TAG, "RTC实例已准备就绪;房间加入将在监听状态后执行");// RTC实例已准备就绪;房间加入将在监听状态后执行
|
||
Application::GetInstance().InitializeWebsocketProtocol();// RTC初始化成功后,初始化Websocket协议
|
||
}
|
||
|
||
vTaskDelete(NULL);
|
||
}, "volc_rtc_init", 16384, init_params, 5, NULL);
|
||
|
||
// 注意:此处不再立即创建RTC实例,而是将其推迟到任务中执行
|
||
ESP_LOGI(TAG, "VolcRtcProtocol初始化任务已创建");// VolcRtcProtocol初始化任务已创建
|
||
}
|
||
|
||
// 新增:设置AgentConfig配置参数,包含body中的config参数和agent_config参数
|
||
void VolcRtcProtocol::SetAgentConfig(const std::string& params) {
|
||
extra_params_ = params;
|
||
ESP_LOGI(TAG, "设置Agent配置参数: %s", extra_params_.c_str());
|
||
}
|
||
|
||
// 🔊 发送音频数据到RTC
|
||
void VolcRtcProtocol::SendAudio(const std::vector<uint8_t>& data) {
|
||
if (!rtc_handle_ || !is_connected_ || !is_audio_channel_opened_) {
|
||
ESP_LOGW(TAG, "无法发送音频:RTC未准备就绪");// 无法发送音频:RTC未准备就绪
|
||
return;
|
||
}
|
||
|
||
std::lock_guard<std::mutex> lock(rtc_mutex_);
|
||
|
||
volc_data_info_t data_info;
|
||
memset(&data_info, 0, sizeof(data_info));
|
||
data_info.type = VOLC_DATA_TYPE_AUDIO; // 音频数据类型
|
||
data_info.info.audio.data_type = VOLC_AUDIO_DATA_TYPE_OPUS; // 格式:OPUS
|
||
|
||
// 音频参数应该在RTC初始化时已经设置好,这里只需要发送数据
|
||
int ret = volc_rtc_send(rtc_handle_, data.data(), data.size(), &data_info);
|
||
if (ret != 0) {
|
||
ESP_LOGE(TAG, "发送音频失败:%d", ret);// 发送音频失败
|
||
} else {
|
||
opus_bytes_accum_ += data.size();
|
||
opus_frames_accum_ += 1;
|
||
LogUplinkStatsMaybe();
|
||
}
|
||
}
|
||
|
||
// 🔊 发送PCM音频数据到RTC
|
||
void VolcRtcProtocol::SendPcm(const std::vector<uint8_t>& data) {
|
||
if (!rtc_handle_ || !is_connected_ || !is_audio_channel_opened_) {
|
||
ESP_LOGW(TAG, "无法发送音频:RTC未准备就绪");
|
||
return;
|
||
}
|
||
std::lock_guard<std::mutex> lock(rtc_mutex_);
|
||
pcm_pending_.insert(pcm_pending_.end(), data.begin(), data.end());
|
||
// 以 20ms 固定帧打包 PCM(8k/16bit/mono),即 320 字节;静音段也持续发送以满足 AEC/RTC 的恒定节拍
|
||
const size_t frame_bytes = (size_t)(8000 * 20 / 1000) * sizeof(int16_t);
|
||
size_t offset = 0;
|
||
while (offset + frame_bytes <= pcm_pending_.size()) {
|
||
volc_data_info_t data_info;
|
||
memset(&data_info, 0, sizeof(data_info));
|
||
data_info.type = VOLC_DATA_TYPE_AUDIO;
|
||
data_info.info.audio.data_type = VOLC_AUDIO_DATA_TYPE_PCM;
|
||
data_info.info.audio.commit = false;
|
||
int ret = volc_rtc_send(rtc_handle_, pcm_pending_.data() + offset, frame_bytes, &data_info);
|
||
if (ret != 0) {
|
||
ESP_LOGE(TAG, "发送音频失败:%d", ret);
|
||
break;
|
||
} else {
|
||
pcm_bytes_accum_ += frame_bytes;
|
||
pcm_frames_accum_ += 1;
|
||
}
|
||
offset += frame_bytes;
|
||
}
|
||
if (offset > 0) {
|
||
pcm_pending_.erase(pcm_pending_.begin(), pcm_pending_.begin() + offset);
|
||
}
|
||
LogUplinkStatsMaybe();
|
||
}
|
||
|
||
// 🔊 发送G711A音频数据到RTC
|
||
void VolcRtcProtocol::SendG711A(const std::vector<uint8_t>& data) {
|
||
if (!rtc_handle_ || !is_connected_ || !is_audio_channel_opened_) {
|
||
ESP_LOGW(TAG, "无法发送音频:RTC未准备就绪");
|
||
return;
|
||
}
|
||
std::lock_guard<std::mutex> lock(rtc_mutex_);
|
||
g711a_pending_.insert(g711a_pending_.end(), data.begin(), data.end());
|
||
const size_t frame_bytes = 160;
|
||
size_t offset = 0;
|
||
while (offset + frame_bytes <= g711a_pending_.size()) {
|
||
volc_data_info_t data_info;
|
||
memset(&data_info, 0, sizeof(data_info));
|
||
data_info.type = VOLC_DATA_TYPE_AUDIO;
|
||
data_info.info.audio.data_type = VOLC_AUDIO_DATA_TYPE_G711A;
|
||
data_info.info.audio.commit = true;
|
||
int ret = volc_rtc_send(rtc_handle_, g711a_pending_.data() + offset, frame_bytes, &data_info);
|
||
if (ret != 0) {
|
||
ESP_LOGE(TAG, "发送音频失败:%d", ret);
|
||
break;
|
||
} else {
|
||
ESP_LOGI(TAG, "发送上行G711A帧: 大小=%zu", (size_t)frame_bytes);
|
||
g711a_bytes_accum_ += frame_bytes;
|
||
g711a_frames_accum_ += 1;
|
||
}
|
||
offset += frame_bytes;
|
||
}
|
||
if (offset > 0) {
|
||
g711a_pending_.erase(g711a_pending_.begin(), g711a_pending_.begin() + offset);
|
||
}
|
||
LogUplinkStatsMaybe();
|
||
}
|
||
|
||
// 🔊 日志上行音频统计
|
||
void VolcRtcProtocol::LogUplinkStatsMaybe() {
|
||
uint64_t now_us = esp_timer_get_time();
|
||
if (uplink_last_log_us_ == 0) uplink_last_log_us_ = now_us;
|
||
uint64_t diff_us = now_us - uplink_last_log_us_;
|
||
if (diff_us >= 2000000) {
|
||
uint64_t bps = ((uint64_t)(opus_bytes_accum_ + pcm_bytes_accum_ + g711a_bytes_accum_) * 8 * 1000000ULL) / (diff_us ? diff_us : 1);
|
||
ESP_LOGI(TAG, "上行音频统计: PCM帧=%d 字节=%zu, G711A帧=%d 字节=%zu, 速率=%llu bps",
|
||
pcm_frames_accum_, (size_t)pcm_bytes_accum_, g711a_frames_accum_, (size_t)g711a_bytes_accum_, (unsigned long long)bps);
|
||
ESP_LOGI(TAG, "下行音频统计: PCM字节=%zu, OPUS字节=%zu",
|
||
(size_t)down_pcm_bytes_accum_, (size_t)down_opus_bytes_accum_);
|
||
opus_bytes_accum_ = 0;
|
||
pcm_bytes_accum_ = 0;
|
||
g711a_bytes_accum_ = 0;
|
||
down_pcm_bytes_accum_ = 0;
|
||
down_opus_bytes_accum_ = 0;
|
||
opus_frames_accum_ = 0;
|
||
pcm_frames_accum_ = 0;
|
||
g711a_frames_accum_ = 0;
|
||
uplink_last_log_us_ = now_us;
|
||
}
|
||
}
|
||
// 🔊 打开音频通道
|
||
bool VolcRtcProtocol::OpenAudioChannel() {
|
||
if (!rtc_handle_) {
|
||
ESP_LOGW(TAG, "无法打开音频通道:RTC句柄未准备就绪");// 无法打开音频通道:RTC句柄未准备就绪
|
||
return false;
|
||
}
|
||
if (!is_connected_) {
|
||
if (!iot_ready_) {
|
||
ESP_LOGE(TAG, "IoT信息未准备就绪,无法加入房间");// IoT信息未准备就绪,无法加入房间
|
||
ESP_LOGW(TAG, "Diag: app_id=%s device_name=%s bot_id=%s secret=%s", iot_info_.rtc_app_id ? iot_info_.rtc_app_id : "(null)", iot_info_.device_name ? iot_info_.device_name : "(null)", CONFIG_VOLC_BOT_ID, iot_info_.device_secret ? "yes" : "no");
|
||
return false;
|
||
}
|
||
xEventGroupClearBits(event_group_handle_, 0x1 | 0x2);
|
||
// 新增:extra_params 用于传递额外的AgentConfig配置参数
|
||
ESP_LOGI(TAG, "Join RTC: handle=%p bot=%s iot_ready=%d free_heap=%u", rtc_handle_, CONFIG_VOLC_BOT_ID, (int)iot_ready_, (unsigned)heap_caps_get_free_size(MALLOC_CAP_DEFAULT));
|
||
int ret = volc_rtc_start(rtc_handle_, CONFIG_VOLC_BOT_ID, &iot_info_, extra_params_.empty() ? NULL : extra_params_.c_str());
|
||
if (ret != 0) {
|
||
ESP_LOGE(TAG, "RTC启动失败:%d", ret);// RTC启动失败:%d
|
||
ESP_LOGW(TAG, "Diag: start failed. Possible causes: invalid IoT creds, TLS/HTTP error, network unreachable, time not synced");// 诊断:启动失败可能原因:无效的IoT凭证、TLS/HTTP错误、网络不可达、时间未同步
|
||
return false;
|
||
}
|
||
EventBits_t bits = xEventGroupWaitBits(event_group_handle_, 0x1, pdFALSE, pdFALSE, pdMS_TO_TICKS(5000));
|
||
ESP_LOGI(TAG, "Wait connect bits=0x%x free_heap=%u", (unsigned)bits, (unsigned)heap_caps_get_free_size(MALLOC_CAP_DEFAULT));
|
||
if ((bits & 0x1) == 0) {
|
||
ESP_LOGE(TAG, "RTC连接超时");// RTC连接超时
|
||
ESP_LOGW(TAG, "Diag: check Wi-Fi, SNTP time sync, IoT creds, RTC server availability");// 诊断:检查Wi-Fi、SNTP时间同步、IoT凭证、RTC服务器可用性
|
||
return false;
|
||
}
|
||
// Do not block audio readiness on remote user join; enable subscribe immediately
|
||
bits = xEventGroupWaitBits(event_group_handle_, 0x2, pdFALSE, pdFALSE, pdMS_TO_TICKS(3000));
|
||
if ((bits & 0x2) == 0) {
|
||
ESP_LOGW(TAG, "RTC远程用户未加入 yet - 主动开启音频通道");// RTC远程用户未加入 yet - 主动开启音频通道
|
||
// 远程用户未加入时,需要手动设置状态
|
||
server_sample_rate_ = 16000;
|
||
server_frame_duration_ = 60;
|
||
is_audio_channel_opened_ = true;
|
||
first_downlink_logged_ = false;
|
||
ESP_LOGI(TAG, "音频通道已打开");// 音频通道已打开
|
||
if (on_audio_channel_opened_) {
|
||
on_audio_channel_opened_();
|
||
}
|
||
} else {
|
||
// 远程用户已加入时,不要重复打印日志,因为MessageCallback中已经处理
|
||
// 但需要确保状态正确设置
|
||
if (!is_audio_channel_opened_) {
|
||
server_sample_rate_ = 16000;
|
||
server_frame_duration_ = 60;
|
||
is_audio_channel_opened_ = true;
|
||
first_downlink_logged_ = false;
|
||
ESP_LOGI(TAG, "音频通道已打开");// 音频通道已打开
|
||
if (on_audio_channel_opened_) {
|
||
on_audio_channel_opened_();
|
||
}
|
||
}
|
||
}
|
||
}
|
||
return true;
|
||
}
|
||
// 🔊 关闭音频通道
|
||
void VolcRtcProtocol::CloseAudioChannel() {
|
||
if (!rtc_handle_) {
|
||
return;
|
||
}
|
||
if (is_connected_) {
|
||
volc_rtc_stop(rtc_handle_);// 关闭RTC音频通道
|
||
is_connected_ = false;// 标记音频通道已关闭
|
||
}
|
||
ESP_LOGI(TAG, "音频通道已关闭");// 音频通道已关闭
|
||
is_audio_channel_opened_ = false;// 标记音频通道已关闭
|
||
if (on_audio_channel_closed_) {
|
||
on_audio_channel_closed_();// 调用音频通道关闭回调
|
||
}
|
||
}
|
||
|
||
// 🔊 检查音频通道是否已打开
|
||
bool VolcRtcProtocol::IsAudioChannelOpened() const {
|
||
return is_audio_channel_opened_;
|
||
}
|
||
|
||
void VolcRtcProtocol::MessageCallback(void* context, volc_msg_t* message) {
|
||
VolcRtcProtocol* protocol = static_cast<VolcRtcProtocol*>(context);
|
||
// 目前只处理简单的连接状态消息
|
||
switch (message->code) {
|
||
case VOLC_MSG_CONNECTED:
|
||
protocol->is_connected_ = true;
|
||
xEventGroupSetBits(protocol->event_group_handle_, 0x1);
|
||
protocol->server_sample_rate_ = 16000;
|
||
protocol->server_frame_duration_ = 60;
|
||
ESP_LOGI(TAG, "RTC连接成功");// RTC连接成功
|
||
//Application::GetInstance().InitializeWebsocketProtocol();// RTC连接成功后初始化Websocket协议
|
||
break;
|
||
case VOLC_MSG_DISCONNECTED:
|
||
protocol->is_connected_ = false;
|
||
protocol->is_audio_channel_opened_ = false;
|
||
xEventGroupClearBits(protocol->event_group_handle_, 0x1 | 0x2);
|
||
ESP_LOGI(TAG, "RTC断开连接");// RTC断开连接
|
||
break;
|
||
case VOLC_MSG_USER_JOINED:
|
||
// 只有在音频通道尚未打开的情况下才设置状态和调用回调
|
||
if (!protocol->is_audio_channel_opened_) {
|
||
protocol->is_audio_channel_opened_ = true;
|
||
xEventGroupSetBits(protocol->event_group_handle_, 0x2);
|
||
ESP_LOGI(TAG, "RTC远程用户加入");// RTC远程用户加入
|
||
// Set default decoder parameters before audio starts
|
||
protocol->server_sample_rate_ = 16000;
|
||
protocol->server_frame_duration_ = 60;
|
||
// 调用音频通道打开回调
|
||
if (protocol->on_audio_channel_opened_) {
|
||
protocol->on_audio_channel_opened_();
|
||
}
|
||
} else {
|
||
// 音频通道已经打开,只更新事件标志
|
||
xEventGroupSetBits(protocol->event_group_handle_, 0x2);
|
||
ESP_LOGD(TAG, "RTC远程用户加入,音频通道已打开");// 调试信息,不重复打印
|
||
}
|
||
break;
|
||
case VOLC_MSG_KEY_FRAME_REQ:
|
||
// 关键帧请求消息,不需要处理msg字段
|
||
ESP_LOGI(TAG, "接收RTC关键帧请求");// 接收RTC关键帧请求
|
||
break;
|
||
case VOLC_MSG_TARGET_BITRATE_CHANGED:
|
||
// 目标码率变化消息,使用target_bitrate字段
|
||
// ESP_LOGI(TAG, "RTC target bitrate changed: %lu bps", message->data.target_bitrate);
|
||
break;
|
||
case VOLC_MSG_CONV_STATUS:
|
||
// 会话状态消息,使用conv_status字段
|
||
ESP_LOGI(TAG, "RTC会话状态:%lu", message->data.conv_status);
|
||
if (message && message->data.msg && message->data.msg[0] != '\0') {
|
||
std::string text(message->data.msg);
|
||
ESP_LOGI(TAG, "RTC会话状态消息内容: %s", text.c_str());
|
||
cJSON* root = cJSON_Parse(text.c_str());
|
||
if (root) {
|
||
const char* sid_keys[] = {"sessionId", "session_id", "sid"};
|
||
cJSON* sid = nullptr;
|
||
for (size_t i = 0; i < sizeof(sid_keys) / sizeof(sid_keys[0]); ++i) {
|
||
sid = cJSON_GetObjectItem(root, sid_keys[i]);
|
||
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
|
||
break;
|
||
}
|
||
sid = nullptr;
|
||
}
|
||
if (!sid) {
|
||
const char* containers[] = {"data", "payload", "context", "session"};
|
||
for (size_t i = 0; i < sizeof(containers) / sizeof(containers[0]); ++i) {
|
||
cJSON* obj = cJSON_GetObjectItem(root, containers[i]);
|
||
if (obj && cJSON_IsObject(obj)) {
|
||
for (size_t j = 0; j < sizeof(sid_keys) / sizeof(sid_keys[0]); ++j) {
|
||
sid = cJSON_GetObjectItem(obj, sid_keys[j]);
|
||
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
if (sid) break;
|
||
}
|
||
}
|
||
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
|
||
protocol->session_id_ = sid->valuestring;
|
||
ESP_LOGI(TAG, "Session ID set: %s", protocol->session_id_.c_str());
|
||
if (protocol->is_audio_channel_opened_ && protocol->start_listening_pending_) {
|
||
ListeningMode m = protocol->pending_listening_mode_;
|
||
protocol->start_listening_pending_ = false;
|
||
protocol->SendStartListening(m);
|
||
}
|
||
}
|
||
if (protocol->on_incoming_json_) {
|
||
protocol->on_incoming_json_(root);
|
||
}
|
||
cJSON_Delete(root);
|
||
}
|
||
}
|
||
break;
|
||
default:
|
||
ESP_LOGI(TAG, "接收RTC消息:%d", message->code);// 接收RTC消息:%d
|
||
if (message && message->data.msg && message->data.msg[0] != '\0') {
|
||
std::string text(message->data.msg);
|
||
ESP_LOGI(TAG, "RTC消息内容: %s", text.c_str());
|
||
cJSON* root = cJSON_Parse(text.c_str());
|
||
if (root) {
|
||
const char* sid_keys[] = {"sessionId", "session_id", "sid"};
|
||
cJSON* sid = nullptr;
|
||
for (size_t i = 0; i < sizeof(sid_keys) / sizeof(sid_keys[0]); ++i) {
|
||
sid = cJSON_GetObjectItem(root, sid_keys[i]);
|
||
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
|
||
break;
|
||
}
|
||
sid = nullptr;
|
||
}
|
||
if (!sid) {
|
||
const char* containers[] = {"data", "payload", "context", "session"};
|
||
for (size_t i = 0; i < sizeof(containers) / sizeof(containers[0]); ++i) {
|
||
cJSON* obj = cJSON_GetObjectItem(root, containers[i]);
|
||
if (obj && cJSON_IsObject(obj)) {
|
||
for (size_t j = 0; j < sizeof(sid_keys) / sizeof(sid_keys[0]); ++j) {
|
||
sid = cJSON_GetObjectItem(obj, sid_keys[j]);
|
||
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
if (sid) break;
|
||
}
|
||
}
|
||
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
|
||
protocol->session_id_ = sid->valuestring;
|
||
ESP_LOGI(TAG, "Session ID set: %s", protocol->session_id_.c_str());
|
||
if (protocol->is_audio_channel_opened_ && protocol->start_listening_pending_) {
|
||
ListeningMode m = protocol->pending_listening_mode_;
|
||
protocol->start_listening_pending_ = false;
|
||
protocol->SendStartListening(m);
|
||
}
|
||
}
|
||
if (protocol->on_incoming_json_) {
|
||
protocol->on_incoming_json_(root);// 调用回调函数处理JSON消息
|
||
}
|
||
cJSON_Delete(root);// 删除JSON根对象,释放内存
|
||
}
|
||
}
|
||
break;
|
||
}
|
||
}
|
||
// 处理RTC音频数据
|
||
void VolcRtcProtocol::DataCallback(void* context, const void* data, size_t len, volc_data_info_t* info) {
|
||
VolcRtcProtocol* protocol = static_cast<VolcRtcProtocol*>(context);
|
||
// ESP_LOGI(TAG, "RTC data: type=%d len=%u free_heap=%u", info->type, (unsigned)len, (unsigned)heap_caps_get_free_size(MALLOC_CAP_DEFAULT));
|
||
if (info->type == VOLC_DATA_TYPE_AUDIO) {
|
||
if (info) {
|
||
protocol->downlink_is_pcm_ = (info->info.audio.data_type == VOLC_AUDIO_DATA_TYPE_PCM);
|
||
if (protocol->downlink_is_pcm_) {
|
||
protocol->down_pcm_bytes_accum_ += len;
|
||
protocol->server_sample_rate_ = 8000;
|
||
protocol->server_frame_duration_ = 20;
|
||
} else {
|
||
protocol->down_opus_bytes_accum_ += len;
|
||
protocol->server_sample_rate_ = 16000;
|
||
protocol->server_frame_duration_ = 60;
|
||
}
|
||
if (!protocol->first_downlink_logged_) {
|
||
ESP_LOGI(TAG, "接收下行音频首包: 类型=%s 大小=%d", protocol->downlink_is_pcm_ ? "PCM" : "OPUS", (int)len);// 接收下行音频首包: 类型=%s 大小=%d
|
||
protocol->first_downlink_logged_ = true;// 标记已记录首包
|
||
}
|
||
}
|
||
protocol->ProcessAudioData(data, len);// 处理音频数据
|
||
} else if (info->type == VOLC_DATA_TYPE_MESSAGE) {
|
||
if (data && len > 0) {
|
||
const uint8_t* buf = static_cast<const uint8_t*>(data);
|
||
std::string json_text;
|
||
if (info->info.message.is_binary && len >= 8) {
|
||
bool is_ctrl = (memcmp(buf, "ctrl", 4) == 0);
|
||
bool is_conv = (memcmp(buf, "conv", 4) == 0);
|
||
bool is_tool = (memcmp(buf, "tool", 4) == 0);
|
||
if (is_ctrl || is_conv || is_tool) {
|
||
uint32_t json_len = (uint32_t)((buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | (buf[7]));
|
||
if (json_len > 0 && (size_t)(8 + json_len) <= len) {
|
||
json_text.assign(reinterpret_cast<const char*>(buf + 8), json_len);
|
||
if (!protocol->suppress_incoming_message_log_) {
|
||
ESP_LOGI(TAG, "接收下行二进制消息(%s): %.*s", is_ctrl ? "ctrl" : (is_conv ? "conv" : "tool"), (int)json_text.size(), json_text.c_str());
|
||
}
|
||
}
|
||
}
|
||
}
|
||
if (json_text.empty()) {
|
||
json_text.assign(reinterpret_cast<const char*>(data), len);
|
||
if (!protocol->suppress_incoming_message_log_) {
|
||
ESP_LOGI(TAG, "接收下行消息: %.*s", (int)json_text.size(), json_text.c_str());
|
||
}
|
||
}
|
||
cJSON* root = cJSON_Parse(json_text.c_str());
|
||
if (root) {
|
||
const char* sid_keys[] = {"sessionId", "session_id", "sid"};
|
||
cJSON* sid = nullptr;
|
||
for (size_t i = 0; i < sizeof(sid_keys) / sizeof(sid_keys[0]); ++i) {
|
||
sid = cJSON_GetObjectItem(root, sid_keys[i]);
|
||
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
|
||
break;
|
||
}
|
||
sid = nullptr;
|
||
}
|
||
if (!sid) {
|
||
const char* containers[] = {"data", "payload", "context", "session"};
|
||
for (size_t i = 0; i < sizeof(containers) / sizeof(containers[0]); ++i) {
|
||
cJSON* obj = cJSON_GetObjectItem(root, containers[i]);
|
||
if (obj && cJSON_IsObject(obj)) {
|
||
for (size_t j = 0; j < sizeof(sid_keys) / sizeof(sid_keys[0]); ++j) {
|
||
sid = cJSON_GetObjectItem(obj, sid_keys[j]);
|
||
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
if (sid) break;
|
||
}
|
||
}
|
||
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
|
||
protocol->session_id_ = sid->valuestring;
|
||
ESP_LOGI(TAG, "Session ID set: %s", protocol->session_id_.c_str());
|
||
if (protocol->is_audio_channel_opened_ && protocol->start_listening_pending_) {
|
||
ListeningMode m = protocol->pending_listening_mode_;
|
||
protocol->start_listening_pending_ = false;
|
||
protocol->SendStartListening(m);
|
||
}
|
||
}
|
||
if (protocol->on_incoming_json_) {
|
||
protocol->on_incoming_json_(root);
|
||
}
|
||
cJSON_Delete(root);
|
||
}
|
||
}
|
||
}
|
||
|
||
}
|
||
|
||
// 解析服务器发送的JSON消息
|
||
void VolcRtcProtocol::ParseServerMessage(const char* message) {
|
||
ESP_LOGI(TAG, "接收服务器消息:%s", message);// 接收服务器消息:%s
|
||
|
||
cJSON* root = cJSON_Parse(message);
|
||
if (!root) {
|
||
ESP_LOGE(TAG, "解析服务器消息失败");// 解析服务器消息失败
|
||
return;
|
||
}
|
||
|
||
if (on_incoming_json_) {
|
||
on_incoming_json_(root);
|
||
}
|
||
|
||
cJSON_Delete(root);
|
||
}
|
||
|
||
/// Forwards a downlink audio packet to on_incoming_audio_.
///
/// The bytes are copied into a temporary std::vector that is handed to the
/// callback. Nothing happens when no callback is registered.
///
/// @param data Pointer to the first payload byte.
/// @param size Payload length in bytes.
void VolcRtcProtocol::ProcessAudioData(const void* data, int size) {
    if (on_incoming_audio_) {
        ESP_LOGD(TAG, "接收音频数据,大小:%d 字节", size);

        const uint8_t* first = static_cast<const uint8_t*>(data);
        const uint8_t* last = first + size;
        on_incoming_audio_(std::vector<uint8_t>(first, last));
    }
}
|
||
|
||
void VolcRtcProtocol::SendText(const std::string& text) {
|
||
if (!rtc_handle_ || !is_connected_) {
|
||
ESP_LOGW(TAG, "不能发送文本消息:RTC未准备好");// 不能发送文本消息,RTC未准备好
|
||
return;
|
||
}
|
||
|
||
std::lock_guard<std::mutex> lock(rtc_mutex_);
|
||
|
||
volc_data_info_t data_info;
|
||
memset(&data_info, 0, sizeof(data_info));
|
||
data_info.type = VOLC_DATA_TYPE_MESSAGE; // 文本数据类型
|
||
|
||
int ret = volc_rtc_send(rtc_handle_, text.data(), text.size(), &data_info);
|
||
if (ret != 0) {
|
||
ESP_LOGE(TAG, "发送文本消息失败:%d", ret);// 发送文本消息失败:%d
|
||
} else {
|
||
ESP_LOGD(TAG, "发送文本消息: %s", text.c_str());// 发送文本消息:%s
|
||
}
|
||
}
|
||
|
||
// 🔊 发送开始监听消息到RTC
|
||
void VolcRtcProtocol::SendStartListening(ListeningMode mode) {
|
||
// 若尚未建立会话ID或远端未加入,则排队,待会话就绪后发送
|
||
if (session_id_.empty() || !is_connected_) {
|
||
start_listening_pending_ = true;
|
||
pending_listening_mode_ = mode;
|
||
ESP_LOGI(TAG, "延迟发送StartListening,等待会话就绪");
|
||
return;
|
||
}
|
||
|
||
Protocol::SendStartListening(mode);// 调用基类方法发送开始监听消息
|
||
}
|
||
|
||
// 🔊 发送控制指令到RTC
|
||
void VolcRtcProtocol::SendCtrl(const std::string& json) {
|
||
if (!rtc_handle_ || !is_connected_) {
|
||
ESP_LOGW(TAG, "不能发送ctrl二进制消息:RTC未准备好");// 不能发送ctrl二进制消息,RTC未准备好
|
||
return;
|
||
}
|
||
|
||
std::lock_guard<std::mutex> lock(rtc_mutex_);// 🔊 发送控制指令到RTC时,加锁保护RTC句柄
|
||
|
||
// 构建二进制消息:"ctrl" + 4字节大端长度 + JSON负载
|
||
const char magic[4] = {'c','t','r','l'};
|
||
const uint32_t len = (uint32_t)json.size();
|
||
std::vector<uint8_t> payload;
|
||
payload.reserve(4 + 4 + len);
|
||
payload.insert(payload.end(), magic, magic + 4);
|
||
payload.push_back((uint8_t)((len >> 24) & 0xFF));
|
||
payload.push_back((uint8_t)((len >> 16) & 0xFF));
|
||
payload.push_back((uint8_t)((len >> 8) & 0xFF));
|
||
payload.push_back((uint8_t)(len & 0xFF));
|
||
payload.insert(payload.end(), json.begin(), json.end());
|
||
|
||
volc_data_info_t data_info;
|
||
memset(&data_info, 0, sizeof(data_info));
|
||
data_info.type = VOLC_DATA_TYPE_MESSAGE;
|
||
data_info.info.message.is_binary = true;
|
||
|
||
int ret = volc_rtc_send(rtc_handle_, payload.data(), (int)payload.size(), &data_info);
|
||
if (ret != 0) {
|
||
ESP_LOGE(TAG, "发送ctrl二进制消息失败:%d", ret);
|
||
} else {
|
||
ESP_LOGD(TAG, "发送ctrl二进制消息: %.*s", (int)json.size(), json.c_str());
|
||
}
|
||
}
|
||
|
||
// 🔊 发送函数调用指令到RTC
|
||
void VolcRtcProtocol::SendFunc(const std::string& json) {
|
||
if (!rtc_handle_ || !is_connected_) {
|
||
ESP_LOGW(TAG, "不能发送func二进制消息:RTC未准备好");// 不能发送func二进制消息,RTC未准备好
|
||
return;
|
||
}
|
||
|
||
std::lock_guard<std::mutex> lock(rtc_mutex_);// 🔊 发送函数调用指令到RTC时,加锁保护RTC句柄
|
||
|
||
const char magic[4] = {'f','u','n','c'};
|
||
const uint32_t len = (uint32_t)json.size();
|
||
std::vector<uint8_t> payload;
|
||
payload.reserve(4 + 4 + len);
|
||
payload.insert(payload.end(), magic, magic + 4);
|
||
payload.push_back((uint8_t)((len >> 24) & 0xFF));
|
||
payload.push_back((uint8_t)((len >> 16) & 0xFF));
|
||
payload.push_back((uint8_t)((len >> 8) & 0xFF));
|
||
payload.push_back((uint8_t)(len & 0xFF));
|
||
payload.insert(payload.end(), json.begin(), json.end());
|
||
|
||
volc_data_info_t data_info;
|
||
memset(&data_info, 0, sizeof(data_info));
|
||
data_info.type = VOLC_DATA_TYPE_MESSAGE;
|
||
data_info.info.message.is_binary = true;
|
||
|
||
int ret = volc_rtc_send(rtc_handle_, payload.data(), (int)payload.size(), &data_info);
|
||
if (ret != 0) {
|
||
ESP_LOGE(TAG, "发送func二进制消息失败:%d", ret);
|
||
} else {
|
||
ESP_LOGD(TAG, "发送func二进制消息: %.*s", (int)json.size(), json.c_str());
|
||
}
|
||
}
|
||
|
||
// 🔊 发送函数调用结果到RTC
|
||
void VolcRtcProtocol::SendFunctionResult(const std::string& tool_call_id, const std::string& content) {
|
||
cJSON* obj = cJSON_CreateObject();
|
||
if (!obj) {
|
||
ESP_LOGE(TAG, "创建函数结果JSON失败,回退为文本");// 创建函数结果JSON失败,回退为文本
|
||
Protocol::SendFunctionResult(tool_call_id, content);
|
||
return;
|
||
}
|
||
cJSON_AddStringToObject(obj, "ToolCallID", tool_call_id.c_str());// 添加函数调用ID到JSON
|
||
cJSON_AddStringToObject(obj, "Content", content.c_str());// 添加函数调用结果到JSON
|
||
char* printed = cJSON_PrintUnformatted(obj);
|
||
std::string json = printed ? printed : std::string();
|
||
if (printed) cJSON_free(printed);
|
||
cJSON_Delete(obj);
|
||
if (json.empty()) {
|
||
ESP_LOGW(TAG, "函数结果JSON为空,回退为文本");
|
||
Protocol::SendFunctionResult(tool_call_id, content);
|
||
return;
|
||
}
|
||
SendFunc(json);
|
||
}
|
||
|
||
// 🔊 发送文本消息到RTC (传入大模型上下文信息)
|
||
void VolcRtcProtocol::SendTextMessage(const std::string& text) {
|
||
// 按官方方案封装:ExternalTextToLLM,确保进入LLM并触发TTS
|
||
cJSON* root = cJSON_CreateObject();
|
||
if (!root) {
|
||
ESP_LOGE(TAG, "创建JSON失败,回退为文本消息");
|
||
Protocol::SendTextMessage(text);
|
||
return;
|
||
}
|
||
cJSON_AddStringToObject(root, "Command", "ExternalTextToLLM");
|
||
cJSON_AddStringToObject(root, "Message", text.c_str());
|
||
cJSON_AddNumberToObject(root, "InterruptMode", 1);
|
||
char* printed = cJSON_PrintUnformatted(root);
|
||
std::string json = printed ? printed : std::string();
|
||
if (printed) cJSON_free(printed);
|
||
cJSON_Delete(root);
|
||
|
||
if (json.empty()) {
|
||
ESP_LOGW(TAG, "生成的JSON为空,回退为文本消息");
|
||
Protocol::SendTextMessage(text);
|
||
return;
|
||
}
|
||
|
||
SendCtrl(json);
|
||
}
|
||
|
||
// 🔊 发送中止通话请求
|
||
void VolcRtcProtocol::SendAbortSpeaking(AbortReason reason) {
|
||
if (!rtc_handle_ || !is_connected_ || !is_audio_channel_opened_) {
|
||
ESP_LOGW(TAG, "不能发送中止通话请求:RTC未准备好");// 不能发送打断请求,RTC未准备好
|
||
return;
|
||
}
|
||
|
||
std::lock_guard<std::mutex> lock(rtc_mutex_);// 🔊 发送中止通话请求时,加锁保护RTC句柄
|
||
|
||
ESP_LOGI(TAG, "通过Volc RTC中断发送中止通话请求!");// 发送打断请求,通过火山RTC中断
|
||
|
||
// 调用火山RTC的打断API
|
||
int ret = volc_rtc_interrupt(rtc_handle_);
|
||
if (ret != 0) {
|
||
ESP_LOGE(TAG, "通过Volc RTC中断发送打断请求失败:%d", ret);// 发送打断请求,通过火山RTC中断失败:%d
|
||
} else {
|
||
ESP_LOGI(TAG, "通过Volc RTC中断发送打断请求成功!");// 发送打断请求,通过火山RTC中断成功
|
||
}
|
||
}
|