1、新增HTTPS故事播放功能(SendStoryRequest通过蓝牙MAC请求故事API,支持intro+body两段式无缝播放);

2、新增HttpsPlaybackFromUrl通用HTTPS音频下载播放方法,obtain_story同时支持HTTPS URL和WebSocket两种方式;
3、新增RTC↔HTTPS双向音频切换三标志位状态机(opus_playback_active_/https_playback_active_/https_playback_abort_),HTTPS播放期间静默丢弃RTC PCM包,OnAudioOutput捕获is_opus_frame防止残留Opus帧杂音;
4、新增AbortHttpsPlayback中止方法,使用独立高优先级任务(priority=10)执行DMA flush;
5、协议层新增OnBotMessage回调,Bot下行消息立即中止HTTPS播放;volc_rtc_protocol移除is_binary依赖改为直接前缀检测,新增info前缀和subv跳过逻辑;
6、新增subtitle字幕消息解析,通过bot_前缀区分USER/AI,用户说话时立即中止HTTPS播放;
7、AbortSpeaking新增HTTPS中止信号和DMA缓冲区flush;
8、Kconfig新增STORY_API_URL故事播放API地址配置;

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Rdzleo 2026-03-05 11:27:07 +08:00
parent d494a1025f
commit b9bbcc456c
6 changed files with 940 additions and 78 deletions

View File

@ -12,6 +12,12 @@ config DEVICE_STATUS_REPORT_URL
help
URL for reporting device status to server
# Base URL of the story-playback API. Per the help text below, the device
# appends a ?mac_address=XX:XX:XX:XX:XX:XX query parameter when requesting it.
# NOTE(review): the default is a plain-http URL on a private 192.168.x.x
# address — presumably a development server; confirm before shipping.
config STORY_API_URL
string "Story API URL"
default "http://192.168.124.8:8000/api/v1/devices/stories/"
help
故事播放API接口地址设备会附加 ?mac_address=XX:XX:XX:XX:XX:XX 参数请求
choice
prompt "语言选择"
default LANGUAGE_ZH_CN

File diff suppressed because it is too large Load Diff

View File

@ -70,7 +70,9 @@ public:
void Alert(const char* status, const char* message, const char* emotion = "", const std::string_view& sound = "");// 警报管理 状态、消息、情感、声音
void DismissAlert();// 关闭警报
void AbortSpeaking(AbortReason reason);// 打断语音播报
void SendStoryRequest(); // 发送讲故事 请求
void AbortHttpsPlayback(const char* reason);// 中止HTTPS音频播放并清空DMA
void SendStoryRequest(); // 发送讲故事 请求(WebSocket方式)
void HttpsPlaybackFromUrl(const std::string& url); // 通过HTTPS下载JSON并播放音频(故事/歌曲等)
void ToggleChatState();// 切换聊天状态
void ToggleListeningState();// 切换监听状态
void StartListening();// 开始监听
@ -145,7 +147,9 @@ private:
bool realtime_chat_enabled_ = false;
#endif
std::atomic<bool> ws_downlink_enabled_{true};// 🌐 WebSocket下行通道是否启用
std::atomic<bool> ws_playback_active_{false};// 🌐 WebSocket下行播放活跃标志
std::atomic<bool> opus_playback_active_{false};// 🌐 Opus解码播放活跃标志(WS/HTTPS共用)
std::atomic<bool> https_playback_active_{false};// 🌐 HTTPS音频播放进行中标志
std::atomic<bool> https_playback_abort_{false};// 🌐 HTTPS音频播放中止标志
bool aborted_ = false;
bool voice_detected_ = false;
bool audio_paused_ = false; // 音频暂停状态标志

View File

@ -24,6 +24,10 @@ void Protocol::OnNetworkError(std::function<void(const std::string& message)> ca
on_network_error_ = callback;
}
void Protocol::OnBotMessage(std::function<void()> callback) {
on_bot_message_ = callback;
}
void Protocol::SetError(const std::string& message) {
error_occurred_ = true;
if (on_network_error_ != nullptr) {

View File

@ -49,6 +49,7 @@ public:
void OnAudioChannelOpened(std::function<void()> callback);
void OnAudioChannelClosed(std::function<void()> callback);
void OnNetworkError(std::function<void(const std::string& message)> callback);
void OnBotMessage(std::function<void()> callback);
virtual void Start() = 0;
virtual bool OpenAudioChannel() = 0;
@ -76,6 +77,7 @@ protected:
std::function<void()> on_audio_channel_opened_;
std::function<void()> on_audio_channel_closed_;
std::function<void(const std::string& message)> on_network_error_;
std::function<void()> on_bot_message_;
int server_sample_rate_ = 24000;
int server_frame_duration_ = 60;

View File

@ -579,16 +579,23 @@ void VolcRtcProtocol::DataCallback(void* context, const void* data, size_t len,
if (data && len > 0) {
const uint8_t* buf = static_cast<const uint8_t*>(data);
std::string json_text;
if (info->info.message.is_binary && len >= 8) {
// 检测二进制前缀格式: [prefix(4字节)] + [json_len(4字节大端)] + [JSON]
// 注意: SDK DataCallback中 is_binary 始终为false, 不能依赖此字段
bool is_subv = false;
if (len >= 8) {
bool is_ctrl = (memcmp(buf, "ctrl", 4) == 0);
bool is_conv = (memcmp(buf, "conv", 4) == 0);
bool is_tool = (memcmp(buf, "tool", 4) == 0);
if (is_ctrl || is_conv || is_tool) {
is_subv = (memcmp(buf, "subv", 4) == 0);
bool is_info = (memcmp(buf, "info", 4) == 0);
if (is_ctrl || is_conv || is_tool || is_subv || is_info) {
uint32_t json_len = (uint32_t)((buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | (buf[7]));
if (json_len > 0 && (size_t)(8 + json_len) <= len) {
json_text.assign(reinterpret_cast<const char*>(buf + 8), json_len);
if (!protocol->suppress_incoming_message_log_) {
ESP_LOGI(TAG, "接收下行二进制消息(%s): %.*s", is_ctrl ? "ctrl" : (is_conv ? "conv" : "tool"), (int)json_text.size(), json_text.c_str());
// 字幕消息不打印内容(频率高)
if (!is_subv && !protocol->suppress_incoming_message_log_) {
const char* prefix = is_ctrl ? "ctrl" : (is_conv ? "conv" : (is_tool ? "tool" : "info"));
ESP_LOGI(TAG, "接收下行消息(%s): %.*s", prefix, (int)json_text.size(), json_text.c_str());
}
}
}
@ -599,48 +606,52 @@ void VolcRtcProtocol::DataCallback(void* context, const void* data, size_t len,
ESP_LOGI(TAG, "接收下行消息: %.*s", (int)json_text.size(), json_text.c_str());
}
}
cJSON* root = cJSON_Parse(json_text.c_str());
if (root) {
const char* sid_keys[] = {"sessionId", "session_id", "sid"};
cJSON* sid = nullptr;
for (size_t i = 0; i < sizeof(sid_keys) / sizeof(sid_keys[0]); ++i) {
sid = cJSON_GetObjectItem(root, sid_keys[i]);
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
break;
// 非subv消息: 立即通知应用层中止HTTPS播放(尽早触发, 不等JSON解析)
// subv字幕消息由应用层subtitle handler处理(可区分USER/AI)
if (!is_subv && protocol->on_bot_message_) {
protocol->on_bot_message_();
}
sid = nullptr;
}
if (!sid) {
const char* containers[] = {"data", "payload", "context", "session"};
for (size_t i = 0; i < sizeof(containers) / sizeof(containers[0]); ++i) {
cJSON* obj = cJSON_GetObjectItem(root, containers[i]);
if (obj && cJSON_IsObject(obj)) {
for (size_t j = 0; j < sizeof(sid_keys) / sizeof(sid_keys[0]); ++j) {
sid = cJSON_GetObjectItem(obj, sid_keys[j]);
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
break;
cJSON* root = cJSON_Parse(json_text.c_str());
if (root) {
// 提取 Session ID(支持多种字段名和嵌套位置)
const char* sid_keys[] = {"sessionId", "session_id", "sid"};
cJSON* sid = nullptr;
for (size_t i = 0; i < sizeof(sid_keys) / sizeof(sid_keys[0]); ++i) {
sid = cJSON_GetObjectItem(root, sid_keys[i]);
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') break;
sid = nullptr;
}
if (!sid) {
const char* containers[] = {"data", "payload", "context", "session"};
for (size_t i = 0; i < sizeof(containers) / sizeof(containers[0]); ++i) {
cJSON* obj = cJSON_GetObjectItem(root, containers[i]);
if (obj && cJSON_IsObject(obj)) {
for (size_t j = 0; j < sizeof(sid_keys) / sizeof(sid_keys[0]); ++j) {
sid = cJSON_GetObjectItem(obj, sid_keys[j]);
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') break;
}
}
if (sid) break;
}
}
if (sid) break;
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
protocol->session_id_ = sid->valuestring;
ESP_LOGI(TAG, "Session ID set: %s", protocol->session_id_.c_str());
if (protocol->is_audio_channel_opened_ && protocol->start_listening_pending_) {
ListeningMode m = protocol->pending_listening_mode_;
protocol->start_listening_pending_ = false;
protocol->SendStartListening(m);
}
}
if (protocol->on_incoming_json_) {
protocol->on_incoming_json_(root);
}
cJSON_Delete(root);
}
}
if (sid && cJSON_IsString(sid) && sid->valuestring && sid->valuestring[0] != '\0') {
protocol->session_id_ = sid->valuestring;
ESP_LOGI(TAG, "Session ID set: %s", protocol->session_id_.c_str());
if (protocol->is_audio_channel_opened_ && protocol->start_listening_pending_) {
ListeningMode m = protocol->pending_listening_mode_;
protocol->start_listening_pending_ = false;
protocol->SendStartListening(m);
}
}
if (protocol->on_incoming_json_) {
protocol->on_incoming_json_(root);
}
cJSON_Delete(root);
}
}
}
}