从 Kapi commit b1577d8 / a3a476f 完整移植 5 个修复,覆盖三类问题:
1. 开机/唤醒后按 BOOT 进 RTC 房间,欢迎语前 1-3 秒杂音
2. 软 RTC 退出(41s 无对话触发 Dialog watchdog)后待命音"卡卡正在待命"无声/杂音/被截
3. 软退出后按 BOOT 唤醒,欢迎语前杂音
【修复 1】OnAudioChannelOpened EnableOutput(true) 后立刻灌 200ms silence
- 防止 I2S DMA 启动后到 RTC 真实 PCM 到达 1-3s 空窗的杂音
【修复 2】LeaveRoom 增加 notify_closed 参数(默认 true,老路径行为不变)
- hibernate 路径传 false 跳过 on_audio_channel_closed_ 回调
- 避免回调链 player_pipeline_close → EnableOutput(false) 误关 codec
导致待命音无声
【修复 3】LeaveRoom 不再 volc_rtc_destroy, 保留 rtc_handle_
- 唤醒时 OpenAudioChannel 直接 volc_rtc_start 复用 handle, 不死循环
- 服务端 AI 任务无需 destroy 也会按 180s 兜底机制清理
【修复 4 - 最隐蔽】LeaveRoom 末尾重置 downlink_is_pcm_ = false
- 火山 RTC 下行是 PCM, DataCallback 设 downlink_is_pcm_=true
- 不重置 → PlaySound 的 Opus 包被 OnAudioOutput 当成 raw PCM 字节流
直接写 codec → 杂音而非待命音
- 唤醒重连后 DataCallback 收下一包会自动重置, 不影响欢迎语
【修复 5】OnAudioInput 入口加 hibernating_ guard
- hibernate 期间禁用输入侧, 防止访问关闭的 codec → std::bad_alloc abort
- 不冻结 OnAudioOutput, 让待命音队列正常被消费
【EnterIdleHibernate 重写】套用 Kapi 新顺序:
Step 0: hibernating_=true + 50ms (让 OnAudioInput guard 生效)
Step 1: LeaveRoom(false) (codec output 保留)
Step 2: background_task->WaitForCompletion
Step 3: 清空 audio_decode_queue_
Step 4: EnableInput(false) + close recorder_pipeline
Step 5: 强制 esp_pm 禁用 Light Sleep
Step 5.5: EnableOutput(false→true) + 200ms silence (清 LeaveRoom 副作用)
Step 6: SetDeviceState(idle) → PlaySound 待命音
Step 7: WaitForAudioPlayback (队列消费完毕)
Step 7.5: background_task->WaitForCompletion + vTaskDelay(1000)
(DMA + ES8311 FIFO + 功放尾音衰减, 防尾音截断)
Step 8: player_pipeline_close
Step 9: NVS idle_cycles_++
Step 10: 显示字幕"已自动退出RTC对话..."(数字人特有, 保留)
【WakeFromHibernate】调整 hibernating_=false 顺序
- 先放下 hibernating_, 让 ToggleChatState 期间 OnAudioInput guard 通过
- 否则 ToggleChatState 期间音频上行迟迟不开
编译: kapi.bin 0x41c000 (约 4.11 MiB), 分区 25% 空闲。
实测三项全通: 欢迎语干净 + 待命音清晰完整 + 唤醒欢迎语干净。
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
103 lines
3.9 KiB
C++
103 lines
3.9 KiB
C++
#ifndef PROTOCOL_H
|
||
#define PROTOCOL_H
|
||
|
||
#include <cJSON.h>

#include <chrono>
#include <cstdint>
#include <functional>
#include <string>
#include <vector>
|
||
|
||
/**
 * Header of a binary frame followed by a variable-length payload.
 *
 * The struct is packed, so the fixed part is exactly 4 bytes with no padding:
 * type (1) + reserved (1) + payload_size (2). `payload` is a flexible array
 * member (a compiler extension in C++), i.e. the payload bytes start
 * immediately after the fixed header.
 *
 * NOTE(review): the byte order of `payload_size` on the wire is not visible
 * here — confirm against the code that fills/parses this header.
 */
struct BinaryProtocol3 {
    uint8_t type;           // frame type tag; the value set is defined by the users of this struct
    uint8_t reserved;       // unused by this header
    uint16_t payload_size;  // presumably the number of bytes in `payload` — confirm with the sender
    uint8_t payload[];      // flexible array member: payload bytes follow the header
} __attribute__((packed));
|
||
|
||
/**
 * Reason passed to Protocol::SendAbortSpeaking().
 *
 * Plain (unscoped) enum: enumerators are visible in the enclosing scope and
 * take the implicit values 0, 1, 2 in declaration order.
 */
enum AbortReason {
    kAbortReasonNone,
    kAbortReasonWakeWordDetected,
    kAbortReasonVoiceInterrupt
    // kAbortReasonNewStory  // disabled: interrupt current playback when a new story is pushed over websocket
};
|
||
|
||
/**
 * Listening mode passed to Protocol::SendStartListening().
 *
 * Plain (unscoped) enum with implicit values 0, 1, 2 in declaration order.
 */
enum ListeningMode {
    kListeningModeAutoStop,
    kListeningModeManualStop,
    kListeningModeRealtime  // requires AEC (acoustic echo cancellation) support
};
|
||
|
||
class Protocol {
|
||
public:
|
||
virtual ~Protocol() = default;
|
||
|
||
inline int server_sample_rate() const {
|
||
return server_sample_rate_;
|
||
}
|
||
inline int server_frame_duration() const {
|
||
return server_frame_duration_;
|
||
}
|
||
inline bool downlink_is_pcm() const {
|
||
return downlink_is_pcm_;
|
||
}
|
||
inline const std::string& session_id() const {
|
||
return session_id_;
|
||
}
|
||
inline void SetSuppressIncomingMessageLog(bool v) { suppress_incoming_message_log_ = v; }
|
||
|
||
void OnIncomingAudio(std::function<void(std::vector<uint8_t>&& data)> callback);
|
||
void OnIncomingJson(std::function<void(const cJSON* root)> callback);
|
||
void OnAudioChannelOpened(std::function<void()> callback);
|
||
void OnAudioChannelClosed(std::function<void()> callback);
|
||
void OnNetworkError(std::function<void(const std::string& message)> callback);
|
||
void OnBotMessage(std::function<void()> callback);
|
||
|
||
virtual void Start() = 0;
|
||
virtual bool OpenAudioChannel() = 0;
|
||
virtual void CloseAudioChannel() = 0;
|
||
// Phase 6: 真退出 RTC 房间(释放 License),默认回退到 CloseAudioChannel
|
||
// notify_closed=true: 触发 on_audio_channel_closed_ 回调(默认,兼容老路径)
|
||
// notify_closed=false: 不触发回调,供 EnterIdleHibernate 使用——避免回调里的
|
||
// player_pipeline_close → EnableOutput(false) 误关 codec output 导致待命音无声
|
||
virtual void LeaveRoom(bool notify_closed = true) { (void)notify_closed; CloseAudioChannel(); }
|
||
virtual bool IsAudioChannelOpened() const = 0;
|
||
virtual void SendAudio(const std::vector<uint8_t>& data) = 0;
|
||
virtual void SendPcm(const std::vector<uint8_t>& data) {}
|
||
virtual void SendG711A(const std::vector<uint8_t>& data) {}
|
||
virtual void SendWakeWordDetected(const std::string& wake_word);
|
||
virtual void SendStartListening(ListeningMode mode);
|
||
virtual void SendStopListening();
|
||
virtual void SendAbortSpeaking(AbortReason reason);
|
||
virtual void SendTextMessage(const std::string& text);
|
||
virtual void SendStoryRequest(); // 声明 发送讲故事请求 【新增】
|
||
virtual void SendIotDescriptors(const std::string& descriptors);
|
||
virtual void SendIotStates(const std::string& states);
|
||
virtual void SendFunctionResult(const std::string& tool_call_id, const std::string& content) {
|
||
(void)tool_call_id;
|
||
SendTextMessage(content);
|
||
}
|
||
|
||
protected:
|
||
std::function<void(const cJSON* root)> on_incoming_json_;
|
||
std::function<void(std::vector<uint8_t>&& data)> on_incoming_audio_;
|
||
std::function<void()> on_audio_channel_opened_;
|
||
std::function<void()> on_audio_channel_closed_;
|
||
std::function<void(const std::string& message)> on_network_error_;
|
||
std::function<void()> on_bot_message_;
|
||
|
||
int server_sample_rate_ = 24000;
|
||
int server_frame_duration_ = 60;
|
||
bool downlink_is_pcm_ = false;// 是否是PCM格式
|
||
bool error_occurred_ = false;
|
||
std::string session_id_;
|
||
bool start_listening_pending_ = false;// 是否有待处理的监听请求
|
||
ListeningMode pending_listening_mode_ = kListeningModeRealtime;// 待处理的监听模式
|
||
std::chrono::time_point<std::chrono::steady_clock> last_incoming_time_;
|
||
bool suppress_incoming_message_log_ = false;
|
||
|
||
virtual void SendText(const std::string& text) = 0;
|
||
virtual void SetError(const std::string& message);
|
||
virtual bool IsTimeout() const;
|
||
};
|
||
|
||
#endif // PROTOCOL_H
|