From 70f0cdd07a6dc14a42ef29a106238589f7c21c1a Mon Sep 17 00:00:00 2001 From: Rdzleo Date: Thu, 21 May 2026 10:23:21 +0800 Subject: [PATCH] =?UTF-8?q?feat(rtc):=20=E5=81=B6=E5=8F=91=E8=BF=9E?= =?UTF-8?q?=E6=8E=A5=E5=A4=B1=E8=B4=A5=E5=AE=8C=E6=95=B4=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?=20(A+B+C=20=E4=B8=89=E4=BB=B6=E5=A5=97)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 实测根因 (DIAG 埋点确认): 火山 RTC SDK 启动时一次性申请大量 lwIP socket fd, 默认 CONFIG_LWIP_MAX_SOCKETS=10 不够 SDK 分配, 触发 SocketConnection-Lite.c:191 bind local ip failed → ICE 协商失败 → wait connect bits=0x0 超时. 实测对比: 修复前: 冷启动 RTC join 30+ 秒超时 × 3 次失败 修复后: 冷启动 RTC join 1.6 秒成功, 软退出 + 唤醒重连 2.3 秒成功 ✅ 修复内容: [A] sdkconfig: CONFIG_LWIP_MAX_SOCKETS=10 → 20 根治 lwIP socket fd 不足. 16 是临界值, 20 留 25% 余量应对 burst 场景 (HTTP 重试 / DNS 查询 / NTP 同步并发). 代价: +10 fd × ~200B ≈ 2 KB RAM (忽略). [B] application.h/cc + volc_rtc_protocol.h/cc: 失败 3 次后销毁 + 重建 engine 新增 VolcRtcProtocol::ForceRebuildEngine() public 方法. OpenAudioChannel 连续失败 3 次时调用 (application.cc:566-573): - 销毁 rtc_handle_ + reset SDK 内部状态污染 - 等待 2 秒让 lwIP 释放残留 socket fd (TIME_WAIT) - 触发 Phase 6 重建路径 (rtc_handle_=nullptr → Start()) 应对 A 修复后仍可能出现的 SDK 内部状态错乱 (e.g. ICE Agent 异常). 本次实测未触发 (A 已解决主要问题), 但保留作为兜底防御. [C] volc_rtc_protocol.cc: DIAG_RTC_BIND_ENABLE 一键开关诊断埋点 在 join_room 前/后 + ForceRebuildEngine 前/后打印: - lwIP socket fd 使用量 (sockets=N/MAX) - heap free + psram free - WiFi rssi - 失败时的 errno + strerror 验证完成后改 0 关闭, 编译器消除 #if 块, 零运行时开销. 
文件改动: sdkconfig | LWIP_MAX_SOCKETS 10→20 main/application.h | +audio_channel_retry_count_ main/application.cc | +重试计数 + static_cast → ForceRebuildEngine 调用 main/protocols/volc_rtc_protocol.h | +ForceRebuildEngine() 声明 main/protocols/volc_rtc_protocol.cc | +DIAG 埋点 + diag_count_used_sockets() + ForceRebuildEngine() Co-Authored-By: Claude Opus 4.7 (1M context) --- main/application.cc | 19 ++++++- main/application.h | 1 + main/protocols/volc_rtc_protocol.cc | 77 +++++++++++++++++++++++++++++ main/protocols/volc_rtc_protocol.h | 5 ++ sdkconfig | 2 +- 5 files changed, 102 insertions(+), 2 deletions(-) diff --git a/main/application.cc b/main/application.cc index f71dad2..c4f60be 100644 --- a/main/application.cc +++ b/main/application.cc @@ -555,10 +555,25 @@ void Application::ToggleChatState() { Board::GetInstance().SetPowerSaveMode(false);// 关闭低功耗模式 if (!protocol_->OpenAudioChannel()) { auto ac = Board::GetInstance().GetAudioCodec(); - ESP_LOGW(TAG, "打开音频通道失败,将在2秒后重试"); + audio_channel_retry_count_++; + ESP_LOGW(TAG, "打开音频通道失败 (第 %d 次), 将在2秒后重试", audio_channel_retry_count_); if (ac) { ESP_LOGW(TAG, "Diag: codec out_channels=%d in_channels=%d out_sr=%d in_sr=%d", ac->output_channels(), ac->input_channels(), ac->output_sample_rate(), ac->input_sample_rate()); } + // 方案 B: 连续失败 3 次后销毁 + 重建 RTC engine + // 原因: SDK 内部状态污染 (lwIP socket fd 残留 / 内部缓存错乱) 单纯重试无效, + // 必须重建 engine 清理. 触发 Phase 6 的 rtc_handle_=nullptr → Start() 重建路径 + if (audio_channel_retry_count_ >= 3) { + ESP_LOGW(TAG, "🔄 连续失败 3 次, 触发 RTC engine 重建 (清理 SDK 状态)"); + // protocol_ 是基类 unique_ptr, 需 dynamic_cast 到 VolcRtcProtocol + // ESP-IDF 默认 -fno-rtti, 不能用 dynamic_cast. 
+ // protocol_ 在 Init 时只赋值为 VolcRtcProtocol (line 932), 用 static_cast 安全 + auto* volc_rtc = static_cast<VolcRtcProtocol*>(protocol_.get()); + if (volc_rtc) { + volc_rtc->ForceRebuildEngine(); + } + audio_channel_retry_count_ = 0; // 重置计数, 重建后从 0 开始计 + } SetDeviceState(kDeviceStateIdle); Schedule([this]() { vTaskDelay(pdMS_TO_TICKS(2000)); @@ -567,6 +582,8 @@ void Application::ToggleChatState() { }); return; } + // 连接成功重置重试计数 + audio_channel_retry_count_ = 0; listening_mode_ = kListeningModeRealtime;// 设置监听模式为实时监听 SetDeviceState(kDeviceStateDialog);// 设置设备状态为对话模式 diff --git a/main/application.h b/main/application.h index a0c4df0..489e951 100644 --- a/main/application.h +++ b/main/application.h @@ -167,6 +167,7 @@ private: std::atomic<bool> https_playback_active_{false};// HTTPS音频播放进行中标志 std::atomic<bool> https_playback_abort_{false};// HTTPS音频播放中止标志 std::atomic post_abort_debug_frames_{0};// HTTPS中止后诊断日志计数(追踪前N帧音频) + int audio_channel_retry_count_ = 0;// RTC 偶发连接失败重试计数 (方案 B: 失败 3 次后销毁 + 重建 engine) bool aborted_ = false; bool voice_detected_ = false; bool audio_paused_ = false; // 音频暂停状态标志 diff --git a/main/protocols/volc_rtc_protocol.cc b/main/protocols/volc_rtc_protocol.cc index 39544ff..c316015 100644 --- a/main/protocols/volc_rtc_protocol.cc +++ b/main/protocols/volc_rtc_protocol.cc @@ -21,6 +21,29 @@ static const char* TAG = "VolcRtcProtocol"; +// ============================================================ +// 方案 C: RTC bind 失败诊断埋点 (一键关闭, 零运行时开销) +// 验证完成后改 0 关闭, 编译器消除 #if 块, 不占 Flash/CPU +// 排查 "Cache.c:273 status=0x9 + SocketConnection-Lite.c:191 bind failed" 偶发问题 +// ============================================================ +#ifndef DIAG_RTC_BIND_ENABLE +#define DIAG_RTC_BIND_ENABLE 1 +#endif + +#if DIAG_RTC_BIND_ENABLE +#include "esp_wifi.h" +#include "lwip/sockets.h" // LWIP_SOCKET_OFFSET +// 统计当前 lwIP socket fd 使用量 (在 LWIP_SOCKET_OFFSET 偏移之上扫描) +static int diag_count_used_sockets(void) { + int used = 0; + for (int fd = LWIP_SOCKET_OFFSET; fd < LWIP_SOCKET_OFFSET + 
CONFIG_LWIP_MAX_SOCKETS; fd++) { + struct stat st; + if (fstat(fd, &st) == 0) used++; + } + return used; +} +#endif + VolcRtcProtocol::VolcRtcProtocol() { event_group_handle_ = xEventGroupCreate(); } @@ -364,6 +387,18 @@ bool VolcRtcProtocol::OpenAudioChannel() { xEventGroupClearBits(event_group_handle_, 0x1 | 0x2); // 新增:extra_params 用于传递额外的AgentConfig配置参数 ESP_LOGI(TAG, "Join RTC: handle=%p bot=%s iot_ready=%d free_heap=%u", rtc_handle_, CONFIG_VOLC_BOT_ID, (int)iot_ready_, (unsigned)heap_caps_get_free_size(MALLOC_CAP_DEFAULT)); +#if DIAG_RTC_BIND_ENABLE + { + int sockets_used = diag_count_used_sockets(); + wifi_ap_record_t ap_info = {}; + int rssi = (esp_wifi_sta_get_ap_info(&ap_info) == ESP_OK) ? ap_info.rssi : -127; + ESP_LOGW("DIAG-RTC", "Pre-Join: sockets=%d/%d heap=%u psram=%u rssi=%d", + sockets_used, CONFIG_LWIP_MAX_SOCKETS, + (unsigned)heap_caps_get_free_size(MALLOC_CAP_DEFAULT), + (unsigned)heap_caps_get_free_size(MALLOC_CAP_SPIRAM), + rssi); + } +#endif int ret = volc_rtc_start(rtc_handle_, CONFIG_VOLC_BOT_ID, &iot_info_, extra_params_.empty() ? 
NULL : extra_params_.c_str()); if (ret != 0) { ESP_LOGE(TAG, "RTC启动失败:%d", ret);// RTC启动失败:%d @@ -375,6 +410,16 @@ bool VolcRtcProtocol::OpenAudioChannel() { if ((bits & 0x1) == 0) { ESP_LOGE(TAG, "RTC连接超时");// RTC连接超时 ESP_LOGW(TAG, "Diag: check Wi-Fi, SNTP time sync, IoT creds, RTC server availability");// 诊断:检查Wi-Fi、SNTP时间同步、IoT凭证、RTC服务器可用性 +#if DIAG_RTC_BIND_ENABLE + { + int sockets_used = diag_count_used_sockets(); + ESP_LOGW("DIAG-RTC", "Post-Fail: sockets=%d/%d heap=%u psram=%u errno=%d(%s)", + sockets_used, CONFIG_LWIP_MAX_SOCKETS, + (unsigned)heap_caps_get_free_size(MALLOC_CAP_DEFAULT), + (unsigned)heap_caps_get_free_size(MALLOC_CAP_SPIRAM), + errno, strerror(errno)); + } +#endif return false; } // Do not block audio readiness on remote user join; enable subscribe immediately @@ -452,6 +497,38 @@ void VolcRtcProtocol::LeaveRoom(bool notify_closed) { } } +// 方案 B: 强制销毁并重建 RTC engine +// 用途: OpenAudioChannel 连续失败 N 次后调用, 清理 SDK 内部错乱状态 +// 实现: 销毁 rtc_handle_ + 触发 Phase 6 重建路径 +// 下次 OpenAudioChannel 看到 rtc_handle_=nullptr → Start() 异步重建 +void VolcRtcProtocol::ForceRebuildEngine() { + ESP_LOGW(TAG, "🔄 ForceRebuildEngine: 销毁 RTC engine 以清理 SDK 状态"); +#if DIAG_RTC_BIND_ENABLE + ESP_LOGW("DIAG-RTC", "Pre-Rebuild: sockets=%d/%d heap=%u", + diag_count_used_sockets(), CONFIG_LWIP_MAX_SOCKETS, + (unsigned)heap_caps_get_free_size(MALLOC_CAP_DEFAULT)); +#endif + if (rtc_handle_) { + if (is_connected_) { + volc_rtc_stop(rtc_handle_); + is_connected_ = false; + } + volc_rtc_destroy(rtc_handle_); + rtc_handle_ = nullptr; + } + is_audio_channel_opened_ = false; + downlink_is_pcm_ = false; + first_downlink_logged_ = false; + // 等 2 秒让 lwIP 释放残留 socket fd (TIME_WAIT 状态) + vTaskDelay(pdMS_TO_TICKS(2000)); +#if DIAG_RTC_BIND_ENABLE + ESP_LOGW("DIAG-RTC", "Post-Rebuild-Wait: sockets=%d/%d heap=%u", + diag_count_used_sockets(), CONFIG_LWIP_MAX_SOCKETS, + (unsigned)heap_caps_get_free_size(MALLOC_CAP_DEFAULT)); +#endif + ESP_LOGI(TAG, "🔄 engine 已销毁, 下次 OpenAudioChannel 触发 Phase 6 
重建"); +} + // 🔊 检查音频通道是否已打开 bool VolcRtcProtocol::IsAudioChannelOpened() const { return is_audio_channel_opened_; diff --git a/main/protocols/volc_rtc_protocol.h b/main/protocols/volc_rtc_protocol.h index 98e4512..8fbdb74 100644 --- a/main/protocols/volc_rtc_protocol.h +++ b/main/protocols/volc_rtc_protocol.h @@ -26,6 +26,11 @@ public: // 与 CloseAudioChannel 区别:CloseAudioChannel 只停媒体流,房间仍占用 void LeaveRoom(bool notify_closed = true) override; + // 方案 B: 强制销毁并重建 RTC engine. 当 OpenAudioChannel 连续失败 N 次时调用, + // 清理 SDK 内部错乱状态 (如 lwIP socket fd 残留 / 内部缓存污染), + // 触发 Phase 6 的 rtc_handle_=nullptr → Start() 重建路径 + void ForceRebuildEngine(); + bool IsAudioChannelOpened() const override;// 🔊 检查音频通道是否已打开 void SendAbortSpeaking(AbortReason reason) override;// 🔊 发送中止通话请求 void SendStartListening(ListeningMode mode) override;// 🔊 发送开始监听请求 diff --git a/sdkconfig b/sdkconfig index 1d8662d..42d62f5 100644 --- a/sdkconfig +++ b/sdkconfig @@ -2106,7 +2106,7 @@ CONFIG_LWIP_DNS_SUPPORT_MDNS_QUERIES=y CONFIG_LWIP_TIMERS_ONDEMAND=y CONFIG_LWIP_ND6=y # CONFIG_LWIP_FORCE_ROUTER_FORWARDING is not set -CONFIG_LWIP_MAX_SOCKETS=10 +CONFIG_LWIP_MAX_SOCKETS=20 # CONFIG_LWIP_USE_ONLY_LWIP_SELECT is not set # CONFIG_LWIP_SO_LINGER is not set CONFIG_LWIP_SO_REUSE=y