diff --git a/components/common/src/volc_rtc.c b/components/common/src/volc_rtc.c index 9db5bff..1c0520f 100644 --- a/components/common/src/volc_rtc.c +++ b/components/common/src/volc_rtc.c @@ -427,7 +427,10 @@ static void _on_global_error(byte_rtc_engine_t engine, int code, const char* mes rtc->b_channel_joined = false; rtc->b_first_keyframe_received = false; - LOGI("global error %d %s\n", code, message); + // 防御性判空: 火山 RTC SDK 在某些 ICE Agent 失败路径下会用 message=NULL 调用本回调, + // 导致 printf("%s", NULL) → strlen(NULL) → LoadProhibited panic → 设备重启 + // (idle ≥ 10 分钟后服务端 session 超时 / NAT 表过期等场景偶发触发) + LOGI("global error %d %s\n", code, message ? message : "(null)"); LOGI("global error heap_free=%u", (unsigned)heap_caps_get_free_size(MALLOC_CAP_DEFAULT)); msg_data.code = VOLC_MSG_DISCONNECTED; _send_message_2_user(rtc, &msg_data); diff --git a/main/CMakeLists.txt b/main/CMakeLists.txt index f8ca527..a1a52fa 100644 --- a/main/CMakeLists.txt +++ b/main/CMakeLists.txt @@ -291,7 +291,9 @@ endif() idf_component_register(SRCS ${SOURCES} EMBED_FILES ${LANG_SOUNDS} ${COMMON_SOUNDS} INCLUDE_DIRS ${INCLUDE_DIRS} - REQUIRES esp_wifi esp_netif esp_event nvs_flash bt spi_flash app_update efuse volc_engine_rtc_lite common zlib esp_lcd driver + # 路径 D'' AEC: esp-sr 提供 esp_aec.h 底层同步 API (aec_create/aec_process/aec_destroy) + # 配合软件 loopback ref (DAC PCM copy 到 ring buffer) 实现设备端 AEC + REQUIRES esp_wifi esp_netif esp_event nvs_flash bt spi_flash app_update efuse volc_engine_rtc_lite common zlib esp_lcd driver esp-sr WHOLE_ARCHIVE ) diff --git a/main/application.cc b/main/application.cc index ad2a1b8..f71dad2 100644 --- a/main/application.cc +++ b/main/application.cc @@ -1,4 +1,6 @@ #include "application.h" +#include "esp_aec.h" // 路径 D'' AEC: esp-sr 底层同步 API +#include "esp_heap_caps.h" // PSRAM 上分配 ref_ring_buf_ // #include "ble_service_config.h" // BLE JSON Service 暂不使用 #include "board.h" #include "wifi_board.h" @@ -51,10 +53,11 @@ extern "C" void ai_chat_resume_animation(void); // ============================================================ // Phase 8: 音频卡顿诊断埋点(一键开关,关闭后零运行时开销) -// 完成根因定位后改为 0 或 git revert 即可移除全部埋点。 +// 根因已定位 (LVGL 抢调度 → 切 EAF; 软件 AEC 已实施),关闭埋点减少日志噪声。 +// 如需再次诊断改回 1 即可。 // ============================================================ #ifndef PHASE8_DIAG_ENABLE -#define PHASE8_DIAG_ENABLE 1 +#define PHASE8_DIAG_ENABLE 0 #endif #if PHASE8_DIAG_ENABLE @@ -133,9 +136,189 @@ Application::~Application() { player_pipeline_close(player_pipeline_); player_pipeline_ = nullptr; } + DeinitAec(); vEventGroupDelete(event_group_); } +// 路径 D'' AEC: esp_aec.h 底层同步 API + 软件 loopback ref +// 原理: codec MIC1|MIC2 → mono mic 信号; DAC 输出 PCM 复制到 ref ring buffer; +// aec_process(mic, delayed_ref, clean) 输出消除回声的 PCM 上行 RTC +// 特点: 不启后台任务, 应用主导调度, 不抢 RTC; codec 保持 baseline 1ch 16-bit +void Application::InitAec() { + if (aec_handle_ != nullptr) { + return; + } + // VOIP_LOW_COST 模式适合实时对话, CPU 占用 ~5-15%; filter_length=4 推荐值 + aec_handle_t* handle = aec_create(16000, 4, 1, AEC_MODE_VOIP_LOW_COST); + if (handle == nullptr) { + ESP_LOGE(TAG, "❌ AEC 初始化失败 (aec_create 返回 NULL)"); + return; + } + aec_handle_ = handle; + aec_chunk_size_ = aec_get_chunksize(handle); + + // ref ring buffer 容量: 取 max(200ms, 2*chunk_size + delay_samples), 留出足够余量 + int min_capacity = aec_ref_delay_samples_ + aec_chunk_size_ * 2 + 320; + int desired_capacity = 16000 / 5; // 200ms @16kHz = 3200 samples + ref_ring_capacity_ = (min_capacity > desired_capacity) ? min_capacity : desired_capacity; + ref_ring_buf_ = (int16_t *)heap_caps_calloc(ref_ring_capacity_, sizeof(int16_t), + MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); + if (ref_ring_buf_ == nullptr) { + ESP_LOGE(TAG, "❌ ref_ring_buf 分配失败 capacity=%d", ref_ring_capacity_); + aec_destroy(handle); + aec_handle_ = nullptr; + aec_chunk_size_ = 0; + return; + } + ref_ring_write_idx_ = 0; + ref_ring_filled_ = 0; + // 用 FreeRTOS mutex 替代 portMUX, 避免禁中断与 WiFi 协议栈 pm_coex 冲突 + if (ref_ring_mutex_ == nullptr) { + ref_ring_mutex_ = xSemaphoreCreateMutex(); + } + ESP_LOGI(TAG, "✅ AEC 初始化成功: chunk_size=%d samples (%d ms @16kHz), mode=VOIP_LOW_COST, " + "ref_ring_capacity=%d samples (%d ms), delay=%d samples (%d ms)", + aec_chunk_size_, aec_chunk_size_ * 1000 / 16000, + ref_ring_capacity_, ref_ring_capacity_ * 1000 / 16000, + aec_ref_delay_samples_, aec_ref_delay_samples_ * 1000 / 16000); +} + +void Application::DeinitAec() { + if (aec_handle_ != nullptr) { + aec_destroy(static_cast(aec_handle_)); + aec_handle_ = nullptr; + aec_chunk_size_ = 0; + } + if (ref_ring_buf_ != nullptr) { + heap_caps_free(ref_ring_buf_); + ref_ring_buf_ = nullptr; + ref_ring_capacity_ = 0; + ref_ring_write_idx_ = 0; + ref_ring_filled_ = 0; + } + if (ref_ring_mutex_ != nullptr) { + vSemaphoreDelete(ref_ring_mutex_); + ref_ring_mutex_ = nullptr; + } +} + +// 把 DAC 输出的 PCM (16kHz mono 16-bit) 推入 ref ring buffer +// 调用方: OnAudioOutput 在 codec->OutputData(pcm) 之前调用 +// 线程安全: mutex 保护 ring buffer 读写 +void Application::AppendRefSamples(const int16_t *pcm, int samples) { + if (ref_ring_buf_ == nullptr || pcm == nullptr || samples <= 0 || ref_ring_mutex_ == nullptr) { + return; + } + // mutex 替代 portMUX: 不禁中断, 不干扰 WiFi 协议栈 (避免 pm_coex panic) + if (xSemaphoreTake(ref_ring_mutex_, pdMS_TO_TICKS(2)) != pdTRUE) { + return; // mutex 持锁超时 (极少发生), 丢一帧 ref 避免阻塞 + } + for (int i = 0; i < samples; i++) { + ref_ring_buf_[ref_ring_write_idx_] = pcm[i]; + ref_ring_write_idx_ = (ref_ring_write_idx_ + 1) % ref_ring_capacity_; + } + if (ref_ring_filled_ < ref_ring_capacity_) { + ref_ring_filled_ = std::min(ref_ring_filled_ + samples, ref_ring_capacity_); + } + xSemaphoreGive(ref_ring_mutex_); +} + +// 从 ref ring buffer 取 delayed ref samples (mic 同步用) +// 返回点: write_idx - delay_samples - samples ~ write_idx - delay_samples +// 累积不足时填 0 (AEC 自适应滤波器会逐渐收敛) +void Application::GetDelayedRef(int16_t *ref_out, int samples) { + if (ref_ring_buf_ == nullptr || ref_out == nullptr || samples <= 0 || ref_ring_mutex_ == nullptr) { + if (ref_out) memset(ref_out, 0, samples * sizeof(int16_t)); + return; + } + if (xSemaphoreTake(ref_ring_mutex_, pdMS_TO_TICKS(2)) != pdTRUE) { + memset(ref_out, 0, samples * sizeof(int16_t)); + return; + } + int total_offset = aec_ref_delay_samples_ + samples; + if (ref_ring_filled_ < total_offset) { + memset(ref_out, 0, samples * sizeof(int16_t)); + xSemaphoreGive(ref_ring_mutex_); + return; + } + int read_idx = (ref_ring_write_idx_ - total_offset + ref_ring_capacity_) % ref_ring_capacity_; + for (int i = 0; i < samples; i++) { + ref_out[i] = ref_ring_buf_[read_idx]; + read_idx = (read_idx + 1) % ref_ring_capacity_; + } + xSemaphoreGive(ref_ring_mutex_); +} + +// 对 mic PCM 调 aec_process, 输出 clean PCM (in-place 修改 mic_inout) +// mic_inout: 输入 mic PCM (size 必须是 chunk_size 的整数倍, 通常调用方读 chunk_size) +// 累积不足 chunk_size 或 ref 未就绪时 → passthrough (不改 mic) +void Application::ApplyAEC(std::vector& mic_inout) { + if (aec_handle_ == nullptr) { + InitAec(); // lazy init + if (aec_handle_ == nullptr || aec_chunk_size_ <= 0) { + return; // 初始化失败, passthrough + } + } + int n = (int)mic_inout.size(); + int chunk = aec_chunk_size_; + if (n < chunk) { + return; // 数据不足一个 chunk, passthrough + } + int processed = 0; + std::vector ref(chunk); + std::vector clean(chunk); + int64_t mic_sq = 0, ref_sq = 0, clean_sq = 0; + int passthrough_chunks = 0; + // ref 静音阈值: RMS < 50 视为 AI 不说话, 跳过 aec_process passthrough + // 否则 AEC 自适应滤波器在 AI 静音后仍维持之前学的 echo 模式, 错误压制用户语音 + const int REF_SILENCE_RMS_THRESHOLD = 50; + while (processed + chunk <= n) { + GetDelayedRef(ref.data(), chunk); + // 计算本 chunk ref RMS + int64_t ref_chunk_sq = 0; + for (int i = 0; i < chunk; i++) { + int16_t r = ref[i]; + ref_chunk_sq += (int64_t)r * r; + ref_sq += (int64_t)r * r; + } + int ref_chunk_rms = (int)sqrt((double)ref_chunk_sq / chunk); + bool ref_silent = (ref_chunk_rms < REF_SILENCE_RMS_THRESHOLD); + if (ref_silent) { + // AI 不说话, mic_inout 保持原值 (passthrough); 仅累计 RMS 用于诊断 + for (int i = 0; i < chunk; i++) { + int16_t m = mic_inout[processed + i]; + mic_sq += (int64_t)m * m; + clean_sq += (int64_t)m * m; // clean == mic in passthrough + } + passthrough_chunks++; + } else { + // AI 正在说话, 调 aec_process 消除回声 + aec_process(static_cast(aec_handle_), + mic_inout.data() + processed, ref.data(), clean.data()); + for (int i = 0; i < chunk; i++) { + int16_t m = mic_inout[processed + i]; + int16_t c = clean[i]; + mic_sq += (int64_t)m * m; + clean_sq += (int64_t)c * c; + mic_inout[processed + i] = c; + } + } + processed += chunk; + } + // 🔬 RMS 诊断: 每 2 秒打印, 调优延迟参数 + 判断 AEC 效果 + static uint64_t last_rms_log_us = 0; + uint64_t now_us = esp_timer_get_time(); + if (now_us - last_rms_log_us > 2000000 && processed > 0) { + int mic_rms = (int)sqrt((double)mic_sq / processed); + int ref_rms = (int)sqrt((double)ref_sq / processed); + int clean_rms = (int)sqrt((double)clean_sq / processed); + ESP_LOGI(TAG, "🔬 AEC RMS mic=%d ref=%d clean=%d (AI 说话时 ref↑, clean 应接近 mic 静音; " + "用户说话时 mic↑ clean≈mic; delay=%d samples)", + mic_rms, ref_rms, clean_rms, aec_ref_delay_samples_); + last_rms_log_us = now_us; + } +} + void Application::CheckNewVersion() { // ESP_LOGI(TAG, "OTA版本检查已临时禁用"); // return; @@ -2308,6 +2491,9 @@ void Application::OnAudioOutput() { player_pipeline_set_src_rate(player_pipeline_, src_rate); int bytes = (int)(pcm.size() * sizeof(int16_t)); ESP_LOGD(TAG, "写入播放管道: 采样率=%d 字节=%d", src_rate, bytes); + // 路径 D'' AEC: 把 DAC PCM 推入 ref ring buffer 用于回声消除参考 + // ApplyAEC 从 ring buffer 取延迟后 ref, 跟 mic 对齐做回声消除 + AppendRefSamples(pcm.data(), (int)pcm.size()); player_pipeline_write(player_pipeline_, (char*)pcm.data(), bytes); #ifdef PHASE6_ENABLE_AUDIO_FALLBACK if (bytes > 0) { @@ -2320,6 +2506,8 @@ void Application::OnAudioOutput() { } } else { ESP_LOGD(TAG, "直接输出PCM到编解码器: 样本=%zu", pcm.size()); + // 路径 D'' AEC: 把 DAC PCM 推入 ref ring buffer 用于回声消除参考 + AppendRefSamples(pcm.data(), (int)pcm.size()); #if PHASE8_DIAG_ENABLE // Phase 8 DIAG-2: codec PCM 写入耗时(>15ms 阈值告警) int64_t _diag_t = esp_timer_get_time(); @@ -2518,12 +2706,21 @@ void Application::ReadAudio(std::vector& data, int sample_rate, int sam // 默认优先使用recorder管道读取(目标采样率16000),无参考通道需求 if (recorder_pipeline_ && sample_rate == 16000) { - int need_bytes = samples * (int)sizeof(int16_t); + // 路径 D'' AEC: lazy init aec_handle_, 启用时强制读 chunk_size 满足 aec_process 输入 + if (aec_handle_ == nullptr) { + InitAec(); + } + bool aec_active = (aec_handle_ != nullptr && aec_chunk_size_ > 0 && ref_ring_buf_ != nullptr); + // AEC 模式 target_samples = max(caller_samples, chunk_size), 保持 caller 期望的 PCM 帧大小 + // 避免上行 PCM 帧大小变化 (16ms → 服务端 ASR 不识别) 而非 baseline 的 20ms 帧 + // 实际处理: ApplyAEC 处理整数个 chunk, 剩余 samples ( out; - out.reserve(samples);// 预分配内存空间,避免后续动态扩容 + out.reserve(target_samples);// 预分配内存空间,避免后续动态扩容 std::vector buf(default_bytes);// 内存音频缓冲区,用于存储从录音管道读取的音频数据 - while ((int)out.size() < samples) { + while ((int)out.size() < target_samples) { int to_read = std::min(default_bytes, (need_bytes - (int)out.size() * (int)sizeof(int16_t)));// 计算本次读取的字节数,不超过默认读取大小和剩余需要读取的字节数 if (to_read <= 0) break;// 读取到的数据大小小于等于0,跳出循环 int got = recorder_pipeline_read(recorder_pipeline_, buf.data(), to_read);// 从录音管道读取音频数据,并赋值给内存音频缓冲区 @@ -2533,12 +2730,23 @@ void Application::ReadAudio(std::vector& data, int sample_rate, int sam } int got_samples = got / (int)sizeof(int16_t);// 计算本次读取的样本数,即读取到的字节数除以每个样本的字节数 int16_t* p = (int16_t*)buf.data();// 将内存音频缓冲区转换为int16_t指针,方便按样本读取 - for (int i = 0; i < got_samples && (int)out.size() < samples; ++i) { + for (int i = 0; i < got_samples && (int)out.size() < target_samples; ++i) { out.push_back(p[i]);// 将读取到的样本添加到输出向量中,直到达到预期样本数或读取完所有数据 } } if (!out.empty()) { data.assign(out.begin(), out.end());// 将输出向量中的数据赋值给输出参数data + // 路径 D'' AEC: 对 mic PCM 调 aec_process, 输出 clean PCM (in-place) + // 数据量 chunk_size (32ms) > caller 请求 (20/30ms), caller 用 data.size() 动态处理 + if (aec_active && (int)data.size() >= aec_chunk_size_) { + ApplyAEC(data); + static bool first_aec_logged = false; + if (!first_aec_logged) { + ESP_LOGI(TAG, "AEC 首包: 请求 samples=%d 实际 chunk=%d data.size=%zu", + samples, aec_chunk_size_, data.size()); + first_aec_logged = true; + } + } return; } } @@ -2574,6 +2782,31 @@ void Application::ReadAudio(std::vector& data, int sample_rate, int sam data = std::move(resampled); } } else { + // 路径 D'' AEC: codec mono 16-bit + 软件 loopback ref → aec_process → 输出 clean mono PCM + // lazy init: 首次走此路径时 aec_create (避免开机占内部 SRAM 影响 WiFi 启动) + // 按 chunk_size (512 samples @16kHz = 32ms) 读取, ApplyAEC in-place 处理 + // data.size() 变为 chunk_size, caller 用 data.size() 动态计算下游帧 (兼容) + if (aec_handle_ == nullptr) { + InitAec(); + } + if (aec_handle_ != nullptr && aec_chunk_size_ > 0 && ref_ring_buf_ != nullptr && + codec->input_channels() == 1 && codec->input_sample_rate() == sample_rate) { + int chunk = aec_chunk_size_; + data.resize(chunk); + if (!codec->InputData(data)) { + ESP_LOGW(TAG, "🎙️ 麦克风采样失败 (AEC 路径)"); + return; + } + ApplyAEC(data); // in-place 修改 data: mic PCM → clean PCM + static bool first_aec_logged = false; + if (!first_aec_logged) { + ESP_LOGI(TAG, "AEC 首包: 请求 samples=%d 实际 chunk=%d data.size=%zu", + samples, chunk, data.size()); + first_aec_logged = true; + } + return; + } + // 非 AEC 模式: baseline 直读 (AEC 初始化失败 / codec 配置不匹配时回退) data.resize(samples); if (!codec->InputData(data)) { ESP_LOGW(TAG, "🎙️ 麦克风采样失败(直读路径),未收到输入数据"); diff --git a/main/application.h b/main/application.h index 29edc03..a0c4df0 100644 --- a/main/application.h +++ b/main/application.h @@ -204,6 +204,25 @@ private: player_pipeline_handle_t player_pipeline_ = nullptr; recorder_pipeline_handle_t recorder_pipeline_ = nullptr; + // 路径 D'' AEC: esp_aec.h 底层同步 API + 软件 loopback ref + // codec 保持 baseline 1ch 16-bit (MIC1|MIC2 ES7210 内部混合 mono) + // DAC 输出 PCM 同步复制到 ref_ring_buf, ReadAudio 调 aec_process(mic, delayed_ref) → clean + // ⚠️ portMUX (spinlock) 会禁用本核中断, 与 WiFi 协议栈 pm_coex_set_reconnect_policy 冲突 + // 实测引发 IllegalInstruction panic。改用 FreeRTOS mutex (不禁中断, 仅 task 间互斥) + void *aec_handle_ = nullptr; // aec_handle_t* (避免暴露 esp_aec.h 类型) + int aec_chunk_size_ = 0; // aec_get_chunksize 返回 (16k 通常 256 samples = 16ms) + int16_t *ref_ring_buf_ = nullptr; // PSRAM 上分配 ~200ms ref ring buffer + int ref_ring_capacity_ = 0; + int ref_ring_write_idx_ = 0; + int ref_ring_filled_ = 0; + int aec_ref_delay_samples_ = 800; // 延迟补偿 samples (默认 50ms @16kHz, 调优范围 30-80ms) + SemaphoreHandle_t ref_ring_mutex_ = nullptr; + void InitAec(); + void DeinitAec(); + void AppendRefSamples(const int16_t *pcm, int samples); // OnAudioOutput 调用, DAC PCM 推入 ring buffer + void GetDelayedRef(int16_t *ref_out, int samples); // ApplyAEC 内部使用, 取延迟后 ref + void ApplyAEC(std::vector& mic_inout); // ReadAudio 调用, in-place 处理 mic → clean + void MainLoop();// 主事件循环 void OnAudioInput();// 音频输入回调 void OnAudioOutput();// 音频输出回调