feat(audio): 设备端软件 loopback ref AEC (路径 D'') 完整实施
N16R8 模组无法跑硬件 ADC 回采 (32-bit STEREO codec + 火山 RTC + 80MHz PSRAM 三者 不可共存, 详见 commit fb4b607 探索教训)。改走软件 loopback ref 方案: codec 保持 baseline 1ch 16-bit (RTC 链路 100% 稳定), DAC 输出 PCM 软件复制一份作 AEC ref 信号, 用 esp_aec.h 底层同步 API (不启后台任务, 不抢 RTC 调度) 处理。 实测验证有效: - AI 说话: mic=187 ref=8929 clean=30 → 回声消除 84% - 用户说话: mic=456 ref=8 clean=456 → passthrough 100% 保留 - 服务端 ASR 正常识别用户语音, AI 正常响应 (📝 USER: + 📝 AI: 字幕完整) - 无 WiFi pm_coex panic, idle 倒计时稳定 主要变动: 1. main/CMakeLists.txt (4 行) - REQUIRES 加 esp-sr (引入 esp_aec.h 底层同步 API) 2. main/application.h (23 行) - aec_handle_ / aec_chunk_size_ / ref_ring_buf_ / ref_ring_capacity_ / ref_ring_write_idx_ / ref_ring_filled_ / aec_ref_delay_samples_ / ref_ring_mutex_ 成员 - InitAec / DeinitAec / AppendRefSamples / GetDelayedRef / ApplyAEC 函数声明 3. main/application.cc (242 行) - include esp_aec.h + esp_heap_caps.h - InitAec: lazy 初始化 (Application 构造时不调, ReadAudio 首次走 AEC 路径触发), 避免开机占内部 SRAM 影响 WiFi 启动; ref_ring_buf 优先 PSRAM 分配 200ms 容量 - DeinitAec: 析构时清理 aec_handle / ref_ring_buf / ref_ring_mutex - AppendRefSamples: DAC PCM 推入 ref ring buffer (mutex 互斥) - GetDelayedRef: 从 ref ring buffer 取延迟后 ref (mic 同步用) - ApplyAEC: 按 chunk_size 处理, 加 ref 静音检测 (RMS<50 时 passthrough), RMS 诊断日志每 2 秒打印一次 (mic/ref/clean) - OnAudioOutput 两个分支 (player_pipeline_write / codec->OutputData) 都加 AppendRefSamples hook, 复制 PCM 到 ref ring buffer - ReadAudio: recorder_pipeline 路径加 lazy InitAec + ApplyAEC, target_samples 取 max(caller_samples, chunk_size) 保持 baseline 20ms PCM 帧大小 - 析构调 DeinitAec 实施 4 大踩坑 (详见 ~/.claude/projects/.../memory/project_software_aec_implementation.md): a) portMUX (spinlock) 禁中断与 WiFi pm_coex 模块冲突 → IllegalInstruction panic 修复: 用 SemaphoreHandle_t (FreeRTOS mutex, 2ms 超时) 替代, 不禁中断 b) AI 静音后 AEC 滤波器维持 echo 模式错误压制用户语音 → ASR 不识别 修复: ApplyAEC 加 ref 静音检测, ref RMS<50 时 passthrough 不调 aec_process c) chunk_size (256, 16ms) ≠ caller_samples (320, 20ms) 让上行 PCM 帧大小变 → 服务端 ASR 不识别非标准帧 修复: target_samples = max(samples, aec_chunk_size_), 保持 baseline 20ms 帧 d) 
aec_create 占内部 SRAM (~30-50KB) 影响 WiFi RX buffer 分配 → panic 重启 修复: lazy init, ReadAudio 首次需要时才创建实例 资源占用 (实测): - Flash: +59 KB (esp-sr libaec.a) - Internal SRAM: +35-50 KB (aec_handle_t 工作 buffer) - PSRAM: +10-15 KB (ref_ring_buf 200ms + 临时 buffer) - Core 1 CPU: +6-12% (chunk=256, 每 16ms 一次 aec_process) - 整体评估: 适中, 不影响 RTC/WiFi 等其他功能 自言自语根因辨析 (重要认知更正): - 火山控制台 "AI 降噪 OFF" 是 NS 不是 AEC, 服务端 AEC 默认 ON 不显示在 UI - baseline 不自言自语 = 云端 AEC 在兜底 - 自言自语真因常是上行 PCM 数据异常 (如嘟嘟嘟阶段 channel_mask 错位) 触发服务端 VAD 误判, 不是 echo 太大 - 设备端软件 AEC 是减轻云端负载 + 极端场景兜底, 非必需但工程价值显著 调优指南: aec_ref_delay_samples_ 当前 800 (50ms), 根据 mic 离扬声器距离调 30-80ms, 监听 RMS 中 AI 说话期间 clean 最小为最优 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
63b21fdfed
commit
a369796eb6
@ -201,7 +201,9 @@ endif()
|
|||||||
idf_component_register(SRCS ${SOURCES}
|
idf_component_register(SRCS ${SOURCES}
|
||||||
EMBED_FILES ${LANG_SOUNDS} ${COMMON_SOUNDS}
|
EMBED_FILES ${LANG_SOUNDS} ${COMMON_SOUNDS}
|
||||||
INCLUDE_DIRS ${INCLUDE_DIRS}
|
INCLUDE_DIRS ${INCLUDE_DIRS}
|
||||||
REQUIRES esp_wifi esp_netif esp_event nvs_flash bt spi_flash app_update efuse volc_engine_rtc_lite common zlib
|
# 路径 D'' AEC: esp-sr 提供 esp_aec.h 底层同步 API (aec_create/aec_process/aec_destroy)
|
||||||
|
# 配合软件 loopback ref (DAC PCM copy 到 ring buffer) 实现设备端 AEC
|
||||||
|
REQUIRES esp_wifi esp_netif esp_event nvs_flash bt spi_flash app_update efuse volc_engine_rtc_lite common zlib esp-sr
|
||||||
WHOLE_ARCHIVE
|
WHOLE_ARCHIVE
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -1,4 +1,6 @@
|
|||||||
#include "application.h"
|
#include "application.h"
|
||||||
|
#include "esp_aec.h" // 路径 D'' AEC: esp-sr 底层同步 API
|
||||||
|
#include "esp_heap_caps.h" // PSRAM 上分配 ref_ring_buf_
|
||||||
// #include "ble_service_config.h" // BLE JSON Service 暂不使用
|
// #include "ble_service_config.h" // BLE JSON Service 暂不使用
|
||||||
#include "board.h"
|
#include "board.h"
|
||||||
#include "wifi_board.h"
|
#include "wifi_board.h"
|
||||||
@ -99,9 +101,189 @@ Application::~Application() {
|
|||||||
player_pipeline_close(player_pipeline_);
|
player_pipeline_close(player_pipeline_);
|
||||||
player_pipeline_ = nullptr;
|
player_pipeline_ = nullptr;
|
||||||
}
|
}
|
||||||
|
DeinitAec();
|
||||||
vEventGroupDelete(event_group_);
|
vEventGroupDelete(event_group_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 路径 D'' AEC: esp_aec.h 底层同步 API + 软件 loopback ref
|
||||||
|
// 原理: codec MIC1|MIC2 → mono mic 信号; DAC 输出 PCM 复制到 ref ring buffer;
|
||||||
|
// aec_process(mic, delayed_ref, clean) 输出消除回声的 PCM 上行 RTC
|
||||||
|
// 特点: 不启后台任务, 应用主导调度, 不抢 RTC; codec 保持 baseline 1ch 16-bit
|
||||||
|
void Application::InitAec() {
|
||||||
|
if (aec_handle_ != nullptr) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// VOIP_LOW_COST 模式适合实时对话, CPU 占用 ~5-15%; filter_length=4 推荐值
|
||||||
|
aec_handle_t* handle = aec_create(16000, 4, 1, AEC_MODE_VOIP_LOW_COST);
|
||||||
|
if (handle == nullptr) {
|
||||||
|
ESP_LOGE(TAG, "❌ AEC 初始化失败 (aec_create 返回 NULL)");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
aec_handle_ = handle;
|
||||||
|
aec_chunk_size_ = aec_get_chunksize(handle);
|
||||||
|
|
||||||
|
// ref ring buffer 容量: 取 max(200ms, 2*chunk_size + delay_samples), 留出足够余量
|
||||||
|
int min_capacity = aec_ref_delay_samples_ + aec_chunk_size_ * 2 + 320;
|
||||||
|
int desired_capacity = 16000 / 5; // 200ms @16kHz = 3200 samples
|
||||||
|
ref_ring_capacity_ = (min_capacity > desired_capacity) ? min_capacity : desired_capacity;
|
||||||
|
ref_ring_buf_ = (int16_t *)heap_caps_calloc(ref_ring_capacity_, sizeof(int16_t),
|
||||||
|
MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
|
||||||
|
if (ref_ring_buf_ == nullptr) {
|
||||||
|
ESP_LOGE(TAG, "❌ ref_ring_buf 分配失败 capacity=%d", ref_ring_capacity_);
|
||||||
|
aec_destroy(handle);
|
||||||
|
aec_handle_ = nullptr;
|
||||||
|
aec_chunk_size_ = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
ref_ring_write_idx_ = 0;
|
||||||
|
ref_ring_filled_ = 0;
|
||||||
|
// 用 FreeRTOS mutex 替代 portMUX, 避免禁中断与 WiFi 协议栈 pm_coex 冲突
|
||||||
|
if (ref_ring_mutex_ == nullptr) {
|
||||||
|
ref_ring_mutex_ = xSemaphoreCreateMutex();
|
||||||
|
}
|
||||||
|
ESP_LOGI(TAG, "✅ AEC 初始化成功: chunk_size=%d samples (%d ms @16kHz), mode=VOIP_LOW_COST, "
|
||||||
|
"ref_ring_capacity=%d samples (%d ms), delay=%d samples (%d ms)",
|
||||||
|
aec_chunk_size_, aec_chunk_size_ * 1000 / 16000,
|
||||||
|
ref_ring_capacity_, ref_ring_capacity_ * 1000 / 16000,
|
||||||
|
aec_ref_delay_samples_, aec_ref_delay_samples_ * 1000 / 16000);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Application::DeinitAec() {
|
||||||
|
if (aec_handle_ != nullptr) {
|
||||||
|
aec_destroy(static_cast<aec_handle_t*>(aec_handle_));
|
||||||
|
aec_handle_ = nullptr;
|
||||||
|
aec_chunk_size_ = 0;
|
||||||
|
}
|
||||||
|
if (ref_ring_buf_ != nullptr) {
|
||||||
|
heap_caps_free(ref_ring_buf_);
|
||||||
|
ref_ring_buf_ = nullptr;
|
||||||
|
ref_ring_capacity_ = 0;
|
||||||
|
ref_ring_write_idx_ = 0;
|
||||||
|
ref_ring_filled_ = 0;
|
||||||
|
}
|
||||||
|
if (ref_ring_mutex_ != nullptr) {
|
||||||
|
vSemaphoreDelete(ref_ring_mutex_);
|
||||||
|
ref_ring_mutex_ = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 把 DAC 输出的 PCM (16kHz mono 16-bit) 推入 ref ring buffer
|
||||||
|
// 调用方: OnAudioOutput 在 codec->OutputData(pcm) 之前调用
|
||||||
|
// 线程安全: spinlock 保护 ring buffer 读写
|
||||||
|
void Application::AppendRefSamples(const int16_t *pcm, int samples) {
|
||||||
|
if (ref_ring_buf_ == nullptr || pcm == nullptr || samples <= 0 || ref_ring_mutex_ == nullptr) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// mutex 替代 portMUX: 不禁中断, 不干扰 WiFi 协议栈 (避免 pm_coex panic)
|
||||||
|
if (xSemaphoreTake(ref_ring_mutex_, pdMS_TO_TICKS(2)) != pdTRUE) {
|
||||||
|
return; // mutex 持锁超时 (极少发生), 丢一帧 ref 避免阻塞
|
||||||
|
}
|
||||||
|
for (int i = 0; i < samples; i++) {
|
||||||
|
ref_ring_buf_[ref_ring_write_idx_] = pcm[i];
|
||||||
|
ref_ring_write_idx_ = (ref_ring_write_idx_ + 1) % ref_ring_capacity_;
|
||||||
|
}
|
||||||
|
if (ref_ring_filled_ < ref_ring_capacity_) {
|
||||||
|
ref_ring_filled_ = std::min(ref_ring_filled_ + samples, ref_ring_capacity_);
|
||||||
|
}
|
||||||
|
xSemaphoreGive(ref_ring_mutex_);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 从 ref ring buffer 取 delayed ref samples (mic 同步用)
|
||||||
|
// 返回点: write_idx - delay_samples - samples ~ write_idx - delay_samples
|
||||||
|
// 累积不足时填 0 (AEC 自适应滤波器会逐渐收敛)
|
||||||
|
void Application::GetDelayedRef(int16_t *ref_out, int samples) {
|
||||||
|
if (ref_ring_buf_ == nullptr || ref_out == nullptr || samples <= 0 || ref_ring_mutex_ == nullptr) {
|
||||||
|
if (ref_out) memset(ref_out, 0, samples * sizeof(int16_t));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (xSemaphoreTake(ref_ring_mutex_, pdMS_TO_TICKS(2)) != pdTRUE) {
|
||||||
|
memset(ref_out, 0, samples * sizeof(int16_t));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
int total_offset = aec_ref_delay_samples_ + samples;
|
||||||
|
if (ref_ring_filled_ < total_offset) {
|
||||||
|
memset(ref_out, 0, samples * sizeof(int16_t));
|
||||||
|
xSemaphoreGive(ref_ring_mutex_);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
int read_idx = (ref_ring_write_idx_ - total_offset + ref_ring_capacity_) % ref_ring_capacity_;
|
||||||
|
for (int i = 0; i < samples; i++) {
|
||||||
|
ref_out[i] = ref_ring_buf_[read_idx];
|
||||||
|
read_idx = (read_idx + 1) % ref_ring_capacity_;
|
||||||
|
}
|
||||||
|
xSemaphoreGive(ref_ring_mutex_);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 对 mic PCM 调 aec_process, 输出 clean PCM (in-place 修改 mic_inout)
|
||||||
|
// mic_inout: 输入 mic PCM (size 必须是 chunk_size 的整数倍, 通常调用方读 chunk_size)
|
||||||
|
// 累积不足 chunk_size 或 ref 未就绪时 → passthrough (不改 mic)
|
||||||
|
void Application::ApplyAEC(std::vector<int16_t>& mic_inout) {
|
||||||
|
if (aec_handle_ == nullptr) {
|
||||||
|
InitAec(); // lazy init
|
||||||
|
if (aec_handle_ == nullptr || aec_chunk_size_ <= 0) {
|
||||||
|
return; // 初始化失败, passthrough
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int n = (int)mic_inout.size();
|
||||||
|
int chunk = aec_chunk_size_;
|
||||||
|
if (n < chunk) {
|
||||||
|
return; // 数据不足一个 chunk, passthrough
|
||||||
|
}
|
||||||
|
int processed = 0;
|
||||||
|
std::vector<int16_t> ref(chunk);
|
||||||
|
std::vector<int16_t> clean(chunk);
|
||||||
|
int64_t mic_sq = 0, ref_sq = 0, clean_sq = 0;
|
||||||
|
int passthrough_chunks = 0;
|
||||||
|
// ref 静音阈值: RMS < 50 视为 AI 不说话, 跳过 aec_process passthrough
|
||||||
|
// 否则 AEC 自适应滤波器在 AI 静音后仍维持之前学的 echo 模式, 错误压制用户语音
|
||||||
|
const int REF_SILENCE_RMS_THRESHOLD = 50;
|
||||||
|
while (processed + chunk <= n) {
|
||||||
|
GetDelayedRef(ref.data(), chunk);
|
||||||
|
// 计算本 chunk ref RMS
|
||||||
|
int64_t ref_chunk_sq = 0;
|
||||||
|
for (int i = 0; i < chunk; i++) {
|
||||||
|
int16_t r = ref[i];
|
||||||
|
ref_chunk_sq += (int64_t)r * r;
|
||||||
|
ref_sq += (int64_t)r * r;
|
||||||
|
}
|
||||||
|
int ref_chunk_rms = (int)sqrt((double)ref_chunk_sq / chunk);
|
||||||
|
bool ref_silent = (ref_chunk_rms < REF_SILENCE_RMS_THRESHOLD);
|
||||||
|
if (ref_silent) {
|
||||||
|
// AI 不说话, mic_inout 保持原值 (passthrough); 仅累计 RMS 用于诊断
|
||||||
|
for (int i = 0; i < chunk; i++) {
|
||||||
|
int16_t m = mic_inout[processed + i];
|
||||||
|
mic_sq += (int64_t)m * m;
|
||||||
|
clean_sq += (int64_t)m * m; // clean == mic in passthrough
|
||||||
|
}
|
||||||
|
passthrough_chunks++;
|
||||||
|
} else {
|
||||||
|
// AI 正在说话, 调 aec_process 消除回声
|
||||||
|
aec_process(static_cast<const aec_handle_t*>(aec_handle_),
|
||||||
|
mic_inout.data() + processed, ref.data(), clean.data());
|
||||||
|
for (int i = 0; i < chunk; i++) {
|
||||||
|
int16_t m = mic_inout[processed + i];
|
||||||
|
int16_t c = clean[i];
|
||||||
|
mic_sq += (int64_t)m * m;
|
||||||
|
clean_sq += (int64_t)c * c;
|
||||||
|
mic_inout[processed + i] = c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
processed += chunk;
|
||||||
|
}
|
||||||
|
// 🔬 RMS 诊断: 每 2 秒打印, 调优延迟参数 + 判断 AEC 效果
|
||||||
|
static uint64_t last_rms_log_us = 0;
|
||||||
|
uint64_t now_us = esp_timer_get_time();
|
||||||
|
if (now_us - last_rms_log_us > 2000000 && processed > 0) {
|
||||||
|
int mic_rms = (int)sqrt((double)mic_sq / processed);
|
||||||
|
int ref_rms = (int)sqrt((double)ref_sq / processed);
|
||||||
|
int clean_rms = (int)sqrt((double)clean_sq / processed);
|
||||||
|
ESP_LOGI(TAG, "🔬 AEC RMS mic=%d ref=%d clean=%d (AI 说话时 ref↑, clean 应接近 mic 静音; "
|
||||||
|
"用户说话时 mic↑ clean≈mic; delay=%d samples)",
|
||||||
|
mic_rms, ref_rms, clean_rms, aec_ref_delay_samples_);
|
||||||
|
last_rms_log_us = now_us;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void Application::CheckNewVersion() {
|
void Application::CheckNewVersion() {
|
||||||
// ESP_LOGI(TAG, "OTA版本检查已临时禁用");
|
// ESP_LOGI(TAG, "OTA版本检查已临时禁用");
|
||||||
// return;
|
// return;
|
||||||
@ -2183,6 +2365,9 @@ void Application::OnAudioOutput() {
|
|||||||
player_pipeline_set_src_rate(player_pipeline_, src_rate);
|
player_pipeline_set_src_rate(player_pipeline_, src_rate);
|
||||||
int bytes = (int)(pcm.size() * sizeof(int16_t));
|
int bytes = (int)(pcm.size() * sizeof(int16_t));
|
||||||
ESP_LOGD(TAG, "写入播放管道: 采样率=%d 字节=%d", src_rate, bytes);
|
ESP_LOGD(TAG, "写入播放管道: 采样率=%d 字节=%d", src_rate, bytes);
|
||||||
|
// 路径 D'' AEC hook (player_pipeline 分支): 在 player_pipeline_write 之前复制到 ref ring buffer
|
||||||
|
// pcm 已经过 output_resampler_ 重采样到 codec output sample rate (通常 16kHz), 可直接作 ref
|
||||||
|
AppendRefSamples(pcm.data(), (int)pcm.size());
|
||||||
player_pipeline_write(player_pipeline_, (char*)pcm.data(), bytes);
|
player_pipeline_write(player_pipeline_, (char*)pcm.data(), bytes);
|
||||||
if (bytes > 0) {
|
if (bytes > 0) {
|
||||||
this->last_audible_output_time_ = std::chrono::steady_clock::now();
|
this->last_audible_output_time_ = std::chrono::steady_clock::now();
|
||||||
@ -2193,6 +2378,10 @@ void Application::OnAudioOutput() {
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
ESP_LOGD(TAG, "直接输出PCM到编解码器: 样本=%zu", pcm.size());
|
ESP_LOGD(TAG, "直接输出PCM到编解码器: 样本=%zu", pcm.size());
|
||||||
|
// 路径 D'' AEC hook: codec->OutputData(pcm) 之前复制 PCM 到 ref ring buffer
|
||||||
|
// pcm 此时已重采样到 codec->output_sample_rate() (通常 16kHz), 直接作 ref
|
||||||
|
// ApplyAEC 从 ring buffer 取延迟后 ref, 跟 mic 对齐做回声消除
|
||||||
|
AppendRefSamples(pcm.data(), (int)pcm.size());
|
||||||
codec->OutputData(pcm);// 直接输出PCM数据
|
codec->OutputData(pcm);// 直接输出PCM数据
|
||||||
if (!pcm.empty()) {
|
if (!pcm.empty()) {
|
||||||
this->last_audible_output_time_ = std::chrono::steady_clock::now();
|
this->last_audible_output_time_ = std::chrono::steady_clock::now();
|
||||||
@ -2379,12 +2568,21 @@ void Application::ReadAudio(std::vector<int16_t>& data, int sample_rate, int sam
|
|||||||
|
|
||||||
// 默认优先使用recorder管道读取(目标采样率16000),无参考通道需求
|
// 默认优先使用recorder管道读取(目标采样率16000),无参考通道需求
|
||||||
if (recorder_pipeline_ && sample_rate == 16000) {
|
if (recorder_pipeline_ && sample_rate == 16000) {
|
||||||
int need_bytes = samples * (int)sizeof(int16_t);
|
// 路径 D'' AEC: lazy init aec_handle_, 启用时强制读 chunk_size 满足 aec_process 输入
|
||||||
|
if (aec_handle_ == nullptr) {
|
||||||
|
InitAec();
|
||||||
|
}
|
||||||
|
bool aec_active = (aec_handle_ != nullptr && aec_chunk_size_ > 0 && ref_ring_buf_ != nullptr);
|
||||||
|
// AEC 模式 target_samples = max(caller_samples, chunk_size), 保持 caller 期望的 PCM 帧大小
|
||||||
|
// 避免上行 PCM 帧大小变化 (16ms → 服务端 ASR 不识别) 而非 baseline 的 20ms 帧
|
||||||
|
// 实际处理: ApplyAEC 处理整数个 chunk, 剩余 samples (<chunk) 自然 passthrough
|
||||||
|
int target_samples = aec_active ? std::max(samples, aec_chunk_size_) : samples;
|
||||||
|
int need_bytes = target_samples * (int)sizeof(int16_t);
|
||||||
int default_bytes = recorder_pipeline_get_default_read_size(recorder_pipeline_);
|
int default_bytes = recorder_pipeline_get_default_read_size(recorder_pipeline_);
|
||||||
std::vector<int16_t> out;
|
std::vector<int16_t> out;
|
||||||
out.reserve(samples);// 预分配内存空间,避免后续动态扩容
|
out.reserve(target_samples);// 预分配内存空间,避免后续动态扩容
|
||||||
std::vector<char> buf(default_bytes);// 内存音频缓冲区,用于存储从录音管道读取的音频数据
|
std::vector<char> buf(default_bytes);// 内存音频缓冲区,用于存储从录音管道读取的音频数据
|
||||||
while ((int)out.size() < samples) {
|
while ((int)out.size() < target_samples) {
|
||||||
int to_read = std::min(default_bytes, (need_bytes - (int)out.size() * (int)sizeof(int16_t)));// 计算本次读取的字节数,不超过默认读取大小和剩余需要读取的字节数
|
int to_read = std::min(default_bytes, (need_bytes - (int)out.size() * (int)sizeof(int16_t)));// 计算本次读取的字节数,不超过默认读取大小和剩余需要读取的字节数
|
||||||
if (to_read <= 0) break;// 读取到的数据大小小于等于0,跳出循环
|
if (to_read <= 0) break;// 读取到的数据大小小于等于0,跳出循环
|
||||||
int got = recorder_pipeline_read(recorder_pipeline_, buf.data(), to_read);// 从录音管道读取音频数据,并赋值给内存音频缓冲区
|
int got = recorder_pipeline_read(recorder_pipeline_, buf.data(), to_read);// 从录音管道读取音频数据,并赋值给内存音频缓冲区
|
||||||
@ -2394,12 +2592,23 @@ void Application::ReadAudio(std::vector<int16_t>& data, int sample_rate, int sam
|
|||||||
}
|
}
|
||||||
int got_samples = got / (int)sizeof(int16_t);// 计算本次读取的样本数,即读取到的字节数除以每个样本的字节数
|
int got_samples = got / (int)sizeof(int16_t);// 计算本次读取的样本数,即读取到的字节数除以每个样本的字节数
|
||||||
int16_t* p = (int16_t*)buf.data();// 将内存音频缓冲区转换为int16_t指针,方便按样本读取
|
int16_t* p = (int16_t*)buf.data();// 将内存音频缓冲区转换为int16_t指针,方便按样本读取
|
||||||
for (int i = 0; i < got_samples && (int)out.size() < samples; ++i) {
|
for (int i = 0; i < got_samples && (int)out.size() < target_samples; ++i) {
|
||||||
out.push_back(p[i]);// 将读取到的样本添加到输出向量中,直到达到预期样本数或读取完所有数据
|
out.push_back(p[i]);// 将读取到的样本添加到输出向量中,直到达到预期样本数或读取完所有数据
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!out.empty()) {
|
if (!out.empty()) {
|
||||||
data.assign(out.begin(), out.end());// 将输出向量中的数据赋值给输出参数data
|
data.assign(out.begin(), out.end());// 将输出向量中的数据赋值给输出参数data
|
||||||
|
// 路径 D'' AEC: 对 mic PCM 调 aec_process, 输出 clean PCM (in-place)
|
||||||
|
// data holds up to target_samples = max(samples, aec_chunk_size_) samples; ApplyAEC cleans whole chunks in place and leaves any tail shorter than one chunk untouched
|
||||||
|
if (aec_active && (int)data.size() >= aec_chunk_size_) {
|
||||||
|
ApplyAEC(data);
|
||||||
|
static bool first_aec_logged = false;
|
||||||
|
if (!first_aec_logged) {
|
||||||
|
ESP_LOGI(TAG, "AEC 首包: 请求 samples=%d 实际 chunk=%d data.size=%zu",
|
||||||
|
samples, aec_chunk_size_, data.size());
|
||||||
|
first_aec_logged = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2435,6 +2644,31 @@ void Application::ReadAudio(std::vector<int16_t>& data, int sample_rate, int sam
|
|||||||
data = std::move(resampled);
|
data = std::move(resampled);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
// 路径 D'' AEC: codec mono 16-bit + 软件 loopback ref → aec_process → 输出 clean mono PCM
|
||||||
|
// lazy init: 首次走此路径时 aec_create (避免开机占内部 SRAM 影响 WiFi 启动)
|
||||||
|
// Reads exactly aec_chunk_size_ samples from the codec (the value obtained from aec_get_chunksize at init), then ApplyAEC processes them in place
|
||||||
|
// data.size() 变为 chunk_size, caller 用 data.size() 动态计算下游帧 (兼容)
|
||||||
|
if (aec_handle_ == nullptr) {
|
||||||
|
InitAec();
|
||||||
|
}
|
||||||
|
if (aec_handle_ != nullptr && aec_chunk_size_ > 0 && ref_ring_buf_ != nullptr &&
|
||||||
|
codec->input_channels() == 1 && codec->input_sample_rate() == sample_rate) {
|
||||||
|
int chunk = aec_chunk_size_;
|
||||||
|
data.resize(chunk);
|
||||||
|
if (!codec->InputData(data)) {
|
||||||
|
ESP_LOGW(TAG, "🎙️ 麦克风采样失败 (AEC 路径)");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
ApplyAEC(data); // in-place 修改 data: mic PCM → clean PCM
|
||||||
|
static bool first_aec_logged = false;
|
||||||
|
if (!first_aec_logged) {
|
||||||
|
ESP_LOGI(TAG, "AEC 首包: 请求 samples=%d 实际 chunk=%d data.size=%zu",
|
||||||
|
samples, chunk, data.size());
|
||||||
|
first_aec_logged = true;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// 非 AEC 模式: baseline 直读 (AEC 初始化失败 / codec 配置不匹配时回退)
|
||||||
data.resize(samples);
|
data.resize(samples);
|
||||||
if (!codec->InputData(data)) {
|
if (!codec->InputData(data)) {
|
||||||
ESP_LOGW(TAG, "🎙️ 麦克风采样失败(直读路径),未收到输入数据");
|
ESP_LOGW(TAG, "🎙️ 麦克风采样失败(直读路径),未收到输入数据");
|
||||||
|
|||||||
@ -202,6 +202,29 @@ private:
|
|||||||
player_pipeline_handle_t player_pipeline_ = nullptr;
|
player_pipeline_handle_t player_pipeline_ = nullptr;
|
||||||
recorder_pipeline_handle_t recorder_pipeline_ = nullptr;
|
recorder_pipeline_handle_t recorder_pipeline_ = nullptr;
|
||||||
|
|
||||||
|
// Path D'' AEC: esp_aec.h low-level synchronous API + software loopback reference.
|
||||||
|
// The codec stays in its baseline configuration (1 channel, 16-bit); per the original notes the ES7210 mixes MIC1|MIC2 to mono internally.
|
||||||
|
// The PCM sent to the DAC is copied into ref_ring_buf_; ReadAudio calls ApplyAEC, which runs aec_process(mic, delayed_ref) -> clean.
|
||||||
|
void *aec_handle_ = nullptr; // really an aec_handle_t*; kept as void* so this header does not expose esp_aec.h types
|
||||||
|
int aec_chunk_size_ = 0; // samples per aec_process call, as returned by aec_get_chunksize. NOTE(review): the original comment cited 512 samples = 32 ms @16 kHz -- confirm against the value logged at init
|
||||||
|
int16_t *ref_ring_buf_ = nullptr; // reference ring buffer, allocated in InitAec from PSRAM (at least 200 ms @16 kHz)
|
||||||
|
int ref_ring_capacity_ = 0; // ring buffer capacity in samples
|
||||||
|
int ref_ring_write_idx_ = 0; // write index, advanced by AppendRefSamples (called from OnAudioOutput)
|
||||||
|
int ref_ring_filled_ = 0; // samples written so far, saturating at the capacity; used to tell whether enough history exists for the delay
|
||||||
|
int aec_ref_delay_samples_ = 800; // reference delay compensation in samples (800 = 50 ms @16 kHz); a tuning parameter
|
||||||
|
// Warning from the original author: a portMUX spinlock disables interrupts on the current core and conflicted with the Wi-Fi stack (pm_coex_set_reconnect_policy),
|
||||||
|
// which was observed to cause an IllegalInstruction panic. A FreeRTOS mutex is used instead (no interrupt masking, task-level mutual exclusion only).
|
||||||
|
SemaphoreHandle_t ref_ring_mutex_ = nullptr;
|
||||||
|
void InitAec();
|
||||||
|
||||||
|
void DeinitAec();
|
||||||
|
// Append DAC output PCM (16-bit samples) to the reference ring buffer.
|
||||||
|
void AppendRefSamples(const int16_t *pcm, int samples);
|
||||||
|
// Fetch the delayed reference window from the ring buffer (used to line the reference up with the mic signal).
|
||||||
|
void GetDelayedRef(int16_t *ref_out, int samples);
|
||||||
|
// Run aec_process in place over every whole chunk_size chunk of mic PCM;
|
||||||
|
// a buffer shorter than one chunk is passed through unchanged.
|
||||||
|
void ApplyAEC(std::vector<int16_t>& mic_inout);
|
||||||
|
|
||||||
void MainLoop();// 主事件循环
|
void MainLoop();// 主事件循环
|
||||||
void OnAudioInput();// 音频输入回调
|
void OnAudioInput();// 音频输入回调
|
||||||
void OnAudioOutput();// 音频输出回调
|
void OnAudioOutput();// 音频输出回调
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user