toy-hardware/main/audio_processing/audio_processor.h

93 lines
3.6 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#ifndef AUDIO_PROCESSOR_H
#define AUDIO_PROCESSOR_H
#include <esp_afe_sr_models.h>
#include <freertos/FreeRTOS.h>
#include <freertos/task.h>
#include <freertos/event_groups.h>
#include <string>
#include <vector>
#include <functional>
#include <chrono>
#include "audio_codec.h"
/// Tuning parameters for echo-aware voice activity detection (VAD).
///
/// Every field has an in-class default, so a default-constructed instance
/// is fully initialized.
struct EchoAwareVadParams {
    /// Signal-to-noise ratio threshold.
    float snr_threshold{0.3f};
    /// Minimum silence duration, in milliseconds.
    int min_silence_ms{200};
    /// Interrupt cooldown time, in milliseconds.
    int interrupt_cooldown_ms{500};
    /// Whether the adaptive threshold is enabled.
    bool adaptive_threshold{true};

    // ---- Adaptive noise suppression parameters ----

    /// Whether adaptive noise suppression is enabled.
    bool adaptive_noise_suppression{false};
    /// Base noise suppression strength.
    float noise_suppression_base{2.0f};
    /// Volume sensitivity.
    float volume_sensitivity{3.0f};
    /// Echo detection threshold.
    float echo_detection_threshold{0.2f};
    /// Distance estimation factor.
    float distance_estimation_factor{2.0f};
};
/// Runtime state tracked by the adaptive noise suppression logic.
struct AdaptiveNoiseState {
    /// Current echo strength.
    float current_echo_strength{0.0f};
    /// Estimated distance factor.
    float estimated_distance_factor{1.0f};
    /// Dynamic suppression level.
    float dynamic_suppression_level{1.0f};
    /// Noise baseline.
    float noise_baseline{0.0f};
    /// Whether high-interference mode is active.
    bool high_interference_mode{false};
    /// Time of the most recent adaptation; value-initialized by default.
    std::chrono::steady_clock::time_point last_adaptation_time{};
};
/// Audio processing front end with echo-aware voice activity detection (VAD).
///
/// Holds raw handles to an ESP-SR AFE interface/data pair and a FreeRTOS
/// event group, plus user-supplied callbacks for processed audio output and
/// VAD state changes. Method bodies live in the implementation file and are
/// not visible from this header.
class AudioProcessor {
public:
    AudioProcessor();
    ~AudioProcessor();

    // Copying is disabled. The class declares a destructor and stores raw
    // handles (event_group_, afe_iface_, afe_data_); the destructor body is
    // not visible here, but presumably it releases them, in which case an
    // implicit member-wise copy would release the same handles twice.
    // Because a destructor is user-declared, no implicit move operations
    // existed before either, so this only removes the implicit copy.
    AudioProcessor(const AudioProcessor&) = delete;
    AudioProcessor& operator=(const AudioProcessor&) = delete;

    /// Sets up the processor for the given codec.
    /// @param codec          Audio codec to work with (presumably kept in
    ///                       codec_; ownership is not expressed by the type).
    /// @param realtime_chat  Real-time chat mode flag.
    void Initialize(AudioCodec* codec, bool realtime_chat);

    /// Supplies a buffer of 16-bit audio samples to the processor.
    void Feed(const std::vector<int16_t>& data);

    void Start();
    void Stop();
    bool IsRunning();

    /// Registers the callback that receives output sample buffers.
    void OnOutput(std::function<void(std::vector<int16_t>&& data)> callback);

    /// Registers the VAD state-change callback (speaking / not speaking).
    void OnVadStateChange(std::function<void(bool speaking)> callback);

    /// Registers the simple VAD callback, intended for ordinary business logic.
    void OnSimpleVadStateChange(std::function<void(bool speaking)> callback);

    size_t GetFeedSize();

    // ---- Echo-aware VAD optimization interface ----

    /// Replaces the echo-aware VAD parameters.
    void SetEchoAwareParams(const EchoAwareVadParams& params);

    /// Reports the speaker volume; used to dynamically adjust the VAD threshold.
    void SetSpeakerVolume(float volume);

    /// Checks the AEC suppression state.
    bool IsEchoSuppressed() const;

private:
    EventGroupHandle_t event_group_ = nullptr;
    esp_afe_sr_iface_t* afe_iface_ = nullptr;
    esp_afe_sr_data_t* afe_data_ = nullptr;
    std::function<void(std::vector<int16_t>&& data)> output_callback_;
    // Complex VAD callback, dedicated to voice interruption.
    std::function<void(bool speaking)> vad_state_change_callback_;
    // Simple VAD callback, for ordinary business logic.
    std::function<void(bool speaking)> simple_vad_state_change_callback_;
    AudioCodec* codec_ = nullptr;
    bool is_speaking_ = false;

    // ---- Echo-aware optimization members ----
    EchoAwareVadParams echo_params_;
    float current_speaker_volume_ = 1.0f;
    std::chrono::steady_clock::time_point last_interrupt_time_;
    bool aec_converged_ = false;

    // ---- Adaptive noise suppression members ----
    AdaptiveNoiseState adaptive_state_;
    bool adaptive_enabled_ = false;

    void AudioProcessorTask();

    /// Echo-aware speech evaluation of one AFE fetch result.
    bool EvaluateSpeechWithEchoAwareness(afe_fetch_result_t* fetch_result);

    // ---- Adaptive noise suppression methods ----
    void UpdateAdaptiveNoiseState(const int16_t* audio_data, size_t sample_count);
    float CalculateEchoStrength(const int16_t* audio_data, size_t sample_count);
    float EstimateDistanceFactor(float echo_strength, float volume);
    void AdaptSuppressionLevel();
    AdaptiveNoiseState GetAdaptiveState() const;
};
#endif