// toy-Kapi_Rtc/main/audio_processing/audio_processor.h

#ifndef AUDIO_PROCESSOR_H
#define AUDIO_PROCESSOR_H

#include <esp_afe_sr_models.h>
#include <freertos/FreeRTOS.h>
#include <freertos/task.h>
#include <freertos/event_groups.h>

#include <string>
#include <vector>
#include <functional>
#include <chrono>

#include "audio_codec.h"

// Tuning parameters for echo-aware VAD (voice activity detection).
// Every field carries a default, so a default-constructed instance is usable as-is.
struct EchoAwareVadParams {
    // --- VAD decision tuning ---
    float snr_threshold{0.3f};                  // Signal-to-noise ratio threshold
    int min_silence_ms{200};                    // Minimum silence duration (ms)
    int interrupt_cooldown_ms{500};             // Cooldown time between interrupts (ms)
    bool adaptive_threshold{true};              // Whether the adaptive threshold is enabled

    // --- Adaptive noise suppression tuning ---
    bool adaptive_noise_suppression{false};     // Whether adaptive noise suppression is enabled
    float noise_suppression_base{2.0f};         // Base noise suppression strength
    float volume_sensitivity{3.0f};             // Volume sensitivity
    float echo_detection_threshold{0.2f};       // Echo detection threshold
    float distance_estimation_factor{2.0f};     // Distance estimation factor
};

// Runtime state tracked for adaptive noise suppression.
// A default-constructed instance holds the initial values listed below.
struct AdaptiveNoiseState {
    float current_echo_strength{0.0f};          // Current echo strength
    float estimated_distance_factor{1.0f};      // Estimated distance factor
    float dynamic_suppression_level{1.0f};      // Dynamic suppression level
    float noise_baseline{0.0f};                 // Noise baseline
    bool high_interference_mode{false};         // High-interference mode flag
    // Time of the most recent adaptation; value-initialized, i.e. the clock's epoch.
    std::chrono::steady_clock::time_point last_adaptation_time{};
};

// Audio processor holding an ESP AFE interface/data pair (afe_iface_ / afe_data_).
// 16-bit audio is supplied through Feed(); callbacks are registered through
// OnOutput(), OnVadStateChange() and OnSimpleVadStateChange().
// NOTE(review): the implementation lives outside this header; thread-safety of
// the public methods cannot be determined from here.
class AudioProcessor {
public:
    AudioProcessor();
    ~AudioProcessor();

    // Non-copyable: the object stores raw handles (event_group_, afe_data_) and
    // declares its own destructor. A member-wise copy would alias those handles;
    // presumably the destructor releases them, in which case a copy would end in
    // a double release. Deleting the copy operations turns that into a compile error.
    AudioProcessor(const AudioProcessor&) = delete;
    AudioProcessor& operator=(const AudioProcessor&) = delete;

    // Sets up the processor for the given codec.
    // NOTE(review): realtime_chat presumably selects a real-time chat configuration — confirm in the .cpp.
    void Initialize(AudioCodec* codec, bool realtime_chat);
    // Supplies a buffer of 16-bit audio samples to the processor.
    void Feed(const std::vector<int16_t>& data);
    void Start();
    void Stop();
    bool IsRunning();
    // Output callback, used to hand the processed audio data to the outside.
    void OnOutput(std::function<void(std::vector<int16_t>&& data)> callback);
    // VAD state-change callback (stored in vad_state_change_callback_).
    void OnVadStateChange(std::function<void(bool speaking)> callback);
    // Simple VAD callback, intended for ordinary (non-interrupt) use.
    void OnSimpleVadStateChange(std::function<void(bool speaking)> callback);
    // NOTE(review): presumably the amount of data expected per Feed() call — confirm units in the .cpp.
    size_t GetFeedSize();

    // Added: echo-aware VAD tuning interface
    void SetEchoAwareParams(const EchoAwareVadParams& params);
    void SetSpeakerVolume(float volume); // Dynamically adjusts the VAD threshold
    bool IsEchoSuppressed() const;       // Checks the AEC suppression state

private:
    EventGroupHandle_t event_group_ = nullptr;
    const esp_afe_sr_iface_t* afe_iface_ = nullptr;
    esp_afe_sr_data_t* afe_data_ = nullptr;
    std::function<void(std::vector<int16_t>&& data)> output_callback_;
    std::function<void(bool speaking)> vad_state_change_callback_;          // Complex VAD callback (dedicated to voice interruption)
    std::function<void(bool speaking)> simple_vad_state_change_callback_;   // Simple VAD callback (ordinary use)
    AudioCodec* codec_ = nullptr;
    bool is_speaking_ = false;

    // Added: members for the echo-aware optimization
    EchoAwareVadParams echo_params_;
    float current_speaker_volume_ = 1.0f;
    std::chrono::steady_clock::time_point last_interrupt_time_;
    bool aec_converged_ = false;

    // Members for adaptive noise suppression
    AdaptiveNoiseState adaptive_state_;
    bool adaptive_enabled_ = false;

    void AudioProcessorTask();
    bool EvaluateSpeechWithEchoAwareness(afe_fetch_result_t* fetch_result); // Echo-aware speech evaluation

    // Adaptive noise suppression helpers
    void UpdateAdaptiveNoiseState(const int16_t* audio_data, size_t sample_count);
    float CalculateEchoStrength(const int16_t* audio_data, size_t sample_count);
    float EstimateDistanceFactor(float echo_strength, float volume);
    void AdaptSuppressionLevel();
    AdaptiveNoiseState GetAdaptiveState() const;
};

#endif