/* toy-hardware/esp-spot/example/adf/coze_websocket/main/audio_processor.c */

/*
 * Espressif Modified MIT License
 *
 * Copyright (c) 2025 Espressif Systems (Shanghai) Co., LTD
 *
 * Permission is hereby granted for use **exclusively** with Espressif Systems products.
 * This includes the right to use, copy, modify, merge, publish, distribute, and sublicense
 * the Software, subject to the following conditions:
 *
 * 1. This Software **must be used in conjunction with Espressif Systems products**.
 * 2. The above copyright notice and this permission notice shall be included in all copies
 *    or substantial portions of the Software.
 * 3. Redistribution of the Software in source or binary form **for use with non-Espressif products**
 *    is strictly prohibited.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 * PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
 * FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * SPDX-License-Identifier: MIT-ESPRESSIF
 */

#include <stdio.h>

#include "esp_log.h"

#include "audio_pipeline.h"
#include "raw_stream.h"
#include "filter_resample.h"
#include "algorithm_stream.h"
#include "raw_stream.h"
#include "i2s_stream.h"
#include "raw_opus_decoder.h"
#include "audio_mem.h"
#include "board.h"

#include "audio_processor.h"

/* Log tag passed to the ESP_LOG* macros in this file; const because it points at a string literal. */
static const char *TAG = "audio_processor";

/**
 * Recorder state; allocated and filled in by recorder_pipeline_open() and
 * handed to callers as an audio_recorder_handle_t.
 */
struct audio_recorder_s {
    /* I2S reader element. It is created but not registered in the pipeline;
     * the algorithm stream reads from it through algo_read_data_callback(). */
    audio_element_handle_t  i2s_reader;
    /* Raw stream element registered as "raw_read"; recorder_pipeline_read() reads from it. */
    audio_element_handle_t  raw_stream;
    /* Algorithm stream element registered as "algo_stream"; first element of the link. */
    audio_element_handle_t  algo_stream;
    /* Pipeline linking "algo_stream" -> "raw_read". */
    audio_pipeline_handle_t pipeline;
};

/**
 * Player state; allocated and filled in by player_pipeline_open() and
 * handed to callers as an audio_player_handle_t.
 */
struct audio_player_s {
    /* I2S writer element. It is created but not registered in the pipeline;
     * the resample filter writes to it through opus_audio_data_callback(). */
    audio_element_handle_t  i2s_writer;
    /* Raw stream element registered as "raw_stream"; player_pipeline_write() writes into it. */
    audio_element_handle_t  raw_stream;
    /* Resample filter element registered as "filter"; last element of the link. */
    audio_element_handle_t  filter;
    /* Raw Opus decoder element registered as "raw_opus". */
    audio_element_handle_t  opus_decoder_stream;
    /* Pipeline linking "raw_stream" -> "raw_opus" -> "filter". */
    audio_pipeline_handle_t pipeline;
};

/**
 * Read callback installed on the algorithm stream by recorder_pipeline_open().
 *
 * @param self           Element the callback is attached to (unused)
 * @param buffer         Buffer forwarded to audio_element_input()
 * @param len            Length forwarded to audio_element_input()
 * @param ticks_to_wait  Timeout supplied by the framework (unused)
 * @param context        I2S reader element given when the callback was registered
 *
 * @return Result of audio_element_input() on the I2S reader
 */
static int algo_read_data_callback(audio_element_handle_t self, char *buffer, int len, TickType_t ticks_to_wait, void *context)
{
    (void)self;
    (void)ticks_to_wait;

    audio_element_handle_t source = (audio_element_handle_t)context;
    int result = audio_element_input(source, buffer, len);
    return result;
}

audio_recorder_handle_t recorder_pipeline_open()
{
    struct audio_recorder_s *recorder = audio_calloc(1, sizeof(struct audio_recorder_s));
    if (recorder == NULL) {
        ESP_LOGE(TAG, "No mem for recorder");
        return NULL;
    }
    audio_pipeline_cfg_t pipeline_cfg = DEFAULT_AUDIO_PIPELINE_CONFIG();
    recorder->pipeline = audio_pipeline_init(&pipeline_cfg);
    assert(recorder->pipeline);

#if CONFIG_ESP32_S3_KORVO2_V3_BOARD || CONFIG_ESP32_S3_BOX_BOARD
    i2s_stream_cfg_t i2s_cfg = I2S_STREAM_CFG_DEFAULT_WITH_PARA(0, 16000, I2S_DATA_BIT_WIDTH_32BIT, AUDIO_STREAM_READER);
    i2s_stream_set_channel_type(&i2s_cfg, I2S_CHANNEL_TYPE_ONLY_LEFT);
#else
    i2s_stream_cfg_t i2s_cfg = I2S_STREAM_CFG_DEFAULT_WITH_PARA(0, 16000, I2S_DATA_BIT_WIDTH_16BIT, AUDIO_STREAM_READER);
#endif
    i2s_cfg.task_stack = -1;
    recorder->i2s_reader = i2s_stream_init(&i2s_cfg);
    assert(recorder->i2s_reader);

    algorithm_stream_cfg_t algo_config = ALGORITHM_STREAM_CFG_DEFAULT();
    algo_config.sample_rate = 16000;
    algo_config.out_rb_size = 26 * 1024;
    algo_config.task_core = 1;
#if CONFIG_ESP32_S3_KORVO2_V3_BOARD || CONFIG_ESP32_S3_BOX_BOARD
    algo_config.input_format = "RM";
    #else
    algo_config.input_format = "MR";
#endif
    recorder->algo_stream = algo_stream_init(&algo_config);
    assert(recorder->algo_stream);
    audio_element_set_music_info(recorder->algo_stream, 16000, 1, 16);
    audio_element_set_read_cb(recorder->algo_stream, algo_read_data_callback, (void *)recorder->i2s_reader);
    audio_element_set_input_timeout(recorder->algo_stream, portMAX_DELAY);

    raw_stream_cfg_t raw_cfg = RAW_STREAM_CFG_DEFAULT();
    recorder->raw_stream = raw_stream_init(&raw_cfg);
    assert(recorder->raw_stream);

    audio_pipeline_register(recorder->pipeline, recorder->algo_stream, "algo_stream");
    audio_pipeline_register(recorder->pipeline, recorder->raw_stream, "raw_read");

    const char *link_tag2[2] = {"algo_stream", "raw_read"};
    audio_pipeline_link(recorder->pipeline, &link_tag2[0], 2);

    return (audio_recorder_handle_t)recorder;
}

/**
 * Start the recorder pipeline.
 *
 * @param recorder  Handle returned by recorder_pipeline_open()
 *
 * @return Result of audio_pipeline_run()
 */
esp_err_t recorder_pipeline_run(audio_recorder_handle_t recorder)
{
    esp_err_t ret = audio_pipeline_run(recorder->pipeline);
    return ret;
}

/**
 * Read recorded audio from the recorder's raw stream.
 *
 * @param recorder  Handle returned by recorder_pipeline_open()
 * @param buffer    Destination buffer
 * @param len       Length forwarded to raw_stream_read()
 *
 * @return The value returned by raw_stream_read(), passed through unchanged.
 *         NOTE(review): that is presumably a byte count rather than an
 *         esp_err_t code — confirm against raw_stream.h.
 */
esp_err_t recorder_pipeline_read(audio_recorder_handle_t recorder, char *buffer, int len)
{
    audio_element_handle_t source = recorder->raw_stream;
    return raw_stream_read(source, buffer, len);
}

/**
 * Stop the recorder pipeline, wait for it to stop, then reset its elements,
 * ring buffers and item states.
 *
 * @param recorder  Handle returned by recorder_pipeline_open()
 *
 * @return Always ESP_OK; the results of the individual pipeline calls are not checked
 */
esp_err_t recorder_pipeline_stop(audio_recorder_handle_t recorder)
{
    audio_pipeline_handle_t pipe = recorder->pipeline;

    /* Halt first ... */
    audio_pipeline_stop(pipe);
    audio_pipeline_wait_for_stop(pipe);

    /* ... then reset the pipeline state */
    audio_pipeline_reset_elements(pipe);
    audio_pipeline_reset_ringbuffer(pipe);
    audio_pipeline_reset_items_state(pipe);

    return ESP_OK;
}

/**
 * Tear down the recorder and release everything recorder_pipeline_open() created.
 *
 * The handle is invalid after this call returns ESP_OK.
 *
 * @param recorder  Handle returned by recorder_pipeline_open()
 *
 * @return ESP_OK on success, ESP_ERR_INVALID_ARG if recorder is NULL
 */
esp_err_t recorder_pipeline_close(audio_recorder_handle_t recorder)
{
    if (recorder == NULL) {
        return ESP_ERR_INVALID_ARG;
    }

    audio_pipeline_terminate(recorder->pipeline);
    /* NOTE(review): audio_pipeline_deinit() is expected to also release the
     * elements still registered in the pipeline (algo_stream, raw_stream) — confirm. */
    audio_pipeline_deinit(recorder->pipeline);

    /* The I2S reader was never registered in the pipeline, so it has to be
     * released separately, after the pipeline that reads from it is gone. */
    audio_element_deinit(recorder->i2s_reader);

    /* Release the structure allocated with audio_calloc() in recorder_pipeline_open() */
    audio_free(recorder);
    return ESP_OK;
}

/**
 * Write callback installed on the resample filter by player_pipeline_open().
 *
 * @param self           Element the callback is attached to (unused)
 * @param buffer         Buffer forwarded to audio_element_output()
 * @param len            Length forwarded to audio_element_output()
 * @param ticks_to_wait  Timeout supplied by the framework (unused)
 * @param context        I2S writer element given when the callback was registered
 *
 * @return Result of audio_element_output() on the I2S writer
 */
static int opus_audio_data_callback(audio_element_handle_t self, char *buffer, int len, TickType_t ticks_to_wait, void *context)
{
    (void)self;
    (void)ticks_to_wait;

    audio_element_handle_t sink = (audio_element_handle_t)context;
    int result = audio_element_output(sink, buffer, len);
    return result;
}

audio_player_handle_t player_pipeline_open()
{
    struct audio_player_s *player = audio_calloc(1, sizeof(struct audio_player_s));
    if (player == NULL) {
        ESP_LOGE(TAG, "No mem for player");
        return NULL;
    }
    audio_pipeline_cfg_t pipeline_cfg = DEFAULT_AUDIO_PIPELINE_CONFIG();
    player->pipeline = audio_pipeline_init(&pipeline_cfg);
    assert(player->pipeline);
#if CONFIG_ESP32_S3_KORVO2_V3_BOARD || CONFIG_ESP32_S3_BOX_BOARD
    i2s_stream_cfg_t i2s_cfg = I2S_STREAM_CFG_DEFAULT_WITH_PARA(0, 16000, I2S_DATA_BIT_WIDTH_32BIT, AUDIO_STREAM_WRITER);
    i2s_stream_set_channel_type(&i2s_cfg, I2S_CHANNEL_TYPE_ONLY_LEFT);
    i2s_cfg.need_expand = true;
#else
    i2s_stream_cfg_t i2s_cfg = I2S_STREAM_CFG_DEFAULT_WITH_PARA(0, 16000, I2S_DATA_BIT_WIDTH_16BIT, AUDIO_STREAM_WRITER);
#endif
    i2s_cfg.task_stack = -1;
    player->i2s_writer = i2s_stream_init(&i2s_cfg);
    assert(player->i2s_writer);

    raw_stream_cfg_t raw_cfg = RAW_STREAM_CFG_DEFAULT();
    player->raw_stream = raw_stream_init(&raw_cfg);
    assert(player->raw_stream);

    raw_opus_dec_cfg_t opus_dec_cfg = RAW_OPUS_DEC_CONFIG_DEFAULT();
    opus_dec_cfg.enable_frame_length_prefix = true;
    opus_dec_cfg.sample_rate = 16000;
    opus_dec_cfg.channels = 1;
    opus_dec_cfg.task_core = 1;
    player->opus_decoder_stream = raw_opus_decoder_init(&opus_dec_cfg);
    assert(player->opus_decoder_stream);

    rsp_filter_cfg_t filter_cfg = DEFAULT_RESAMPLE_FILTER_CONFIG();
    filter_cfg.src_ch = 1;
    filter_cfg.src_rate = 16000;
#if CONFIG_ESP32_S3_KORVO2_V3_BOARD || CONFIG_ESP32_S3_BOX_BOARD
    filter_cfg.dest_ch = 1;
#else
    filter_cfg.dest_ch = 2;
#endif
    filter_cfg.dest_rate = 16000;
    filter_cfg.stack_in_ext = true;
    filter_cfg.task_core = 1;
    filter_cfg.complexity = 2;
    player->filter = rsp_filter_init(&filter_cfg);
    assert(player->filter);
    audio_element_set_write_cb(player->filter, opus_audio_data_callback, (void *)player->i2s_writer);

    audio_pipeline_register(player->pipeline, player->raw_stream, "raw_stream");
    audio_pipeline_register(player->pipeline, player->opus_decoder_stream, "raw_opus");
    audio_pipeline_register(player->pipeline, player->filter, "filter");

    const char *link_tag[3] = {"raw_stream", "raw_opus", "filter"};
    audio_pipeline_link(player->pipeline, &link_tag[0], 3);
    return (audio_player_handle_t)player;
}

/**
 * Start the player pipeline.
 *
 * @param player  Handle returned by player_pipeline_open()
 *
 * @return Result of audio_pipeline_run()
 */
esp_err_t player_pipeline_run(audio_player_handle_t player)
{
    esp_err_t ret = audio_pipeline_run(player->pipeline);
    return ret;
}

/**
 * Stop the player pipeline, wait for it to stop, then reset its elements,
 * ring buffers and item states.
 *
 * @param player  Handle returned by player_pipeline_open()
 *
 * @return Always ESP_OK; the results of the individual pipeline calls are not checked
 */
esp_err_t player_pipeline_stop(audio_player_handle_t player)
{
    audio_pipeline_handle_t pipe = player->pipeline;

    /* Halt first ... */
    audio_pipeline_stop(pipe);
    audio_pipeline_wait_for_stop(pipe);

    /* ... then reset the pipeline state */
    audio_pipeline_reset_elements(pipe);
    audio_pipeline_reset_ringbuffer(pipe);
    audio_pipeline_reset_items_state(pipe);

    return ESP_OK;
}

/**
 * Write audio data into the player's raw stream.
 *
 * @param player  Handle returned by player_pipeline_open()
 * @param buffer  Data to write
 * @param len     Length forwarded to raw_stream_write()
 *
 * @return The value returned by raw_stream_write(), passed through unchanged.
 *         NOTE(review): that is presumably a byte count rather than an
 *         esp_err_t code — confirm against raw_stream.h.
 */
esp_err_t player_pipeline_write(audio_player_handle_t player, char *buffer, int len)
{
    audio_element_handle_t sink = player->raw_stream;
    return raw_stream_write(sink, buffer, len);
}

/**
 * Tear down the player and release everything player_pipeline_open() created.
 *
 * The handle is invalid after this call returns ESP_OK.
 *
 * @param player  Handle returned by player_pipeline_open()
 *
 * @return ESP_OK on success, ESP_ERR_INVALID_ARG if player is NULL
 */
esp_err_t player_pipeline_close(audio_player_handle_t player)
{
    if (player == NULL) {
        return ESP_ERR_INVALID_ARG;
    }

    audio_pipeline_terminate(player->pipeline);
    /* NOTE(review): audio_pipeline_deinit() is expected to also release the
     * elements still registered in the pipeline (raw_stream, opus decoder,
     * filter) — confirm. */
    audio_pipeline_deinit(player->pipeline);

    /* The I2S writer was never registered in the pipeline, so it has to be
     * released separately, after the filter that writes to it is gone. */
    audio_element_deinit(player->i2s_writer);

    /* Release the structure allocated with audio_calloc() in player_pipeline_open() */
    audio_free(player);
    return ESP_OK;
}