#pragma once #include "stdint.h" #include "dl_lib_convq_queue.h" #ifdef __cplusplus extern "C" { #endif //Opaque model data container typedef struct model_iface_data_t model_iface_data_t; /** * @brief The state of wakeup */ typedef enum { WAKENET_NO_DETECT = 0, // wake word is not detected WAKENET_CHANNEL_VERIFIED = -1, // output channel is verified WAKENET_DETECTED = 1 // wake word is detected } wakenet_state_t; //Set wake words recognition operating mode //The probability of being wake words is increased with increasing mode, //As a consequence also the false alarm rate goes up typedef enum { DET_MODE_90 = 0, // Normal DET_MODE_95 = 1, // Aggressive DET_MODE_2CH_90 = 2, DET_MODE_2CH_95 = 3, DET_MODE_3CH_90 = 4, DET_MODE_3CH_95 = 5, DET_MODE_90_COPY_PARAMS = 6, // Aggressive } det_mode_t; typedef struct { int wake_word_num; //The number of all wake words char **wake_word_list; //The name list of wake words } wake_word_info_t; /** * @brief Easy function type to initialze a model instance with a detection mode and specified wake word coefficient * * @param model_name The specified wake word model coefficient * @param det_mode The wake words detection mode to trigger wake words, DET_MODE_90 or DET_MODE_95 * @returns Handle to the model data */ typedef model_iface_data_t* (*esp_wn_iface_op_create_t)(const void *model_name, det_mode_t det_mode); /** * @brief Get the amount of samples that need to be passed to the detect function * * Every speech recognition model processes a certain number of samples at the same time. This function * can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes. * * @param model The model object to query * @return The amount of samples to feed the detect function */ typedef int (*esp_wn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model); /** * @brief Get the channel number of samples that need to be passed to the detect function * * Every speech recognition model processes a certain number of samples at the same time. This function * can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes. * * @param model The model object to query * @return The amount of samples to feed the detect function */ typedef int (*esp_wn_iface_op_get_channel_num_t)(model_iface_data_t *model); /** * @brief Get the start point of wake word when one wake word is detected. * * @Warning: This function should be called when the channel index is verified. * The returned value is the number of samples from start point of wake word to detected point. * * @param model The model object to query * @return The number of samples from start point to detected point (end point) */ typedef int (*esp_wn_iface_op_get_start_point_t)(model_iface_data_t *model); /** * @brief Get the sample rate of the samples to feed to the detect function * * @param model The model object to query * @return The sample rate, in hz */ typedef int (*esp_wn_iface_op_get_samp_rate_t)(model_iface_data_t *model); /** * @brief Get the number of wake words * * @param model The model object to query * @returns the number of wake words */ typedef int (*esp_wn_iface_op_get_word_num_t)(model_iface_data_t *model); /** * @brief Get the name of wake word by index * * @Warning The index of wake word start with 1 * @param model The model object to query * @param word_index The index of wake word * @returns the detection threshold */ typedef char* (*esp_wn_iface_op_get_word_name_t)(model_iface_data_t *model, int word_index); /** * @brief Set the detection threshold to manually abjust the probability * * @param model The model object to query * @param det_treshold The threshold to trigger wake words, the range of det_threshold is 0.4~0.9999 * @param word_index The index of wake word * @return 0: setting failed, 1: setting success */ typedef int (*esp_wn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold, int word_index); /** * @brief Reset the threshold to its initial state * * @param model The model object to query * @return 0: setting failed, 1: setting success */ typedef int (*esp_wn_iface_op_reset_det_threshold_t)(model_iface_data_t *model); /** * @brief Get the wake word detection threshold of different modes * * @param model The model object to query * @param word_index The index of wake word * @returns the detection threshold */ typedef float (*esp_wn_iface_op_get_det_threshold_t)(model_iface_data_t *model, int word_index); /** * @brief Feed samples of an audio stream to the keyword detection model and detect if there is a keyword found. * * @Warning The index of wake word start with 1, 0 means no wake words is detected. * * @param model The model object to query * @param samples An array of 16-bit signed audio samples. The array size used can be queried by the * get_samp_chunksize function. * @return The index of wake words, return 0 if no wake word is detected, else the index of the wake words. */ typedef wakenet_state_t (*esp_wn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples); /** * @brief Get the volume gain * * @param model The model object to query * @param target_db The target dB to calculate volume gain * @returns the volume gain */ typedef float (*esp_wn_iface_op_get_vol_gain_t)(model_iface_data_t *model, float target_db); /** * @brief Get the triggered channel index. Channel index starts from zero * * @param model The model object to query * @return The channel index */ typedef int (*esp_wn_iface_op_get_triggered_channel_t)(model_iface_data_t *model); /** * @brief Clean all states of model * * @param model The model object to query */ typedef void (*esp_wn_iface_op_clean_t)(model_iface_data_t *model); /** * @brief Destroy a speech recognition model * * @param model Model object to destroy */ typedef void (*esp_wn_iface_op_destroy_t)(model_iface_data_t *model); /** * @brief Feed MFCC of an audio stream to the vad model and detect whether is * voice. * * @param model The model object to query * @param cq An array of 16-bit MFCC. * @return The index of wake words, return 0 if no wake word is detected, else * the index of the wake words. */ typedef wakenet_state_t (*esp_wn_iface_op_detect_mfcc_t)(model_iface_data_t *model, int16_t *samples, dl_convq_queue_t *cq); /** * @brief Get MFCC of an audio stream * * @param model The model object to query * @return MFCC data */ typedef dl_convq_queue_t* (*esp_wn_iface_op_get_mfcc_data_t)(model_iface_data_t *model); /** * This structure contains the functions used to do operations on a wake word detection model. */ typedef struct { esp_wn_iface_op_create_t create; esp_wn_iface_op_get_start_point_t get_start_point; esp_wn_iface_op_get_samp_chunksize_t get_samp_chunksize; esp_wn_iface_op_get_channel_num_t get_channel_num; esp_wn_iface_op_get_samp_rate_t get_samp_rate; esp_wn_iface_op_get_word_num_t get_word_num; esp_wn_iface_op_get_word_name_t get_word_name; esp_wn_iface_op_set_det_threshold_t set_det_threshold; esp_wn_iface_op_reset_det_threshold_t reset_det_threshold; esp_wn_iface_op_get_det_threshold_t get_det_threshold; esp_wn_iface_op_get_triggered_channel_t get_triggered_channel; esp_wn_iface_op_get_vol_gain_t get_vol_gain; esp_wn_iface_op_detect_t detect; esp_wn_iface_op_detect_mfcc_t detect_mfcc; esp_wn_iface_op_get_mfcc_data_t get_mfcc_data; esp_wn_iface_op_clean_t clean; esp_wn_iface_op_destroy_t destroy; } esp_wn_iface_t; #ifdef __cplusplus } #endif