Rdzleo e95d0c414e Phase 01 批次 1-3: 单摄像头人脸追踪基础设施
实现 ESP32-S3 上单摄像头人脸追踪的核心代码骨架,替代 Grove Vision AI V2
模块,通过 UART 发送人脸坐标驱动 RP2040 控制的眼球/YAW 舵机。

## 规划文档(docs/phase-01-face-tracking/)

- GOAL.md       Phase 目标与 5 大成功标准
- RESEARCH.md   esp-dl v3.2/3.3 + human_face_detect 0.4.1 技术调研
- PLAN.md       15 个原子任务的执行计划(T01-T15)
- PLAN_CHECK.md 计划审查报告(PASS_WITH_NOTES)
- PROGRESS.md   执行进度追踪(批次 1-3 已完成)

## 批次 1:依赖与开关(T01-T03)

- main/idf_component.yml
  新增 esp-dl ~3.3.0 + human_face_detect 0.4.1(仅 S3/P4)
  esp-sr 从 ~2.2.0 升级到 ~2.3.1,解决 esp-dsp 1.6/1.7 版本冲突
- main/Kconfig.projbuild
  新增 CONFIG_XIAOZHI_ENABLE_FACE_TRACKING 开关(默认 y,depends on S3)
  新增 CONFIG_XIAOZHI_FACE_TRACKING_FPS_CHOICE(5/10/15)
- main/boards/common/esp32_camera.{h,cc}
  新增 ProbeFrameCapture() 最小 V4L2 DQBUF/QBUF 探针(T01)
- main/application.cc
  Start() 末尾调用 probe 验证摄像头硬件链路

## 批次 2:人脸检测核心(T04-T06)

- main/boards/common/esp32_camera.{h,cc}
  新增 FrameRef 结构体 + CaptureForDetection/ReleaseDetectionFrame
  双超时 mutex 策略:face_tracker 10ms timeout 跳帧,Capture() RAII guard
- main/face_tracker.{h,cc}(新建)
  Core 0 / 优先级 2 / 栈 8KB 独立任务
  集成 esp-dl HumanFaceDetect 推理
  坐标归一化 cx*224/W-112,匹配 RP2040 pixel_centre=112
  多人脸遍历挑 score 最高,避免多脸时眼球摇摆
  三重保护:Kconfig depends on S3 + 源文件 #if 守卫 + CMake 条件排除
- main/CMakeLists.txt
  非 S3 目标从 SOURCES 移除 face_tracker.cc

## 批次 3:UART 协议扩展(T07)

- main/uart_component.{h,cc}
  新增 uart_send_face(x,y) 发送 face:x,y\r\n 协议
  extern "C" 链接名配合 face_tracker 的弱符号声明
  全局 TX mutex 保护所有 UART 写入,防并发帧交织
  uart_send_string 同步加锁保持一致性

## 编译验证

idf.py build 通过,固件 2.51MB / 剩余 1.46MB (36% free)
当前 face_tracker 未被 application 激活(留到 T11),
UART/摄像头现有功能零影响。

## 未完成(下次继续)

- T01 硬件 probe 实机验证
- T08-T10 RP2040 端 parse_face + facetrack 双数据源改造
- T11-T15 application 接入 + 端到端联调 + 性能调优 + 最终验收

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-17 18:24:27 +08:00

933 lines
34 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "application.h"
#include "board.h"
#include "display.h"
#include "system_info.h"
#include "audio_codec.h"
#include "mqtt_protocol.h"
#include "websocket_protocol.h"
#include "assets/lang_config.h"
#include "mcp_server.h"
#include "assets.h"
#include "settings.h"
#include <cstring>
#include <esp_log.h>
#include <cJSON.h>
#include <driver/gpio.h>
#include <arpa/inet.h>
#include <font_awesome.h>
#include <uart_component.h>
// [T01] 临时 probe验证 OV3660 + esp_video 底层采集链路Phase 01
// 仅在非 ESP32原版目标上可用——esp32_camera 组件本身也是这个守卫
#ifndef CONFIG_IDF_TARGET_ESP32
#include <esp_timer.h>
#include "boards/common/esp32_camera.h"
#endif
#define TAG "Application"
// ESP32 设备状态字符串,用于日志输出
// 其中 "idle"、"listening"、"speaking" 会通过 UART 发送给 RP2040 驱动舵机动画
static const char* const STATE_STRINGS[] = {
"unknown",
"starting",
"configuring",
"idle", // → RP2040 执行 state_sleep闭眼、耳朵收起
"connecting",
"listening", // → RP2040 执行 state_listen好奇姿态、眼球随机看
"speaking", // → RP2040 执行 state_speaking嘴巴开合、眼球随机看、身体摆动
"upgrading",
"activating",
"audio_testing",
"fatal_error",
"invalid_state"
};
Application::Application() {
event_group_ = xEventGroupCreate();
#if CONFIG_USE_DEVICE_AEC && CONFIG_USE_SERVER_AEC
#error "CONFIG_USE_DEVICE_AEC and CONFIG_USE_SERVER_AEC cannot be enabled at the same time"
#elif CONFIG_USE_DEVICE_AEC
aec_mode_ = kAecOnDeviceSide;
#elif CONFIG_USE_SERVER_AEC
aec_mode_ = kAecOnServerSide;
#else
aec_mode_ = kAecOff;
#endif
esp_timer_create_args_t clock_timer_args = {
.callback = [](void* arg) {
Application* app = (Application*)arg;
xEventGroupSetBits(app->event_group_, MAIN_EVENT_CLOCK_TICK);
},
.arg = this,
.dispatch_method = ESP_TIMER_TASK,
.name = "clock_timer",
.skip_unhandled_events = true
};
esp_timer_create(&clock_timer_args, &clock_timer_handle_);
}
Application::~Application() {
if (clock_timer_handle_ != nullptr) {
esp_timer_stop(clock_timer_handle_);
esp_timer_delete(clock_timer_handle_);
}
vEventGroupDelete(event_group_);
}
void Application::CheckAssetsVersion() {
auto& board = Board::GetInstance();
auto display = board.GetDisplay();
auto& assets = Assets::GetInstance();
if (!assets.partition_valid()) {
ESP_LOGW(TAG, "Assets partition is disabled for board %s", BOARD_NAME);
return;
}
Settings settings("assets", true);
// Check if there is a new assets need to be downloaded
std::string download_url = settings.GetString("download_url");
if (!download_url.empty()) {
settings.EraseKey("download_url");
char message[256];
snprintf(message, sizeof(message), Lang::Strings::FOUND_NEW_ASSETS, download_url.c_str());
Alert(Lang::Strings::LOADING_ASSETS, message, "cloud_arrow_down", Lang::Sounds::OGG_UPGRADE);
// Wait for the audio service to be idle for 3 seconds
vTaskDelay(pdMS_TO_TICKS(3000));
SetDeviceState(kDeviceStateUpgrading);
board.SetPowerSaveMode(false);
display->SetChatMessage("system", Lang::Strings::PLEASE_WAIT);
bool success = assets.Download(download_url, [display](int progress, size_t speed) -> void {
std::thread([display, progress, speed]() {
char buffer[32];
snprintf(buffer, sizeof(buffer), "%d%% %uKB/s", progress, speed / 1024);
display->SetChatMessage("system", buffer);
}).detach();
});
board.SetPowerSaveMode(true);
vTaskDelay(pdMS_TO_TICKS(1000));
if (!success) {
Alert(Lang::Strings::ERROR, Lang::Strings::DOWNLOAD_ASSETS_FAILED, "circle_xmark", Lang::Sounds::OGG_EXCLAMATION);
vTaskDelay(pdMS_TO_TICKS(2000));
return;
}
}
// Apply assets
assets.Apply();
display->SetChatMessage("system", "");
display->SetEmotion("microchip_ai");
uart_send_string("configuring");
}
void Application::CheckNewVersion(Ota& ota) {
const int MAX_RETRY = 10;
int retry_count = 0;
int retry_delay = 10; // 初始重试延迟为10秒
auto& board = Board::GetInstance();
while (true) {
SetDeviceState(kDeviceStateActivating);
auto display = board.GetDisplay();
display->SetStatus(Lang::Strings::CHECKING_NEW_VERSION);
esp_err_t err = ota.CheckVersion();
if (err != ESP_OK) {
retry_count++;
if (retry_count >= MAX_RETRY) {
ESP_LOGE(TAG, "Too many retries, exit version check");
return;
}
char error_message[128];
snprintf(error_message, sizeof(error_message), "code=%d, url=%s", err, ota.GetCheckVersionUrl().c_str());
char buffer[256];
snprintf(buffer, sizeof(buffer), Lang::Strings::CHECK_NEW_VERSION_FAILED, retry_delay, error_message);
Alert(Lang::Strings::ERROR, buffer, "cloud_slash", Lang::Sounds::OGG_EXCLAMATION);
ESP_LOGW(TAG, "Check new version failed, retry in %d seconds (%d/%d)", retry_delay, retry_count, MAX_RETRY);
for (int i = 0; i < retry_delay; i++) {
vTaskDelay(pdMS_TO_TICKS(1000));
if (device_state_ == kDeviceStateIdle) {
break;
}
}
retry_delay *= 2; // 每次重试后延迟时间翻倍
continue;
}
retry_count = 0;
retry_delay = 10; // 重置重试延迟时间
if (ota.HasNewVersion()) {
if (UpgradeFirmware(ota)) {
return; // This line will never be reached after reboot
}
// If upgrade failed, continue to normal operation (don't break, just fall through)
}
// No new version, mark the current version as valid
ota.MarkCurrentVersionValid();
if (!ota.HasActivationCode() && !ota.HasActivationChallenge()) {
xEventGroupSetBits(event_group_, MAIN_EVENT_CHECK_NEW_VERSION_DONE);
// Exit the loop if done checking new version
break;
}
display->SetStatus(Lang::Strings::ACTIVATION);
// Activation code is shown to the user and waiting for the user to input
if (ota.HasActivationCode()) {
ShowActivationCode(ota.GetActivationCode(), ota.GetActivationMessage());
}
// This will block the loop until the activation is done or timeout
for (int i = 0; i < 10; ++i) {
ESP_LOGI(TAG, "Activating... %d/%d", i + 1, 10);
esp_err_t err = ota.Activate();
if (err == ESP_OK) {
xEventGroupSetBits(event_group_, MAIN_EVENT_CHECK_NEW_VERSION_DONE);
break;
} else if (err == ESP_ERR_TIMEOUT) {
vTaskDelay(pdMS_TO_TICKS(3000));
} else {
vTaskDelay(pdMS_TO_TICKS(10000));
}
if (device_state_ == kDeviceStateIdle) {
break;
}
}
}
}
void Application::ShowActivationCode(const std::string& code, const std::string& message) {
struct digit_sound {
char digit;
const std::string_view& sound;
};
static const std::array<digit_sound, 10> digit_sounds{{
digit_sound{'0', Lang::Sounds::OGG_0},
digit_sound{'1', Lang::Sounds::OGG_1},
digit_sound{'2', Lang::Sounds::OGG_2},
digit_sound{'3', Lang::Sounds::OGG_3},
digit_sound{'4', Lang::Sounds::OGG_4},
digit_sound{'5', Lang::Sounds::OGG_5},
digit_sound{'6', Lang::Sounds::OGG_6},
digit_sound{'7', Lang::Sounds::OGG_7},
digit_sound{'8', Lang::Sounds::OGG_8},
digit_sound{'9', Lang::Sounds::OGG_9}
}};
// This sentence uses 9KB of SRAM, so we need to wait for it to finish
Alert(Lang::Strings::ACTIVATION, message.c_str(), "link", Lang::Sounds::OGG_ACTIVATION);
for (const auto& digit : code) {
auto it = std::find_if(digit_sounds.begin(), digit_sounds.end(),
[digit](const digit_sound& ds) { return ds.digit == digit; });
if (it != digit_sounds.end()) {
audio_service_.PlaySound(it->sound);
}
}
}
void Application::Alert(const char* status, const char* message, const char* emotion, const std::string_view& sound) {
ESP_LOGW(TAG, "Alert [%s] %s: %s", emotion, status, message);
auto display = Board::GetInstance().GetDisplay();
display->SetStatus(status);
display->SetEmotion(emotion);
// Alert 时将情绪字符串(如 "happy"、"neutral")发送给 RP2040 触发对应舵机动画
uart_send_string(emotion);
display->SetChatMessage("system", message);
if (!sound.empty()) {
audio_service_.PlaySound(sound);
}
}
void Application::DismissAlert() {
if (device_state_ == kDeviceStateIdle) {
auto display = Board::GetInstance().GetDisplay();
display->SetStatus(Lang::Strings::STANDBY);
display->SetEmotion("neutral");
uart_send_string("idle");
display->SetChatMessage("system", "");
}
}
void Application::ToggleChatState() {
if (device_state_ == kDeviceStateActivating) {
SetDeviceState(kDeviceStateIdle);
return;
} else if (device_state_ == kDeviceStateWifiConfiguring) {
audio_service_.EnableAudioTesting(true);
SetDeviceState(kDeviceStateAudioTesting);
return;
} else if (device_state_ == kDeviceStateAudioTesting) {
audio_service_.EnableAudioTesting(false);
SetDeviceState(kDeviceStateWifiConfiguring);
return;
}
if (!protocol_) {
ESP_LOGE(TAG, "Protocol not initialized");
return;
}
if (device_state_ == kDeviceStateIdle) {
Schedule([this]() {
if (!protocol_->IsAudioChannelOpened()) {
SetDeviceState(kDeviceStateConnecting);
if (!protocol_->OpenAudioChannel()) {
return;
}
}
SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
});
} else if (device_state_ == kDeviceStateSpeaking) {
Schedule([this]() {
AbortSpeaking(kAbortReasonNone);
});
} else if (device_state_ == kDeviceStateListening) {
Schedule([this]() {
protocol_->CloseAudioChannel();
});
}
}
void Application::StartListening() {
if (device_state_ == kDeviceStateActivating) {
SetDeviceState(kDeviceStateIdle);
return;
} else if (device_state_ == kDeviceStateWifiConfiguring) {
audio_service_.EnableAudioTesting(true);
SetDeviceState(kDeviceStateAudioTesting);
return;
}
if (!protocol_) {
ESP_LOGE(TAG, "Protocol not initialized");
return;
}
if (device_state_ == kDeviceStateIdle) {
Schedule([this]() {
if (!protocol_->IsAudioChannelOpened()) {
SetDeviceState(kDeviceStateConnecting);
if (!protocol_->OpenAudioChannel()) {
return;
}
}
SetListeningMode(kListeningModeManualStop);
});
} else if (device_state_ == kDeviceStateSpeaking) {
Schedule([this]() {
AbortSpeaking(kAbortReasonNone);
SetListeningMode(kListeningModeManualStop);
});
}
}
void Application::StopListening() {
if (device_state_ == kDeviceStateAudioTesting) {
audio_service_.EnableAudioTesting(false);
SetDeviceState(kDeviceStateWifiConfiguring);
return;
}
const std::array<int, 3> valid_states = {
kDeviceStateListening,
kDeviceStateSpeaking,
kDeviceStateIdle,
};
// If not valid, do nothing
if (std::find(valid_states.begin(), valid_states.end(), device_state_) == valid_states.end()) {
return;
}
Schedule([this]() {
if (device_state_ == kDeviceStateListening) {
protocol_->SendStopListening();
SetDeviceState(kDeviceStateIdle);
}
});
}
void Application::Start() {
auto& board = Board::GetInstance();
SetDeviceState(kDeviceStateStarting);
/* Setup the display */
auto display = board.GetDisplay();
// Print board name/version info
display->SetChatMessage("system", SystemInfo::GetUserAgent().c_str());
/* Setup the audio service */
auto codec = board.GetAudioCodec();
audio_service_.Initialize(codec);
audio_service_.Start();
AudioServiceCallbacks callbacks;
callbacks.on_send_queue_available = [this]() {
xEventGroupSetBits(event_group_, MAIN_EVENT_SEND_AUDIO);
};
callbacks.on_wake_word_detected = [this](const std::string& wake_word) {
xEventGroupSetBits(event_group_, MAIN_EVENT_WAKE_WORD_DETECTED);
};
callbacks.on_vad_change = [this](bool speaking) {
xEventGroupSetBits(event_group_, MAIN_EVENT_VAD_CHANGE);
};
audio_service_.SetCallbacks(callbacks);
// Start the main event loop task with priority 3
xTaskCreate([](void* arg) {
((Application*)arg)->MainEventLoop();
vTaskDelete(NULL);
}, "main_event_loop", 2048 * 4, this, 3, &main_event_loop_task_handle_);
/* Start the clock timer to update the status bar */
esp_timer_start_periodic(clock_timer_handle_, 1000000);
/* Wait for the network to be ready */
board.StartNetwork();
// Update the status bar immediately to show the network state
display->UpdateStatusBar(true);
// Check for new assets version
CheckAssetsVersion();
// Check for new firmware version or get the MQTT broker address
Ota ota;
CheckNewVersion(ota);
// Initialize the protocol
display->SetStatus(Lang::Strings::LOADING_PROTOCOL);
// Add MCP common tools before initializing the protocol
auto& mcp_server = McpServer::GetInstance();
mcp_server.AddCommonTools();
mcp_server.AddUserOnlyTools();
if (ota.HasMqttConfig()) {
protocol_ = std::make_unique<MqttProtocol>();
} else if (ota.HasWebsocketConfig()) {
protocol_ = std::make_unique<WebsocketProtocol>();
} else {
ESP_LOGW(TAG, "No protocol specified in the OTA config, using MQTT");
protocol_ = std::make_unique<MqttProtocol>();
}
protocol_->OnConnected([this]() {
DismissAlert();
});
protocol_->OnNetworkError([this](const std::string& message) {
last_error_message_ = message;
xEventGroupSetBits(event_group_, MAIN_EVENT_ERROR);
});
protocol_->OnIncomingAudio([this](std::unique_ptr<AudioStreamPacket> packet) {
if (device_state_ == kDeviceStateSpeaking) {
audio_service_.PushPacketToDecodeQueue(std::move(packet));
}
});
protocol_->OnAudioChannelOpened([this, codec, &board]() {
board.SetPowerSaveMode(false);
if (protocol_->server_sample_rate() != codec->output_sample_rate()) {
ESP_LOGW(TAG, "Server sample rate %d does not match device output sample rate %d, resampling may cause distortion",
protocol_->server_sample_rate(), codec->output_sample_rate());
}
});
protocol_->OnAudioChannelClosed([this, &board]() {
board.SetPowerSaveMode(true);
Schedule([this]() {
auto display = Board::GetInstance().GetDisplay();
display->SetChatMessage("system", "");
SetDeviceState(kDeviceStateIdle);
});
});
protocol_->OnIncomingJson([this, display](const cJSON* root) {
// Parse JSON data
auto type = cJSON_GetObjectItem(root, "type");
if (strcmp(type->valuestring, "tts") == 0) {
auto state = cJSON_GetObjectItem(root, "state");
if (strcmp(state->valuestring, "start") == 0) {
Schedule([this]() {
aborted_ = false;
if (device_state_ == kDeviceStateIdle || device_state_ == kDeviceStateListening) {
SetDeviceState(kDeviceStateSpeaking);
}
});
} else if (strcmp(state->valuestring, "stop") == 0) {
Schedule([this]() {
if (device_state_ == kDeviceStateSpeaking) {
if (listening_mode_ == kListeningModeManualStop) {
SetDeviceState(kDeviceStateIdle);
} else {
SetDeviceState(kDeviceStateListening);
}
}
});
} else if (strcmp(state->valuestring, "sentence_start") == 0) {
auto text = cJSON_GetObjectItem(root, "text");
if (cJSON_IsString(text)) {
ESP_LOGI(TAG, "<< %s", text->valuestring);
Schedule([this, display, message = std::string(text->valuestring)]() {
display->SetChatMessage("assistant", message.c_str());
});
}
}
} else if (strcmp(type->valuestring, "stt") == 0) {
auto text = cJSON_GetObjectItem(root, "text");
if (cJSON_IsString(text)) {
ESP_LOGI(TAG, ">> %s", text->valuestring);
Schedule([this, display, message = std::string(text->valuestring)]() {
display->SetChatMessage("user", message.c_str());
});
}
} else if (strcmp(type->valuestring, "llm") == 0) {
// LLM 回复中携带的情绪标签(如 "happy"、"thinking"
auto emotion = cJSON_GetObjectItem(root, "emotion");
if (cJSON_IsString(emotion)) {
Schedule([this, display, emotion_str = std::string(emotion->valuestring)]() {
display->SetEmotion(emotion_str.c_str());
// 将 AI 返回的情绪标签发送给 RP2040实时切换舵机动画表情
uart_send_string(emotion_str.c_str());
});
}
} else if (strcmp(type->valuestring, "mcp") == 0) {
auto payload = cJSON_GetObjectItem(root, "payload");
if (cJSON_IsObject(payload)) {
McpServer::GetInstance().ParseMessage(payload);
}
} else if (strcmp(type->valuestring, "system") == 0) {
auto command = cJSON_GetObjectItem(root, "command");
if (cJSON_IsString(command)) {
ESP_LOGI(TAG, "System command: %s", command->valuestring);
if (strcmp(command->valuestring, "reboot") == 0) {
// Do a reboot if user requests a OTA update
Schedule([this]() {
Reboot();
});
} else {
ESP_LOGW(TAG, "Unknown system command: %s", command->valuestring);
}
}
} else if (strcmp(type->valuestring, "alert") == 0) {
auto status = cJSON_GetObjectItem(root, "status");
auto message = cJSON_GetObjectItem(root, "message");
auto emotion = cJSON_GetObjectItem(root, "emotion");
if (cJSON_IsString(status) && cJSON_IsString(message) && cJSON_IsString(emotion)) {
Alert(status->valuestring, message->valuestring, emotion->valuestring, Lang::Sounds::OGG_VIBRATION);
} else {
ESP_LOGW(TAG, "Alert command requires status, message and emotion");
}
#if CONFIG_RECEIVE_CUSTOM_MESSAGE
} else if (strcmp(type->valuestring, "custom") == 0) {
auto payload = cJSON_GetObjectItem(root, "payload");
ESP_LOGI(TAG, "Received custom message: %s", cJSON_PrintUnformatted(root));
if (cJSON_IsObject(payload)) {
Schedule([this, display, payload_str = std::string(cJSON_PrintUnformatted(payload))]() {
display->SetChatMessage("system", payload_str.c_str());
});
} else {
ESP_LOGW(TAG, "Invalid custom message format: missing payload");
}
#endif
} else {
ESP_LOGW(TAG, "Unknown message type: %s", type->valuestring);
}
});
bool protocol_started = protocol_->Start();
// [T01] 摄像头 V4L2 原始采集 sanity probePhase 01 验证 OV3660 底层链路)
// 完成 T04 CaptureForDetection 后删除此段调用(保留 ProbeFrameCapture API 作诊断用)
#ifndef CONFIG_IDF_TARGET_ESP32
{
auto* cam = dynamic_cast<Esp32Camera*>(board.GetCamera());
if (cam) {
int64_t elapsed = 0;
bool ok = cam->ProbeFrameCapture(&elapsed);
ESP_LOGI("T01_Probe", "V4L2 probe result=%d elapsed=%lldus",
ok, (long long)elapsed);
} else {
ESP_LOGW("T01_Probe", "no camera instance (board.GetCamera() returned null or non-Esp32Camera)");
}
}
#endif
SystemInfo::PrintHeapStats();
SetDeviceState(kDeviceStateIdle);
has_server_time_ = ota.HasServerTime();
if (protocol_started) {
std::string message = std::string(Lang::Strings::VERSION) + ota.GetCurrentVersion();
display->ShowNotification(message.c_str());
display->SetChatMessage("system", "");
// Play the success sound to indicate the device is ready
audio_service_.PlaySound(Lang::Sounds::OGG_SUCCESS);
}
}
// Add a async task to MainLoop
void Application::Schedule(std::function<void()> callback) {
{
std::lock_guard<std::mutex> lock(mutex_);
main_tasks_.push_back(std::move(callback));
}
xEventGroupSetBits(event_group_, MAIN_EVENT_SCHEDULE);
}
// The Main Event Loop controls the chat state and websocket connection
// If other tasks need to access the websocket or chat state,
// they should use Schedule to call this function
void Application::MainEventLoop() {
while (true) {
auto bits = xEventGroupWaitBits(event_group_, MAIN_EVENT_SCHEDULE |
MAIN_EVENT_SEND_AUDIO |
MAIN_EVENT_WAKE_WORD_DETECTED |
MAIN_EVENT_VAD_CHANGE |
MAIN_EVENT_CLOCK_TICK |
MAIN_EVENT_ERROR, pdTRUE, pdFALSE, portMAX_DELAY);
if (bits & MAIN_EVENT_ERROR) {
SetDeviceState(kDeviceStateIdle);
Alert(Lang::Strings::ERROR, last_error_message_.c_str(), "circle_xmark", Lang::Sounds::OGG_EXCLAMATION);
}
if (bits & MAIN_EVENT_SEND_AUDIO) {
while (auto packet = audio_service_.PopPacketFromSendQueue()) {
if (protocol_ && !protocol_->SendAudio(std::move(packet))) {
break;
}
}
}
if (bits & MAIN_EVENT_WAKE_WORD_DETECTED) {
OnWakeWordDetected();
}
if (bits & MAIN_EVENT_VAD_CHANGE) {
if (device_state_ == kDeviceStateListening) {
auto led = Board::GetInstance().GetLed();
led->OnStateChanged();
}
}
if (bits & MAIN_EVENT_SCHEDULE) {
std::unique_lock<std::mutex> lock(mutex_);
auto tasks = std::move(main_tasks_);
lock.unlock();
for (auto& task : tasks) {
task();
}
}
if (bits & MAIN_EVENT_CLOCK_TICK) {
clock_ticks_++;
auto display = Board::GetInstance().GetDisplay();
display->UpdateStatusBar();
// Print the debug info every 10 seconds
if (clock_ticks_ % 10 == 0) {
// SystemInfo::PrintTaskCpuUsage(pdMS_TO_TICKS(1000));
// SystemInfo::PrintTaskList();
SystemInfo::PrintHeapStats();
}
}
}
}
void Application::OnWakeWordDetected() {
if (!protocol_) {
return;
}
if (device_state_ == kDeviceStateIdle) {
audio_service_.EncodeWakeWord();
if (!protocol_->IsAudioChannelOpened()) {
SetDeviceState(kDeviceStateConnecting);
if (!protocol_->OpenAudioChannel()) {
audio_service_.EnableWakeWordDetection(true);
return;
}
}
auto wake_word = audio_service_.GetLastWakeWord();
ESP_LOGI(TAG, "Wake word detected: %s", wake_word.c_str());
#if CONFIG_SEND_WAKE_WORD_DATA
// Encode and send the wake word data to the server
while (auto packet = audio_service_.PopWakeWordPacket()) {
protocol_->SendAudio(std::move(packet));
}
// Set the chat state to wake word detected
protocol_->SendWakeWordDetected(wake_word);
SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
#else
SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
// Play the pop up sound to indicate the wake word is detected
audio_service_.PlaySound(Lang::Sounds::OGG_POPUP);
#endif
} else if (device_state_ == kDeviceStateSpeaking) {
AbortSpeaking(kAbortReasonWakeWordDetected);
} else if (device_state_ == kDeviceStateActivating) {
SetDeviceState(kDeviceStateIdle);
}
}
void Application::AbortSpeaking(AbortReason reason) {
ESP_LOGI(TAG, "Abort speaking");
aborted_ = true;
if (protocol_) {
protocol_->SendAbortSpeaking(reason);
}
}
void Application::SetListeningMode(ListeningMode mode) {
listening_mode_ = mode;
SetDeviceState(kDeviceStateListening);
}
void Application::SetDeviceState(DeviceState state) {
if (device_state_ == state) {
return;
}
clock_ticks_ = 0;
auto previous_state = device_state_;
device_state_ = state;
ESP_LOGI(TAG, "STATE: %s", STATE_STRINGS[device_state_]);
// Send the state change event
DeviceStateEventManager::GetInstance().PostStateChangeEvent(previous_state, state);
auto& board = Board::GetInstance();
auto display = board.GetDisplay();
auto led = board.GetLed();
led->OnStateChanged();
// 根据设备状态通过 UART 发送指令给 RP2040驱动舵机执行对应动画
switch (state) {
case kDeviceStateUnknown:
case kDeviceStateIdle:
display->SetStatus(Lang::Strings::STANDBY);
display->SetEmotion("neutral");
audio_service_.EnableVoiceProcessing(false);
audio_service_.EnableWakeWordDetection(true);
uart_send_string("idle"); // → RP2040: 进入睡眠姿态(闭眼、耳朵收起)
break;
case kDeviceStateConnecting:
display->SetStatus(Lang::Strings::CONNECTING);
display->SetEmotion("neutral");
display->SetChatMessage("system", "");
break;
case kDeviceStateListening:
display->SetStatus(Lang::Strings::LISTENING);
display->SetEmotion("neutral");
uart_send_string("listening"); // → RP2040: 进入聆听姿态(好奇歪头、眼球随机看)
// Make sure the audio processor is running
if (!audio_service_.IsAudioProcessorRunning()) {
// Send the start listening command
protocol_->SendStartListening(listening_mode_);
audio_service_.EnableVoiceProcessing(true);
audio_service_.EnableWakeWordDetection(false);
}
break;
case kDeviceStateSpeaking:
display->SetStatus(Lang::Strings::SPEAKING);
uart_send_string("speaking"); // → RP2040: 进入说话姿态(嘴巴开合、眼球随机看、身体摆动)
if (listening_mode_ != kListeningModeRealtime) {
audio_service_.EnableVoiceProcessing(false);
// Only AFE wake word can be detected in speaking mode
audio_service_.EnableWakeWordDetection(audio_service_.IsAfeWakeWord());
}
audio_service_.ResetDecoder();
break;
default:
// Do nothing
break;
}
}
void Application::Reboot() {
ESP_LOGI(TAG, "Rebooting...");
// Disconnect the audio channel
if (protocol_ && protocol_->IsAudioChannelOpened()) {
protocol_->CloseAudioChannel();
}
protocol_.reset();
audio_service_.Stop();
vTaskDelay(pdMS_TO_TICKS(1000));
esp_restart();
}
bool Application::UpgradeFirmware(Ota& ota, const std::string& url) {
auto& board = Board::GetInstance();
auto display = board.GetDisplay();
// Use provided URL or get from OTA object
std::string upgrade_url = url.empty() ? ota.GetFirmwareUrl() : url;
std::string version_info = url.empty() ? ota.GetFirmwareVersion() : "(Manual upgrade)";
// Close audio channel if it's open
if (protocol_ && protocol_->IsAudioChannelOpened()) {
ESP_LOGI(TAG, "Closing audio channel before firmware upgrade");
protocol_->CloseAudioChannel();
}
ESP_LOGI(TAG, "Starting firmware upgrade from URL: %s", upgrade_url.c_str());
Alert(Lang::Strings::OTA_UPGRADE, Lang::Strings::UPGRADING, "download", Lang::Sounds::OGG_UPGRADE);
vTaskDelay(pdMS_TO_TICKS(3000));
SetDeviceState(kDeviceStateUpgrading);
std::string message = std::string(Lang::Strings::NEW_VERSION) + version_info;
display->SetChatMessage("system", message.c_str());
board.SetPowerSaveMode(false);
audio_service_.Stop();
vTaskDelay(pdMS_TO_TICKS(1000));
bool upgrade_success = ota.StartUpgradeFromUrl(upgrade_url, [display](int progress, size_t speed) {
std::thread([display, progress, speed]() {
char buffer[32];
snprintf(buffer, sizeof(buffer), "%d%% %uKB/s", progress, speed / 1024);
display->SetChatMessage("system", buffer);
}).detach();
});
if (!upgrade_success) {
// Upgrade failed, restart audio service and continue running
ESP_LOGE(TAG, "Firmware upgrade failed, restarting audio service and continuing operation...");
audio_service_.Start(); // Restart audio service
board.SetPowerSaveMode(true); // Restore power save mode
Alert(Lang::Strings::ERROR, Lang::Strings::UPGRADE_FAILED, "circle_xmark", Lang::Sounds::OGG_EXCLAMATION);
vTaskDelay(pdMS_TO_TICKS(3000));
return false;
} else {
// Upgrade success, reboot immediately
ESP_LOGI(TAG, "Firmware upgrade successful, rebooting...");
display->SetChatMessage("system", "Upgrade successful, rebooting...");
vTaskDelay(pdMS_TO_TICKS(1000)); // Brief pause to show message
Reboot();
return true;
}
}
void Application::WakeWordInvoke(const std::string& wake_word) {
if (!protocol_) {
return;
}
if (device_state_ == kDeviceStateIdle) {
audio_service_.EncodeWakeWord();
if (!protocol_->IsAudioChannelOpened()) {
SetDeviceState(kDeviceStateConnecting);
if (!protocol_->OpenAudioChannel()) {
audio_service_.EnableWakeWordDetection(true);
return;
}
}
ESP_LOGI(TAG, "Wake word detected: %s", wake_word.c_str());
#if CONFIG_USE_AFE_WAKE_WORD || CONFIG_USE_CUSTOM_WAKE_WORD
// Encode and send the wake word data to the server
while (auto packet = audio_service_.PopWakeWordPacket()) {
protocol_->SendAudio(std::move(packet));
}
// Set the chat state to wake word detected
protocol_->SendWakeWordDetected(wake_word);
SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
#else
SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
// Play the pop up sound to indicate the wake word is detected
audio_service_.PlaySound(Lang::Sounds::OGG_POPUP);
#endif
} else if (device_state_ == kDeviceStateSpeaking) {
Schedule([this]() {
AbortSpeaking(kAbortReasonNone);
});
} else if (device_state_ == kDeviceStateListening) {
Schedule([this]() {
if (protocol_) {
protocol_->CloseAudioChannel();
}
});
}
}
bool Application::CanEnterSleepMode() {
if (device_state_ != kDeviceStateIdle) {
return false;
}
if (protocol_ && protocol_->IsAudioChannelOpened()) {
return false;
}
if (!audio_service_.IsIdle()) {
return false;
}
// Now it is safe to enter sleep mode
return true;
}
void Application::SendMcpMessage(const std::string& payload) {
if (protocol_ == nullptr) {
return;
}
// Make sure you are using main thread to send MCP message
if (xTaskGetCurrentTaskHandle() == main_event_loop_task_handle_) {
protocol_->SendMcpMessage(payload);
} else {
Schedule([this, payload = std::move(payload)]() {
protocol_->SendMcpMessage(payload);
});
}
}
void Application::SetAecMode(AecMode mode) {
aec_mode_ = mode;
Schedule([this]() {
auto& board = Board::GetInstance();
auto display = board.GetDisplay();
switch (aec_mode_) {
case kAecOff:
audio_service_.EnableDeviceAec(false);
display->ShowNotification(Lang::Strings::RTC_MODE_OFF);
break;
case kAecOnServerSide:
audio_service_.EnableDeviceAec(false);
display->ShowNotification(Lang::Strings::RTC_MODE_ON);
break;
case kAecOnDeviceSide:
audio_service_.EnableDeviceAec(true);
display->ShowNotification(Lang::Strings::RTC_MODE_ON);
break;
}
// If the AEC mode is changed, close the audio channel
if (protocol_ && protocol_->IsAudioChannelOpened()) {
protocol_->CloseAudioChannel();
}
});
}
void Application::PlaySound(const std::string_view& sound) {
audio_service_.PlaySound(sound);
}