CogletESP-CogletESP/main/application.cc
Rdzleo 2a77110654
Some checks failed
Build Boards / Determine variants to build (push) Successful in 50s
Build Boards / Build ${{ matrix.name }} (push) Failing after 13m51s
新增语音控制机器人各部位舵机功能
1. RP2040 新增一次性动作系统(oneshot):
   - 支持多次来回摆动,动作完成后自动恢复原位
   - 动作期间临时提升舵机速度(boost 8-15倍),确保全幅度快速响应
   - 动作执行期间暂停常规状态动画和人脸追踪,避免舵机目标冲突
   - 支持12个单动作(左右耳/左右眼球/点头/摇头/歪头/张嘴/眨眼/闭眼/睁眼/转身)
   - 支持6个组合动作(双耳/点头眨眼/摇头/张望/卖萌/跳舞)

2. ESP32 新增语音指令识别(stt关键词匹配):
   - 从用户语音识别结果中匹配中文关键词,直接发送动作指令给RP2040
   - 不依赖AI回复格式,用户说话后立即触发动作
   - 支持ASR识别变体容错(如"耳朵"/"耳刀"/"耳"均可匹配)

3. ESP32 新增AI回复文本动作解析(备用通道):
   - 支持从AI回复文本中解析全角括号包裹的动作代码

4. 新增LLM情绪标签日志,便于调试AI返回内容

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-10 10:26:52 +08:00

1014 lines
39 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "application.h"
#include "board.h"
#include "display.h"
#include "system_info.h"
#include "audio_codec.h"
#include "mqtt_protocol.h"
#include "websocket_protocol.h"
#include "assets/lang_config.h"
#include "mcp_server.h"
#include "assets.h"
#include "settings.h"
#include <cstring>
#include <esp_log.h>
#include <cJSON.h>
#include <driver/gpio.h>
#include <arpa/inet.h>
#include <font_awesome.h>
#include <uart_component.h>
#define TAG "Application"
// ESP32 设备状态字符串,用于日志输出
// 其中 "idle"、"listening"、"speaking" 会通过 UART 发送给 RP2040 驱动舵机动画
static const char* const STATE_STRINGS[] = {
"unknown",
"starting",
"configuring",
"idle", // → RP2040 执行 state_sleep闭眼、耳朵收起
"connecting",
"listening", // → RP2040 执行 state_listen好奇姿态、眼球随机看
"speaking", // → RP2040 执行 state_speaking嘴巴开合、眼球随机看、身体摆动
"upgrading",
"activating",
"audio_testing",
"fatal_error",
"invalid_state"
};
Application::Application() {
event_group_ = xEventGroupCreate();
#if CONFIG_USE_DEVICE_AEC && CONFIG_USE_SERVER_AEC
#error "CONFIG_USE_DEVICE_AEC and CONFIG_USE_SERVER_AEC cannot be enabled at the same time"
#elif CONFIG_USE_DEVICE_AEC
aec_mode_ = kAecOnDeviceSide;
#elif CONFIG_USE_SERVER_AEC
aec_mode_ = kAecOnServerSide;
#else
aec_mode_ = kAecOff;
#endif
esp_timer_create_args_t clock_timer_args = {
.callback = [](void* arg) {
Application* app = (Application*)arg;
xEventGroupSetBits(app->event_group_, MAIN_EVENT_CLOCK_TICK);
},
.arg = this,
.dispatch_method = ESP_TIMER_TASK,
.name = "clock_timer",
.skip_unhandled_events = true
};
esp_timer_create(&clock_timer_args, &clock_timer_handle_);
}
Application::~Application() {
if (clock_timer_handle_ != nullptr) {
esp_timer_stop(clock_timer_handle_);
esp_timer_delete(clock_timer_handle_);
}
vEventGroupDelete(event_group_);
}
void Application::CheckAssetsVersion() {
auto& board = Board::GetInstance();
auto display = board.GetDisplay();
auto& assets = Assets::GetInstance();
if (!assets.partition_valid()) {
ESP_LOGW(TAG, "Assets partition is disabled for board %s", BOARD_NAME);
return;
}
Settings settings("assets", true);
// Check if there is a new assets need to be downloaded
std::string download_url = settings.GetString("download_url");
if (!download_url.empty()) {
settings.EraseKey("download_url");
char message[256];
snprintf(message, sizeof(message), Lang::Strings::FOUND_NEW_ASSETS, download_url.c_str());
Alert(Lang::Strings::LOADING_ASSETS, message, "cloud_arrow_down", Lang::Sounds::OGG_UPGRADE);
// Wait for the audio service to be idle for 3 seconds
vTaskDelay(pdMS_TO_TICKS(3000));
SetDeviceState(kDeviceStateUpgrading);
board.SetPowerSaveMode(false);
display->SetChatMessage("system", Lang::Strings::PLEASE_WAIT);
bool success = assets.Download(download_url, [display](int progress, size_t speed) -> void {
std::thread([display, progress, speed]() {
char buffer[32];
snprintf(buffer, sizeof(buffer), "%d%% %uKB/s", progress, speed / 1024);
display->SetChatMessage("system", buffer);
}).detach();
});
board.SetPowerSaveMode(true);
vTaskDelay(pdMS_TO_TICKS(1000));
if (!success) {
Alert(Lang::Strings::ERROR, Lang::Strings::DOWNLOAD_ASSETS_FAILED, "circle_xmark", Lang::Sounds::OGG_EXCLAMATION);
vTaskDelay(pdMS_TO_TICKS(2000));
return;
}
}
// Apply assets
assets.Apply();
display->SetChatMessage("system", "");
display->SetEmotion("microchip_ai");
uart_send_string("configuring");
}
void Application::CheckNewVersion(Ota& ota) {
const int MAX_RETRY = 10;
int retry_count = 0;
int retry_delay = 10; // 初始重试延迟为10秒
auto& board = Board::GetInstance();
while (true) {
SetDeviceState(kDeviceStateActivating);
auto display = board.GetDisplay();
display->SetStatus(Lang::Strings::CHECKING_NEW_VERSION);
esp_err_t err = ota.CheckVersion();
if (err != ESP_OK) {
retry_count++;
if (retry_count >= MAX_RETRY) {
ESP_LOGE(TAG, "Too many retries, exit version check");
return;
}
char error_message[128];
snprintf(error_message, sizeof(error_message), "code=%d, url=%s", err, ota.GetCheckVersionUrl().c_str());
char buffer[256];
snprintf(buffer, sizeof(buffer), Lang::Strings::CHECK_NEW_VERSION_FAILED, retry_delay, error_message);
Alert(Lang::Strings::ERROR, buffer, "cloud_slash", Lang::Sounds::OGG_EXCLAMATION);
ESP_LOGW(TAG, "Check new version failed, retry in %d seconds (%d/%d)", retry_delay, retry_count, MAX_RETRY);
for (int i = 0; i < retry_delay; i++) {
vTaskDelay(pdMS_TO_TICKS(1000));
if (device_state_ == kDeviceStateIdle) {
break;
}
}
retry_delay *= 2; // 每次重试后延迟时间翻倍
continue;
}
retry_count = 0;
retry_delay = 10; // 重置重试延迟时间
if (ota.HasNewVersion()) {
if (UpgradeFirmware(ota)) {
return; // This line will never be reached after reboot
}
// If upgrade failed, continue to normal operation (don't break, just fall through)
}
// No new version, mark the current version as valid
ota.MarkCurrentVersionValid();
if (!ota.HasActivationCode() && !ota.HasActivationChallenge()) {
xEventGroupSetBits(event_group_, MAIN_EVENT_CHECK_NEW_VERSION_DONE);
// Exit the loop if done checking new version
break;
}
display->SetStatus(Lang::Strings::ACTIVATION);
// Activation code is shown to the user and waiting for the user to input
if (ota.HasActivationCode()) {
ShowActivationCode(ota.GetActivationCode(), ota.GetActivationMessage());
}
// This will block the loop until the activation is done or timeout
for (int i = 0; i < 10; ++i) {
ESP_LOGI(TAG, "Activating... %d/%d", i + 1, 10);
esp_err_t err = ota.Activate();
if (err == ESP_OK) {
xEventGroupSetBits(event_group_, MAIN_EVENT_CHECK_NEW_VERSION_DONE);
break;
} else if (err == ESP_ERR_TIMEOUT) {
vTaskDelay(pdMS_TO_TICKS(3000));
} else {
vTaskDelay(pdMS_TO_TICKS(10000));
}
if (device_state_ == kDeviceStateIdle) {
break;
}
}
}
}
void Application::ShowActivationCode(const std::string& code, const std::string& message) {
struct digit_sound {
char digit;
const std::string_view& sound;
};
static const std::array<digit_sound, 10> digit_sounds{{
digit_sound{'0', Lang::Sounds::OGG_0},
digit_sound{'1', Lang::Sounds::OGG_1},
digit_sound{'2', Lang::Sounds::OGG_2},
digit_sound{'3', Lang::Sounds::OGG_3},
digit_sound{'4', Lang::Sounds::OGG_4},
digit_sound{'5', Lang::Sounds::OGG_5},
digit_sound{'6', Lang::Sounds::OGG_6},
digit_sound{'7', Lang::Sounds::OGG_7},
digit_sound{'8', Lang::Sounds::OGG_8},
digit_sound{'9', Lang::Sounds::OGG_9}
}};
// This sentence uses 9KB of SRAM, so we need to wait for it to finish
Alert(Lang::Strings::ACTIVATION, message.c_str(), "link", Lang::Sounds::OGG_ACTIVATION);
for (const auto& digit : code) {
auto it = std::find_if(digit_sounds.begin(), digit_sounds.end(),
[digit](const digit_sound& ds) { return ds.digit == digit; });
if (it != digit_sounds.end()) {
audio_service_.PlaySound(it->sound);
}
}
}
void Application::Alert(const char* status, const char* message, const char* emotion, const std::string_view& sound) {
ESP_LOGW(TAG, "Alert [%s] %s: %s", emotion, status, message);
auto display = Board::GetInstance().GetDisplay();
display->SetStatus(status);
display->SetEmotion(emotion);
// Alert 时将情绪字符串(如 "happy"、"neutral")发送给 RP2040 触发对应舵机动画
uart_send_string(emotion);
display->SetChatMessage("system", message);
if (!sound.empty()) {
audio_service_.PlaySound(sound);
}
}
void Application::DismissAlert() {
if (device_state_ == kDeviceStateIdle) {
auto display = Board::GetInstance().GetDisplay();
display->SetStatus(Lang::Strings::STANDBY);
display->SetEmotion("neutral");
uart_send_string("idle");
display->SetChatMessage("system", "");
}
}
void Application::ToggleChatState() {
if (device_state_ == kDeviceStateActivating) {
SetDeviceState(kDeviceStateIdle);
return;
} else if (device_state_ == kDeviceStateWifiConfiguring) {
audio_service_.EnableAudioTesting(true);
SetDeviceState(kDeviceStateAudioTesting);
return;
} else if (device_state_ == kDeviceStateAudioTesting) {
audio_service_.EnableAudioTesting(false);
SetDeviceState(kDeviceStateWifiConfiguring);
return;
}
if (!protocol_) {
ESP_LOGE(TAG, "Protocol not initialized");
return;
}
if (device_state_ == kDeviceStateIdle) {
Schedule([this]() {
if (!protocol_->IsAudioChannelOpened()) {
SetDeviceState(kDeviceStateConnecting);
if (!protocol_->OpenAudioChannel()) {
return;
}
}
SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
});
} else if (device_state_ == kDeviceStateSpeaking) {
Schedule([this]() {
AbortSpeaking(kAbortReasonNone);
});
} else if (device_state_ == kDeviceStateListening) {
Schedule([this]() {
protocol_->CloseAudioChannel();
});
}
}
void Application::StartListening() {
if (device_state_ == kDeviceStateActivating) {
SetDeviceState(kDeviceStateIdle);
return;
} else if (device_state_ == kDeviceStateWifiConfiguring) {
audio_service_.EnableAudioTesting(true);
SetDeviceState(kDeviceStateAudioTesting);
return;
}
if (!protocol_) {
ESP_LOGE(TAG, "Protocol not initialized");
return;
}
if (device_state_ == kDeviceStateIdle) {
Schedule([this]() {
if (!protocol_->IsAudioChannelOpened()) {
SetDeviceState(kDeviceStateConnecting);
if (!protocol_->OpenAudioChannel()) {
return;
}
}
SetListeningMode(kListeningModeManualStop);
});
} else if (device_state_ == kDeviceStateSpeaking) {
Schedule([this]() {
AbortSpeaking(kAbortReasonNone);
SetListeningMode(kListeningModeManualStop);
});
}
}
void Application::StopListening() {
if (device_state_ == kDeviceStateAudioTesting) {
audio_service_.EnableAudioTesting(false);
SetDeviceState(kDeviceStateWifiConfiguring);
return;
}
const std::array<int, 3> valid_states = {
kDeviceStateListening,
kDeviceStateSpeaking,
kDeviceStateIdle,
};
// If not valid, do nothing
if (std::find(valid_states.begin(), valid_states.end(), device_state_) == valid_states.end()) {
return;
}
Schedule([this]() {
if (device_state_ == kDeviceStateListening) {
protocol_->SendStopListening();
SetDeviceState(kDeviceStateIdle);
}
});
}
void Application::Start() {
auto& board = Board::GetInstance();
SetDeviceState(kDeviceStateStarting);
/* Setup the display */
auto display = board.GetDisplay();
// Print board name/version info
display->SetChatMessage("system", SystemInfo::GetUserAgent().c_str());
/* Setup the audio service */
auto codec = board.GetAudioCodec();
audio_service_.Initialize(codec);
audio_service_.Start();
AudioServiceCallbacks callbacks;
callbacks.on_send_queue_available = [this]() {
xEventGroupSetBits(event_group_, MAIN_EVENT_SEND_AUDIO);
};
callbacks.on_wake_word_detected = [this](const std::string& wake_word) {
xEventGroupSetBits(event_group_, MAIN_EVENT_WAKE_WORD_DETECTED);
};
callbacks.on_vad_change = [this](bool speaking) {
xEventGroupSetBits(event_group_, MAIN_EVENT_VAD_CHANGE);
};
audio_service_.SetCallbacks(callbacks);
// Start the main event loop task with priority 3
xTaskCreate([](void* arg) {
((Application*)arg)->MainEventLoop();
vTaskDelete(NULL);
}, "main_event_loop", 2048 * 4, this, 3, &main_event_loop_task_handle_);
/* Start the clock timer to update the status bar */
esp_timer_start_periodic(clock_timer_handle_, 1000000);
/* Wait for the network to be ready */
board.StartNetwork();
// Update the status bar immediately to show the network state
display->UpdateStatusBar(true);
// Check for new assets version
CheckAssetsVersion();
// Check for new firmware version or get the MQTT broker address
Ota ota;
CheckNewVersion(ota);
// Initialize the protocol
display->SetStatus(Lang::Strings::LOADING_PROTOCOL);
// Add MCP common tools before initializing the protocol
auto& mcp_server = McpServer::GetInstance();
mcp_server.AddCommonTools();
mcp_server.AddUserOnlyTools();
if (ota.HasMqttConfig()) {
protocol_ = std::make_unique<MqttProtocol>();
} else if (ota.HasWebsocketConfig()) {
protocol_ = std::make_unique<WebsocketProtocol>();
} else {
ESP_LOGW(TAG, "No protocol specified in the OTA config, using MQTT");
protocol_ = std::make_unique<MqttProtocol>();
}
protocol_->OnConnected([this]() {
DismissAlert();
});
protocol_->OnNetworkError([this](const std::string& message) {
last_error_message_ = message;
xEventGroupSetBits(event_group_, MAIN_EVENT_ERROR);
});
protocol_->OnIncomingAudio([this](std::unique_ptr<AudioStreamPacket> packet) {
if (device_state_ == kDeviceStateSpeaking) {
audio_service_.PushPacketToDecodeQueue(std::move(packet));
}
});
protocol_->OnAudioChannelOpened([this, codec, &board]() {
board.SetPowerSaveMode(false);
if (protocol_->server_sample_rate() != codec->output_sample_rate()) {
ESP_LOGW(TAG, "Server sample rate %d does not match device output sample rate %d, resampling may cause distortion",
protocol_->server_sample_rate(), codec->output_sample_rate());
}
});
protocol_->OnAudioChannelClosed([this, &board]() {
board.SetPowerSaveMode(true);
Schedule([this]() {
auto display = Board::GetInstance().GetDisplay();
display->SetChatMessage("system", "");
SetDeviceState(kDeviceStateIdle);
});
});
protocol_->OnIncomingJson([this, display](const cJSON* root) {
// Parse JSON data
auto type = cJSON_GetObjectItem(root, "type");
if (strcmp(type->valuestring, "tts") == 0) {
auto state = cJSON_GetObjectItem(root, "state");
if (strcmp(state->valuestring, "start") == 0) {
Schedule([this]() {
aborted_ = false;
if (device_state_ == kDeviceStateIdle || device_state_ == kDeviceStateListening) {
SetDeviceState(kDeviceStateSpeaking);
}
});
} else if (strcmp(state->valuestring, "stop") == 0) {
Schedule([this]() {
if (device_state_ == kDeviceStateSpeaking) {
if (listening_mode_ == kListeningModeManualStop) {
SetDeviceState(kDeviceStateIdle);
} else {
SetDeviceState(kDeviceStateListening);
}
}
});
} else if (strcmp(state->valuestring, "sentence_start") == 0) {
auto text = cJSON_GetObjectItem(root, "text");
if (cJSON_IsString(text)) {
ESP_LOGI(TAG, "<< %s", text->valuestring);
// 从 AI 回复文本中解析动作代码,发送给 RP2040
// 格式EAL好啦好啦 TTS 的 IgnoreBracketText 会自动跳过()内容
// = UTF-8: 0xEF 0xBC 0x88 = UTF-8: 0xEF 0xBC 0x89
const char* t = text->valuestring;
if ((unsigned char)t[0] == 0xEF && (unsigned char)t[1] == 0xBC && (unsigned char)t[2] == 0x88) {
// 找到全角左括号(,搜索对应的)
const char* content = t + 3;
const char* end = content;
while (*end) {
if ((unsigned char)end[0] == 0xEF && (unsigned char)end[1] == 0xBC && (unsigned char)end[2] == 0x89) {
break;
}
end++;
}
if (*end) {
int code_len = end - content;
if (code_len > 0 && code_len <= 8) {
char action_code[9] = {};
memcpy(action_code, content, code_len);
static const struct { const char* code; const char* cmd; } action_table[] = {
// 单动作
{"EAL", "act_ear_left"},
{"EAR", "act_ear_right"},
{"EYL", "act_eye_left"},
{"EYR", "act_eye_right"},
{"TLL", "act_tilt_left"},
{"TLR", "act_tilt_right"},
{"NOD", "act_nod"},
{"MOU", "act_mouth"},
{"BLK", "act_blink"},
{"ECL", "act_eyes_close"},
{"EOP", "act_eyes_open"},
{"BTN", "act_body_turn"},
// 组合动作
{"EARB", "act_ears_both"},
{"NDBK", "act_nod_blink"},
{"SHAK", "act_shake_head"},
{"LOOK", "act_look_around"},
{"CUTE", "act_cute"},
{"DANC", "act_dance"},
};
for (const auto& entry : action_table) {
if (strcmp(action_code, entry.code) == 0) {
ESP_LOGI(TAG, "Action: %s -> %s", action_code, entry.cmd);
uart_send_string(entry.cmd);
break;
}
}
}
}
}
Schedule([this, display, message = std::string(text->valuestring)]() {
display->SetChatMessage("assistant", message.c_str());
});
}
}
} else if (strcmp(type->valuestring, "stt") == 0) {
auto text = cJSON_GetObjectItem(root, "text");
if (cJSON_IsString(text)) {
ESP_LOGI(TAG, ">> %s", text->valuestring);
// 从用户语音指令中匹配身体动作关键词,直接发送给 RP2040
const char* s = text->valuestring;
const char* action_cmd = nullptr;
// 判断是否包含"耳"相关("耳朵"或"耳"ASR 可能识别为"耳刀"等)
bool has_ear = strstr(s, "") != nullptr;
bool has_left = strstr(s, "") != nullptr;
bool has_right = strstr(s, "") != nullptr;
// 优先匹配带方向的耳朵
if (has_ear && has_left) {
action_cmd = "act_ear_left";
} else if (has_ear && has_right) {
action_cmd = "act_ear_right";
} else if (has_ear) {
action_cmd = "act_ears_both";
// 头部动作
} else if (strstr(s, "点头") || strstr(s, "点一下头") || strstr(s, "点点头")) {
action_cmd = "act_nod";
} else if (strstr(s, "摇头") || strstr(s, "摇摇头") || strstr(s, "摇一下头")) {
action_cmd = "act_shake_head";
} else if (strstr(s, "") && has_left) {
action_cmd = "act_tilt_left";
} else if (strstr(s, "") && has_right) {
action_cmd = "act_tilt_right";
} else if (strstr(s, "歪头") || strstr(s, "歪一下")) {
action_cmd = "act_tilt_left";
// 眼睛动作
} else if ((strstr(s, "") || strstr(s, "")) && has_left) {
action_cmd = "act_eye_left";
} else if ((strstr(s, "") || strstr(s, "")) && has_right) {
action_cmd = "act_eye_right";
} else if (strstr(s, "眨眼") || strstr(s, "眨一下")) {
action_cmd = "act_blink";
} else if (strstr(s, "闭眼") || strstr(s, "闭上眼")) {
action_cmd = "act_eyes_close";
} else if (strstr(s, "睁眼") || strstr(s, "睁开眼")) {
action_cmd = "act_eyes_open";
// 嘴巴
} else if (strstr(s, "张嘴") || strstr(s, "嘴巴") || strstr(s, "张开嘴")) {
action_cmd = "act_mouth";
// 身体
} else if (strstr(s, "转身") || strstr(s, "转动") || strstr(s, "转个") || strstr(s, "转一下")) {
action_cmd = "act_body_turn";
// 组合/特殊
} else if (strstr(s, "卖萌")) {
action_cmd = "act_cute";
} else if (strstr(s, "跳舞") || strstr(s, "跳个舞")) {
action_cmd = "act_dance";
}
if (action_cmd) {
ESP_LOGI(TAG, "Voice action: [%s] -> %s", s, action_cmd);
uart_send_string(action_cmd);
}
Schedule([this, display, message = std::string(text->valuestring)]() {
display->SetChatMessage("user", message.c_str());
});
}
} else if (strcmp(type->valuestring, "llm") == 0) {
// LLM 回复中携带的情绪标签(如 "happy"、"thinking"
auto emotion = cJSON_GetObjectItem(root, "emotion");
if (cJSON_IsString(emotion)) {
ESP_LOGI(TAG, "LLM emotion: [%s]", emotion->valuestring);
Schedule([this, display, emotion_str = std::string(emotion->valuestring)]() {
display->SetEmotion(emotion_str.c_str());
// 将 AI 返回的情绪标签发送给 RP2040实时切换舵机动画表情
uart_send_string(emotion_str.c_str());
});
}
} else if (strcmp(type->valuestring, "mcp") == 0) {
auto payload = cJSON_GetObjectItem(root, "payload");
if (cJSON_IsObject(payload)) {
McpServer::GetInstance().ParseMessage(payload);
}
} else if (strcmp(type->valuestring, "system") == 0) {
auto command = cJSON_GetObjectItem(root, "command");
if (cJSON_IsString(command)) {
ESP_LOGI(TAG, "System command: %s", command->valuestring);
if (strcmp(command->valuestring, "reboot") == 0) {
// Do a reboot if user requests a OTA update
Schedule([this]() {
Reboot();
});
} else {
ESP_LOGW(TAG, "Unknown system command: %s", command->valuestring);
}
}
} else if (strcmp(type->valuestring, "alert") == 0) {
auto status = cJSON_GetObjectItem(root, "status");
auto message = cJSON_GetObjectItem(root, "message");
auto emotion = cJSON_GetObjectItem(root, "emotion");
if (cJSON_IsString(status) && cJSON_IsString(message) && cJSON_IsString(emotion)) {
Alert(status->valuestring, message->valuestring, emotion->valuestring, Lang::Sounds::OGG_VIBRATION);
} else {
ESP_LOGW(TAG, "Alert command requires status, message and emotion");
}
#if CONFIG_RECEIVE_CUSTOM_MESSAGE
} else if (strcmp(type->valuestring, "custom") == 0) {
auto payload = cJSON_GetObjectItem(root, "payload");
ESP_LOGI(TAG, "Received custom message: %s", cJSON_PrintUnformatted(root));
if (cJSON_IsObject(payload)) {
Schedule([this, display, payload_str = std::string(cJSON_PrintUnformatted(payload))]() {
display->SetChatMessage("system", payload_str.c_str());
});
} else {
ESP_LOGW(TAG, "Invalid custom message format: missing payload");
}
#endif
} else {
ESP_LOGW(TAG, "Unknown message type: %s", type->valuestring);
}
});
bool protocol_started = protocol_->Start();
SystemInfo::PrintHeapStats();
SetDeviceState(kDeviceStateIdle);
has_server_time_ = ota.HasServerTime();
if (protocol_started) {
std::string message = std::string(Lang::Strings::VERSION) + ota.GetCurrentVersion();
display->ShowNotification(message.c_str());
display->SetChatMessage("system", "");
// Play the success sound to indicate the device is ready
audio_service_.PlaySound(Lang::Sounds::OGG_SUCCESS);
}
}
// Add a async task to MainLoop
void Application::Schedule(std::function<void()> callback) {
{
std::lock_guard<std::mutex> lock(mutex_);
main_tasks_.push_back(std::move(callback));
}
xEventGroupSetBits(event_group_, MAIN_EVENT_SCHEDULE);
}
// The Main Event Loop controls the chat state and websocket connection
// If other tasks need to access the websocket or chat state,
// they should use Schedule to call this function
void Application::MainEventLoop() {
while (true) {
auto bits = xEventGroupWaitBits(event_group_, MAIN_EVENT_SCHEDULE |
MAIN_EVENT_SEND_AUDIO |
MAIN_EVENT_WAKE_WORD_DETECTED |
MAIN_EVENT_VAD_CHANGE |
MAIN_EVENT_CLOCK_TICK |
MAIN_EVENT_ERROR, pdTRUE, pdFALSE, portMAX_DELAY);
if (bits & MAIN_EVENT_ERROR) {
SetDeviceState(kDeviceStateIdle);
Alert(Lang::Strings::ERROR, last_error_message_.c_str(), "circle_xmark", Lang::Sounds::OGG_EXCLAMATION);
}
if (bits & MAIN_EVENT_SEND_AUDIO) {
while (auto packet = audio_service_.PopPacketFromSendQueue()) {
if (protocol_ && !protocol_->SendAudio(std::move(packet))) {
break;
}
}
}
if (bits & MAIN_EVENT_WAKE_WORD_DETECTED) {
OnWakeWordDetected();
}
if (bits & MAIN_EVENT_VAD_CHANGE) {
if (device_state_ == kDeviceStateListening) {
auto led = Board::GetInstance().GetLed();
led->OnStateChanged();
}
}
if (bits & MAIN_EVENT_SCHEDULE) {
std::unique_lock<std::mutex> lock(mutex_);
auto tasks = std::move(main_tasks_);
lock.unlock();
for (auto& task : tasks) {
task();
}
}
if (bits & MAIN_EVENT_CLOCK_TICK) {
clock_ticks_++;
auto display = Board::GetInstance().GetDisplay();
display->UpdateStatusBar();
// Print the debug info every 10 seconds
if (clock_ticks_ % 10 == 0) {
// SystemInfo::PrintTaskCpuUsage(pdMS_TO_TICKS(1000));
// SystemInfo::PrintTaskList();
SystemInfo::PrintHeapStats();
}
}
}
}
void Application::OnWakeWordDetected() {
if (!protocol_) {
return;
}
if (device_state_ == kDeviceStateIdle) {
audio_service_.EncodeWakeWord();
if (!protocol_->IsAudioChannelOpened()) {
SetDeviceState(kDeviceStateConnecting);
if (!protocol_->OpenAudioChannel()) {
audio_service_.EnableWakeWordDetection(true);
return;
}
}
auto wake_word = audio_service_.GetLastWakeWord();
ESP_LOGI(TAG, "Wake word detected: %s", wake_word.c_str());
#if CONFIG_SEND_WAKE_WORD_DATA
// Encode and send the wake word data to the server
while (auto packet = audio_service_.PopWakeWordPacket()) {
protocol_->SendAudio(std::move(packet));
}
// Set the chat state to wake word detected
protocol_->SendWakeWordDetected(wake_word);
SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
#else
SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
// Play the pop up sound to indicate the wake word is detected
audio_service_.PlaySound(Lang::Sounds::OGG_POPUP);
#endif
} else if (device_state_ == kDeviceStateSpeaking) {
AbortSpeaking(kAbortReasonWakeWordDetected);
} else if (device_state_ == kDeviceStateActivating) {
SetDeviceState(kDeviceStateIdle);
}
}
void Application::AbortSpeaking(AbortReason reason) {
ESP_LOGI(TAG, "Abort speaking");
aborted_ = true;
if (protocol_) {
protocol_->SendAbortSpeaking(reason);
}
}
void Application::SetListeningMode(ListeningMode mode) {
listening_mode_ = mode;
SetDeviceState(kDeviceStateListening);
}
void Application::SetDeviceState(DeviceState state) {
if (device_state_ == state) {
return;
}
clock_ticks_ = 0;
auto previous_state = device_state_;
device_state_ = state;
ESP_LOGI(TAG, "STATE: %s", STATE_STRINGS[device_state_]);
// Send the state change event
DeviceStateEventManager::GetInstance().PostStateChangeEvent(previous_state, state);
auto& board = Board::GetInstance();
auto display = board.GetDisplay();
auto led = board.GetLed();
led->OnStateChanged();
// 根据设备状态通过 UART 发送指令给 RP2040驱动舵机执行对应动画
switch (state) {
case kDeviceStateUnknown:
case kDeviceStateIdle:
display->SetStatus(Lang::Strings::STANDBY);
display->SetEmotion("neutral");
audio_service_.EnableVoiceProcessing(false);
audio_service_.EnableWakeWordDetection(true);
uart_send_string("idle"); // → RP2040: 进入睡眠姿态(闭眼、耳朵收起)
break;
case kDeviceStateConnecting:
display->SetStatus(Lang::Strings::CONNECTING);
display->SetEmotion("neutral");
display->SetChatMessage("system", "");
break;
case kDeviceStateListening:
display->SetStatus(Lang::Strings::LISTENING);
display->SetEmotion("neutral");
uart_send_string("listening"); // → RP2040: 进入聆听姿态(好奇歪头、眼球随机看)
// Make sure the audio processor is running
if (!audio_service_.IsAudioProcessorRunning()) {
// Send the start listening command
protocol_->SendStartListening(listening_mode_);
audio_service_.EnableVoiceProcessing(true);
audio_service_.EnableWakeWordDetection(false);
}
break;
case kDeviceStateSpeaking:
display->SetStatus(Lang::Strings::SPEAKING);
uart_send_string("speaking"); // → RP2040: 进入说话姿态(嘴巴开合、眼球随机看、身体摆动)
if (listening_mode_ != kListeningModeRealtime) {
audio_service_.EnableVoiceProcessing(false);
// Only AFE wake word can be detected in speaking mode
audio_service_.EnableWakeWordDetection(audio_service_.IsAfeWakeWord());
}
audio_service_.ResetDecoder();
break;
default:
// Do nothing
break;
}
}
void Application::Reboot() {
ESP_LOGI(TAG, "Rebooting...");
// Disconnect the audio channel
if (protocol_ && protocol_->IsAudioChannelOpened()) {
protocol_->CloseAudioChannel();
}
protocol_.reset();
audio_service_.Stop();
vTaskDelay(pdMS_TO_TICKS(1000));
esp_restart();
}
bool Application::UpgradeFirmware(Ota& ota, const std::string& url) {
auto& board = Board::GetInstance();
auto display = board.GetDisplay();
// Use provided URL or get from OTA object
std::string upgrade_url = url.empty() ? ota.GetFirmwareUrl() : url;
std::string version_info = url.empty() ? ota.GetFirmwareVersion() : "(Manual upgrade)";
// Close audio channel if it's open
if (protocol_ && protocol_->IsAudioChannelOpened()) {
ESP_LOGI(TAG, "Closing audio channel before firmware upgrade");
protocol_->CloseAudioChannel();
}
ESP_LOGI(TAG, "Starting firmware upgrade from URL: %s", upgrade_url.c_str());
Alert(Lang::Strings::OTA_UPGRADE, Lang::Strings::UPGRADING, "download", Lang::Sounds::OGG_UPGRADE);
vTaskDelay(pdMS_TO_TICKS(3000));
SetDeviceState(kDeviceStateUpgrading);
std::string message = std::string(Lang::Strings::NEW_VERSION) + version_info;
display->SetChatMessage("system", message.c_str());
board.SetPowerSaveMode(false);
audio_service_.Stop();
vTaskDelay(pdMS_TO_TICKS(1000));
bool upgrade_success = ota.StartUpgradeFromUrl(upgrade_url, [display](int progress, size_t speed) {
std::thread([display, progress, speed]() {
char buffer[32];
snprintf(buffer, sizeof(buffer), "%d%% %uKB/s", progress, speed / 1024);
display->SetChatMessage("system", buffer);
}).detach();
});
if (!upgrade_success) {
// Upgrade failed, restart audio service and continue running
ESP_LOGE(TAG, "Firmware upgrade failed, restarting audio service and continuing operation...");
audio_service_.Start(); // Restart audio service
board.SetPowerSaveMode(true); // Restore power save mode
Alert(Lang::Strings::ERROR, Lang::Strings::UPGRADE_FAILED, "circle_xmark", Lang::Sounds::OGG_EXCLAMATION);
vTaskDelay(pdMS_TO_TICKS(3000));
return false;
} else {
// Upgrade success, reboot immediately
ESP_LOGI(TAG, "Firmware upgrade successful, rebooting...");
display->SetChatMessage("system", "Upgrade successful, rebooting...");
vTaskDelay(pdMS_TO_TICKS(1000)); // Brief pause to show message
Reboot();
return true;
}
}
void Application::WakeWordInvoke(const std::string& wake_word) {
if (!protocol_) {
return;
}
if (device_state_ == kDeviceStateIdle) {
audio_service_.EncodeWakeWord();
if (!protocol_->IsAudioChannelOpened()) {
SetDeviceState(kDeviceStateConnecting);
if (!protocol_->OpenAudioChannel()) {
audio_service_.EnableWakeWordDetection(true);
return;
}
}
ESP_LOGI(TAG, "Wake word detected: %s", wake_word.c_str());
#if CONFIG_USE_AFE_WAKE_WORD || CONFIG_USE_CUSTOM_WAKE_WORD
// Encode and send the wake word data to the server
while (auto packet = audio_service_.PopWakeWordPacket()) {
protocol_->SendAudio(std::move(packet));
}
// Set the chat state to wake word detected
protocol_->SendWakeWordDetected(wake_word);
SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
#else
SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
// Play the pop up sound to indicate the wake word is detected
audio_service_.PlaySound(Lang::Sounds::OGG_POPUP);
#endif
} else if (device_state_ == kDeviceStateSpeaking) {
Schedule([this]() {
AbortSpeaking(kAbortReasonNone);
});
} else if (device_state_ == kDeviceStateListening) {
Schedule([this]() {
if (protocol_) {
protocol_->CloseAudioChannel();
}
});
}
}
bool Application::CanEnterSleepMode() {
if (device_state_ != kDeviceStateIdle) {
return false;
}
if (protocol_ && protocol_->IsAudioChannelOpened()) {
return false;
}
if (!audio_service_.IsIdle()) {
return false;
}
// Now it is safe to enter sleep mode
return true;
}
void Application::SendMcpMessage(const std::string& payload) {
if (protocol_ == nullptr) {
return;
}
// Make sure you are using main thread to send MCP message
if (xTaskGetCurrentTaskHandle() == main_event_loop_task_handle_) {
protocol_->SendMcpMessage(payload);
} else {
Schedule([this, payload = std::move(payload)]() {
protocol_->SendMcpMessage(payload);
});
}
}
void Application::SetAecMode(AecMode mode) {
aec_mode_ = mode;
Schedule([this]() {
auto& board = Board::GetInstance();
auto display = board.GetDisplay();
switch (aec_mode_) {
case kAecOff:
audio_service_.EnableDeviceAec(false);
display->ShowNotification(Lang::Strings::RTC_MODE_OFF);
break;
case kAecOnServerSide:
audio_service_.EnableDeviceAec(false);
display->ShowNotification(Lang::Strings::RTC_MODE_ON);
break;
case kAecOnDeviceSide:
audio_service_.EnableDeviceAec(true);
display->ShowNotification(Lang::Strings::RTC_MODE_ON);
break;
}
// If the AEC mode is changed, close the audio channel
if (protocol_ && protocol_->IsAudioChannelOpened()) {
protocol_->CloseAudioChannel();
}
});
}
void Application::PlaySound(const std::string_view& sound) {
audio_service_.PlaySound(sound);
}