实现 ESP32-S3 上单摄像头人脸追踪的核心代码骨架,替代 Grove Vision AI V2
模块,通过 UART 发送人脸坐标驱动 RP2040 控制的眼球/YAW 舵机。
## 规划文档(docs/phase-01-face-tracking/)
- GOAL.md Phase 目标与 5 大成功标准
- RESEARCH.md esp-dl v3.2/3.3 + human_face_detect 0.4.1 技术调研
- PLAN.md 15 个原子任务的执行计划(T01-T15)
- PLAN_CHECK.md 计划审查报告(PASS_WITH_NOTES)
- PROGRESS.md 执行进度追踪(批次 1-3 已完成)
## 批次 1:依赖与开关(T01-T03)
- main/idf_component.yml
新增 esp-dl ~3.3.0 + human_face_detect 0.4.1(仅 S3/P4)
esp-sr 从 ~2.2.0 升级到 ~2.3.1,解决 esp-dsp 1.6/1.7 版本冲突
- main/Kconfig.projbuild
新增 CONFIG_XIAOZHI_ENABLE_FACE_TRACKING 开关(默认 y,depends on S3)
新增 CONFIG_XIAOZHI_FACE_TRACKING_FPS_CHOICE(5/10/15)
- main/boards/common/esp32_camera.{h,cc}
新增 ProbeFrameCapture() 最小 V4L2 DQBUF/QBUF 探针(T01)
- main/application.cc
Start() 末尾调用 probe 验证摄像头硬件链路
## 批次 2:人脸检测核心(T04-T06)
- main/boards/common/esp32_camera.{h,cc}
新增 FrameRef 结构体 + CaptureForDetection/ReleaseDetectionFrame
双超时 mutex 策略:face_tracker 10ms timeout 跳帧,Capture() RAII guard
- main/face_tracker.{h,cc}(新建)
Core 0 / 优先级 2 / 栈 8KB 独立任务
集成 esp-dl HumanFaceDetect 推理
坐标归一化 cx*224/W-112,匹配 RP2040 pixel_centre=112
多人脸遍历挑 score 最高,避免多脸时眼球摇摆
三重保护:Kconfig depends on S3 + 源文件 #if 守卫 + CMake 条件排除
- main/CMakeLists.txt
非 S3 目标从 SOURCES 移除 face_tracker.cc
## 批次 3:UART 协议扩展(T07)
- main/uart_component.{h,cc}
新增 uart_send_face(x,y) 发送 face:x,y\r\n 协议
extern "C" 链接名配合 face_tracker 的弱符号声明
全局 TX mutex 保护所有 UART 写入,防并发帧交织
uart_send_string 同步加锁保持一致性
## 编译验证
idf.py build 通过,固件 2.51MB / 剩余 1.46MB (36% free)
当前 face_tracker 未被 application 激活(留到 T11),
UART/摄像头现有功能零影响。
## 未完成(下次继续)
- T01 硬件 probe 实机验证
- T08-T10 RP2040 端 parse_face + facetrack 双数据源改造
- T11-T15 application 接入 + 端到端联调 + 性能调优 + 最终验收
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
180 lines
6.4 KiB
C++
180 lines
6.4 KiB
C++
// [T05/T06] 人脸追踪任务
|
||
// 只有 ESP32-S3 + CONFIG_XIAOZHI_ENABLE_FACE_TRACKING=y 才编译完整实现
|
||
// 其他情况编译 3 个空函数,保证链接通过
|
||
|
||
#include "face_tracker.h"
|
||
#include "sdkconfig.h"
|
||
|
||
#if defined(CONFIG_XIAOZHI_ENABLE_FACE_TRACKING) && defined(CONFIG_IDF_TARGET_ESP32S3)
|
||
|
||
#include "human_face_detect.hpp"
|
||
#include "dl_image_define.hpp"
|
||
#include "dl_detect_define.hpp"
|
||
#include "board.h"
|
||
#include "esp32_camera.h"
|
||
|
||
#include <esp_heap_caps.h>
#include <esp_log.h>
#include <esp_timer.h>
#include <freertos/FreeRTOS.h>
#include <freertos/task.h>

#include <atomic>
#include <list>
#include <new>
|
||
|
||
static const char* TAG = "FaceTracker";
|
||
static TaskHandle_t s_handle = nullptr;
|
||
static volatile bool s_stop = false;
|
||
static float s_last_fps = 0.0f;
|
||
|
||
// T06: uart_send_face 由 T07 在 uart_component.{h,cc} 中提供
|
||
// 此处用前向声明 + 弱符号,让 T07 完成前 face_tracker.cc 仍能通过编译
|
||
// T07 完成后该弱符号被真实实现覆盖,无需改动本文件
|
||
extern "C" __attribute__((weak)) void uart_send_face(int x_offset, int y_offset);
|
||
|
||
static void face_tracker_task(void* arg) {
|
||
(void)arg;
|
||
// 等待摄像头 ISP 预热 + 视频流启动稳定
|
||
vTaskDelay(pdMS_TO_TICKS(500));
|
||
|
||
ESP_LOGI(TAG, "face_tracker task started on core %d", xPortGetCoreID());
|
||
|
||
// 构造检测器:默认 model_type 由 CONFIG_DEFAULT_HUMAN_FACE_DETECT_MODEL 决定
|
||
// lazy_load=true(默认)以减少启动期内存瞬时占用
|
||
auto* detector = new(std::nothrow) HumanFaceDetect();
|
||
if (!detector) {
|
||
ESP_LOGE(TAG, "HumanFaceDetect 构造失败(PSRAM 不足?)");
|
||
multi_heap_info_t info;
|
||
heap_caps_get_info(&info, MALLOC_CAP_SPIRAM);
|
||
ESP_LOGE(TAG, "PSRAM free=%u total_allocated=%u",
|
||
(unsigned)info.total_free_bytes,
|
||
(unsigned)info.total_allocated_bytes);
|
||
s_handle = nullptr;
|
||
vTaskDelete(NULL);
|
||
return;
|
||
}
|
||
|
||
// 一次性打印启动时 PSRAM 占用供诊断(RESEARCH R2 风险跟踪)
|
||
{
|
||
multi_heap_info_t info;
|
||
heap_caps_get_info(&info, MALLOC_CAP_SPIRAM);
|
||
ESP_LOGI(TAG, "PSRAM after detector init: free=%u allocated=%u",
|
||
(unsigned)info.total_free_bytes,
|
||
(unsigned)info.total_allocated_bytes);
|
||
}
|
||
|
||
// 按 Kconfig 配置的 FPS 计算节拍
|
||
const TickType_t period = pdMS_TO_TICKS(1000 / CONFIG_XIAOZHI_FACE_TRACKING_FPS);
|
||
TickType_t last_wake = xTaskGetTickCount();
|
||
int hit = 0, miss = 0;
|
||
int64_t last_report_us = esp_timer_get_time();
|
||
|
||
while (!s_stop) {
|
||
vTaskDelayUntil(&last_wake, period);
|
||
|
||
auto* cam = dynamic_cast<Esp32Camera*>(Board::GetInstance().GetCamera());
|
||
if (!cam) {
|
||
continue;
|
||
}
|
||
|
||
Esp32Camera::FrameRef f;
|
||
if (!cam->CaptureForDetection(&f)) {
|
||
// [T04 策略] 拿不到 mutex(MCP 拍照中)或 DQBUF 失败 → 正常跳帧
|
||
continue;
|
||
}
|
||
|
||
// 组装 esp-dl 图像描述符
|
||
// RESEARCH Pitfall A1:先假定 YUYV;若首轮 score 低于 0.5 可改 RGB565LE(决策点 D-B)
|
||
dl::image::img_t img{};
|
||
img.data = (void*)f.data;
|
||
img.width = f.width;
|
||
img.height = f.height;
|
||
img.pix_type = dl::image::DL_IMAGE_PIX_TYPE_YUYV;
|
||
|
||
int64_t t0 = esp_timer_get_time();
|
||
auto& results = detector->run(img);
|
||
int64_t t1 = esp_timer_get_time();
|
||
|
||
// 立即归还 V4L2 缓冲,避免 face_track 占用时间长
|
||
cam->ReleaseDetectionFrame(f);
|
||
|
||
if (results.empty()) {
|
||
miss++;
|
||
} else {
|
||
hit++;
|
||
// PLAN 未明确排序策略,esp-dl 内部 nms 后 list 顺序不稳定
|
||
// 为健壮性,挑 score 最高的那个(避免多脸时摇摆)
|
||
const dl::detect::result_t* best = nullptr;
|
||
for (const auto& r : results) {
|
||
if (best == nullptr || r.score > best->score) {
|
||
best = &r;
|
||
}
|
||
}
|
||
// box: [left_up_x, left_up_y, right_down_x, right_down_y]
|
||
int cx = (best->box[0] + best->box[2]) / 2;
|
||
int cy = (best->box[1] + best->box[3]) / 2;
|
||
// 坐标映射(RESEARCH Pitfall 7):严格保持 cx * 224 / width - 112
|
||
// 对齐 RP2040 端 deadzone=20 / x_adj_factor=10 的基准
|
||
int x_offset = (f.width > 0) ? (cx * 224 / f.width - 112) : 0;
|
||
int y_offset = (f.height > 0) ? (cy * 224 / f.height - 112) : 0;
|
||
|
||
// T07 完成后,uart_send_face 弱符号会被真实实现覆盖
|
||
if (uart_send_face != nullptr) {
|
||
uart_send_face(x_offset, y_offset);
|
||
}
|
||
ESP_LOGD(TAG, "face score=%.2f offset=(%d,%d) infer=%lldus",
|
||
best->score, x_offset, y_offset, (long long)(t1 - t0));
|
||
}
|
||
|
||
// 每 10 秒汇报一次统计(加保底避免除零)
|
||
int64_t now = esp_timer_get_time();
|
||
if (now - last_report_us > 10000000LL) {
|
||
float elapsed_s = (now - last_report_us) / 1e6f;
|
||
if (elapsed_s > 0.1f) {
|
||
s_last_fps = (hit + miss) / elapsed_s;
|
||
ESP_LOGI(TAG, "face stats: hit=%d miss=%d fps=%.1f",
|
||
hit, miss, s_last_fps);
|
||
}
|
||
hit = miss = 0;
|
||
last_report_us = now;
|
||
}
|
||
}
|
||
|
||
delete detector;
|
||
ESP_LOGI(TAG, "face_tracker task exiting");
|
||
s_handle = nullptr;
|
||
vTaskDelete(NULL);
|
||
}
|
||
|
||
extern "C" void face_tracker_start(void) {
|
||
if (s_handle != nullptr) {
|
||
ESP_LOGW(TAG, "face_tracker already running, ignore start");
|
||
return;
|
||
}
|
||
s_stop = false;
|
||
// Core 0 + 优先级 2:低于 LVGL / 音频,避免抢占主路径
|
||
// 栈 8KB:给 esp-dl 推理留充足空间
|
||
BaseType_t ok = xTaskCreatePinnedToCore(
|
||
face_tracker_task, "face_track",
|
||
8 * 1024, nullptr, 2, &s_handle, 0);
|
||
if (ok != pdPASS) {
|
||
ESP_LOGE(TAG, "xTaskCreatePinnedToCore failed");
|
||
s_handle = nullptr;
|
||
}
|
||
}
|
||
|
||
// Requests the face-tracking task to stop.
//
// This only raises the stop flag and returns immediately. The task checks the flag at the
// top of each loop iteration, then frees its detector, clears s_handle and deletes itself.
extern "C" void face_tracker_stop(void) {
    s_stop = true;
}
|
||
|
||
extern "C" float face_tracker_get_fps(void) {
|
||
return s_last_fps;
|
||
}
|
||
|
||
#else // 非 S3 或功能未启用:提供空壳,保证链接通过
|
||
|
||
// Stub used when face tracking is compiled out (non-S3 target or feature disabled):
// starting the tracker does nothing.
extern "C" void face_tracker_start(void) {
    // Intentionally empty.
}
|
||
// Stub used when face tracking is compiled out (non-S3 target or feature disabled):
// there is no task to stop, so this does nothing.
extern "C" void face_tracker_stop(void) {
    // Intentionally empty.
}
|
||
// Stub used when face tracking is compiled out (non-S3 target or feature disabled):
// no frames are ever processed, so the reported rate is always zero.
extern "C" float face_tracker_get_fps(void) {
    return 0.0f;
}
|
||
|
||
#endif // CONFIG_XIAOZHI_ENABLE_FACE_TRACKING && CONFIG_IDF_TARGET_ESP32S3
|