CogletESP-camera-version/main/face_tracker.cc
Rdzleo e95d0c414e Phase 01 批次 1-3: 单摄像头人脸追踪基础设施
实现 ESP32-S3 上单摄像头人脸追踪的核心代码骨架,替代 Grove Vision AI V2
模块,通过 UART 发送人脸坐标驱动 RP2040 控制的眼球/YAW 舵机。

## 规划文档(docs/phase-01-face-tracking/)

- GOAL.md       Phase 目标与 5 大成功标准
- RESEARCH.md   esp-dl v3.2/3.3 + human_face_detect 0.4.1 技术调研
- PLAN.md       15 个原子任务的执行计划(T01-T15)
- PLAN_CHECK.md 计划审查报告(PASS_WITH_NOTES)
- PROGRESS.md   执行进度追踪(批次 1-3 已完成)

## 批次 1:依赖与开关(T01-T03)

- main/idf_component.yml
  新增 esp-dl ~3.3.0 + human_face_detect 0.4.1(仅 S3/P4)
  esp-sr 从 ~2.2.0 升级到 ~2.3.1,解决 esp-dsp 1.6/1.7 版本冲突
- main/Kconfig.projbuild
  新增 CONFIG_XIAOZHI_ENABLE_FACE_TRACKING 开关(默认 y,depends on S3)
  新增 CONFIG_XIAOZHI_FACE_TRACKING_FPS_CHOICE(5/10/15)
- main/boards/common/esp32_camera.{h,cc}
  新增 ProbeFrameCapture() 最小 V4L2 DQBUF/QBUF 探针(T01)
- main/application.cc
  Start() 末尾调用 probe 验证摄像头硬件链路

## 批次 2:人脸检测核心(T04-T06)

- main/boards/common/esp32_camera.{h,cc}
  新增 FrameRef 结构体 + CaptureForDetection/ReleaseDetectionFrame
  双超时 mutex 策略:face_tracker 10ms timeout 跳帧,Capture() RAII guard
- main/face_tracker.{h,cc}(新建)
  Core 0 / 优先级 2 / 栈 8KB 独立任务
  集成 esp-dl HumanFaceDetect 推理
  坐标归一化 cx*224/W-112,匹配 RP2040 pixel_centre=112
  多人脸遍历挑 score 最高,避免多脸时眼球摇摆
  三重保护:Kconfig depends on S3 + 源文件 #if 守卫 + CMake 条件排除
- main/CMakeLists.txt
  非 S3 目标从 SOURCES 移除 face_tracker.cc

## 批次 3:UART 协议扩展(T07)

- main/uart_component.{h,cc}
  新增 uart_send_face(x,y) 发送 face:x,y\r\n 协议
  extern "C" 链接名配合 face_tracker 的弱符号声明
  全局 TX mutex 保护所有 UART 写入,防并发帧交织
  uart_send_string 同步加锁保持一致性

## 编译验证

idf.py build 通过,固件 2.51MB / 剩余 1.46MB (36% free)
当前 face_tracker 未被 application 激活(留到 T11),
UART/摄像头现有功能零影响。

## 未完成(下次继续)

- T01 硬件 probe 实机验证
- T08-T10 RP2040 端 parse_face + facetrack 双数据源改造
- T11-T15 application 接入 + 端到端联调 + 性能调优 + 最终验收

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-17 18:24:27 +08:00

180 lines
6.4 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// [T05/T06] 人脸追踪任务
// 只有 ESP32-S3 + CONFIG_XIAOZHI_ENABLE_FACE_TRACKING=y 才编译完整实现
// 其他情况编译 3 个空函数,保证链接通过
#include "face_tracker.h"
#include "sdkconfig.h"
#if defined(CONFIG_XIAOZHI_ENABLE_FACE_TRACKING) && defined(CONFIG_IDF_TARGET_ESP32S3)
#include "human_face_detect.hpp"
#include "dl_image_define.hpp"
#include "dl_detect_define.hpp"
#include "board.h"
#include "esp32_camera.h"
#include <esp_heap_caps.h>
#include <esp_log.h>
#include <esp_timer.h>
#include <freertos/FreeRTOS.h>
#include <freertos/task.h>
#include <atomic>
#include <list>
#include <new>
// Log tag passed to every ESP_LOGx call in this file.
static const char* TAG = "FaceTracker";

// Handle of the tracker task. Filled in by xTaskCreatePinnedToCore() in
// face_tracker_start() and reset to nullptr by the task itself right before
// it deletes itself, so "non-null" is used as the "already running" test.
static TaskHandle_t s_handle = nullptr;

// Stop request flag: set by face_tracker_stop(), cleared by
// face_tracker_start(), polled by the tracker task once per loop iteration.
// It is shared between tasks, so it is an atomic rather than a `volatile`
// (volatile does not provide inter-thread visibility guarantees in C++).
static std::atomic<bool> s_stop{false};

// Most recent frames-per-second figure computed by the tracker task.
// NOTE(review): written by the tracker task and read from
// face_tracker_get_fps() without synchronization — confirm that an
// occasionally stale value is acceptable for its consumers.
static float s_last_fps = 0.0f;

// T06: uart_send_face is provided by T07 in uart_component.{h,cc}.
// A forward declaration with the weak attribute lets this file build before
// T07 exists; once T07 is in place the real implementation takes over the
// weak symbol and this file needs no change.
extern "C" __attribute__((weak)) void uart_send_face(int x_offset, int y_offset);
static void face_tracker_task(void* arg) {
(void)arg;
// 等待摄像头 ISP 预热 + 视频流启动稳定
vTaskDelay(pdMS_TO_TICKS(500));
ESP_LOGI(TAG, "face_tracker task started on core %d", xPortGetCoreID());
// 构造检测器:默认 model_type 由 CONFIG_DEFAULT_HUMAN_FACE_DETECT_MODEL 决定
// lazy_load=true默认以减少启动期内存瞬时占用
auto* detector = new(std::nothrow) HumanFaceDetect();
if (!detector) {
ESP_LOGE(TAG, "HumanFaceDetect 构造失败PSRAM 不足?)");
multi_heap_info_t info;
heap_caps_get_info(&info, MALLOC_CAP_SPIRAM);
ESP_LOGE(TAG, "PSRAM free=%u total_allocated=%u",
(unsigned)info.total_free_bytes,
(unsigned)info.total_allocated_bytes);
s_handle = nullptr;
vTaskDelete(NULL);
return;
}
// 一次性打印启动时 PSRAM 占用供诊断RESEARCH R2 风险跟踪)
{
multi_heap_info_t info;
heap_caps_get_info(&info, MALLOC_CAP_SPIRAM);
ESP_LOGI(TAG, "PSRAM after detector init: free=%u allocated=%u",
(unsigned)info.total_free_bytes,
(unsigned)info.total_allocated_bytes);
}
// 按 Kconfig 配置的 FPS 计算节拍
const TickType_t period = pdMS_TO_TICKS(1000 / CONFIG_XIAOZHI_FACE_TRACKING_FPS);
TickType_t last_wake = xTaskGetTickCount();
int hit = 0, miss = 0;
int64_t last_report_us = esp_timer_get_time();
while (!s_stop) {
vTaskDelayUntil(&last_wake, period);
auto* cam = dynamic_cast<Esp32Camera*>(Board::GetInstance().GetCamera());
if (!cam) {
continue;
}
Esp32Camera::FrameRef f;
if (!cam->CaptureForDetection(&f)) {
// [T04 策略] 拿不到 mutexMCP 拍照中)或 DQBUF 失败 → 正常跳帧
continue;
}
// 组装 esp-dl 图像描述符
// RESEARCH Pitfall A1先假定 YUYV若首轮 score 低于 0.5 可改 RGB565LE决策点 D-B
dl::image::img_t img{};
img.data = (void*)f.data;
img.width = f.width;
img.height = f.height;
img.pix_type = dl::image::DL_IMAGE_PIX_TYPE_YUYV;
int64_t t0 = esp_timer_get_time();
auto& results = detector->run(img);
int64_t t1 = esp_timer_get_time();
// 立即归还 V4L2 缓冲,避免 face_track 占用时间长
cam->ReleaseDetectionFrame(f);
if (results.empty()) {
miss++;
} else {
hit++;
// PLAN 未明确排序策略esp-dl 内部 nms 后 list 顺序不稳定
// 为健壮性,挑 score 最高的那个(避免多脸时摇摆)
const dl::detect::result_t* best = nullptr;
for (const auto& r : results) {
if (best == nullptr || r.score > best->score) {
best = &r;
}
}
// box: [left_up_x, left_up_y, right_down_x, right_down_y]
int cx = (best->box[0] + best->box[2]) / 2;
int cy = (best->box[1] + best->box[3]) / 2;
// 坐标映射RESEARCH Pitfall 7严格保持 cx * 224 / width - 112
// 对齐 RP2040 端 deadzone=20 / x_adj_factor=10 的基准
int x_offset = (f.width > 0) ? (cx * 224 / f.width - 112) : 0;
int y_offset = (f.height > 0) ? (cy * 224 / f.height - 112) : 0;
// T07 完成后uart_send_face 弱符号会被真实实现覆盖
if (uart_send_face != nullptr) {
uart_send_face(x_offset, y_offset);
}
ESP_LOGD(TAG, "face score=%.2f offset=(%d,%d) infer=%lldus",
best->score, x_offset, y_offset, (long long)(t1 - t0));
}
// 每 10 秒汇报一次统计(加保底避免除零)
int64_t now = esp_timer_get_time();
if (now - last_report_us > 10000000LL) {
float elapsed_s = (now - last_report_us) / 1e6f;
if (elapsed_s > 0.1f) {
s_last_fps = (hit + miss) / elapsed_s;
ESP_LOGI(TAG, "face stats: hit=%d miss=%d fps=%.1f",
hit, miss, s_last_fps);
}
hit = miss = 0;
last_report_us = now;
}
}
delete detector;
ESP_LOGI(TAG, "face_tracker task exiting");
s_handle = nullptr;
vTaskDelete(NULL);
}
extern "C" void face_tracker_start(void) {
if (s_handle != nullptr) {
ESP_LOGW(TAG, "face_tracker already running, ignore start");
return;
}
s_stop = false;
// Core 0 + 优先级 2低于 LVGL / 音频,避免抢占主路径
// 栈 8KB给 esp-dl 推理留充足空间
BaseType_t ok = xTaskCreatePinnedToCore(
face_tracker_task, "face_track",
8 * 1024, nullptr, 2, &s_handle, 0);
if (ok != pdPASS) {
ESP_LOGE(TAG, "xTaskCreatePinnedToCore failed");
s_handle = nullptr;
}
}
// Asks the face tracker task to end.
//
// Only raises the s_stop flag, which the task checks at the top of each loop
// iteration; this call returns immediately and does not wait for the task to
// exit.
extern "C" void face_tracker_stop(void) {
    s_stop = true;
}
// Returns the frame rate most recently published by the tracker task in
// s_last_fps (0.0f until the task has produced its first statistics report;
// the task refreshes it about every 10 seconds).
extern "C" float face_tracker_get_fps(void) {
    return s_last_fps;
}
#else // 非 S3 或功能未启用:提供空壳,保证链接通过
// Stub used when face tracking is disabled or the target is not ESP32-S3:
// does nothing, exists only so callers still link.
extern "C" void face_tracker_start(void) {
    // Intentionally empty.
}
// Stub used when face tracking is disabled or the target is not ESP32-S3:
// does nothing, exists only so callers still link.
extern "C" void face_tracker_stop(void) {
    // Intentionally empty.
}
// Stub used when face tracking is disabled or the target is not ESP32-S3:
// there is no tracker, so the reported frame rate is always zero.
extern "C" float face_tracker_get_fps(void) {
    const float no_tracker_fps = 0.0f;
    return no_tracker_fps;
}
#endif // CONFIG_XIAOZHI_ENABLE_FACE_TRACKING && CONFIG_IDF_TARGET_ESP32S3