CogletESP-camera-version/main/face_tracker.cc
Rdzleo fc07d3806d Phase 01 调试迭代: OV3660 人脸检测集成 + 23 条踩坑经验汇总
## 代码变更

### main/application.cc
修复 T01 probe 日志 %lld 格式 bug(改用 %lu + unsigned long)

### main/boards/common/esp32_camera.cc
- 修复 DVP V4L2 单 buffer 导致 DMA 饥饿:req.count 从 1 改为 2
- 修复 [T01] Probe 日志 elapsed=ldus 显示问题(同上格式修复)

### main/face_tracker.cc
多轮迭代:
- 新增 frame debug 诊断日志(打印 top-left/center 16B + zero_bytes 统计)
- pix_type 尝试路径:YUYV → RGB565LE → RGB565BE → YUYV → RGB888(手动转换)
- 手动实现 BT.601 公式 YUYV→RGB888 转换,绕过 ImagePreprocessor 黑盒
- face_tracker 任务从 Core 0 切换到 Core 1,避让 RMT/LED 死锁
- 新增 INFO 级限频日志(每秒 1 条 face 检测记录)
- 修复推理时长日志 %lld 格式 bug
- 连续 3 秒无人脸时打印 no face detected

### main/idf_component.yml
esp_video 升级 1.3.1 → ~1.4.1(手动 patch 修 xclk_freq bug)

### partitions/v2/16m.csv
OTA 分区扩容:3.94MB → 5MB,assets 缩到 5.875MB,支持 4.23MB 固件

### docs/phase-01-face-tracking/PROGRESS.md
更新 Phase 01 执行日志,记录实机调试细节

## 文档更新

### Coglet项目分析与开发指南.md 新增第六点五节

完整记录本轮调试的 23 个踩坑,分为:
1. 编译/配置类(5 个):板级重置、依赖冲突、bootloader 缓存、%lld 格式、xclk_freq bug
2. 摄像头数据链路(5 个):sensor driver 启用、V4L2 buffer 数量、分区扩容、镜头保护膜、光照
3. esp-dl 人脸检测(3 个):MSR letterbox 伪影、ESPDET OOD 默认输出、字节序判断
4. 任务调度(3 个):WDT 崩溃、GDMA ISR 崩溃、弱符号链接
5. RP2040 端(4 个):idle 回中、坐标累加撞限位、mpremote 阻塞、两分支代码差异
6. 硬件(3 个):飞线验证、360° 舵机误用、烧录生效验证

附调试方法论 6 条 + 未解决遗留问题 3 条

## 已解决问题

- ESP-IDF 编译链路(依赖/分区/格式)
- ESP32 + RP2040 端到端协议(face:x,y UART)
- WDT 崩溃(face_tracker 切到 Core 1)
- RP2040 眼球回中机制(idle 时回正)
- V4L2 双 buffer(DMA 数据更新正常)

## 遗留问题(待解决)

- face 检测 box 固定伪激活(无论 pix_type / 画面内容 / 模型选择)
- GDMA ISR 每 ~30s 触发 InstrFetchProhibited 崩溃
- ⚠️ 端到端验收:眼球未真正跟随人脸

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 18:22:15 +08:00

280 lines
12 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// [T05/T06] 人脸追踪任务
// 只有 ESP32-S3 + CONFIG_XIAOZHI_ENABLE_FACE_TRACKING=y 才编译完整实现
// 其他情况编译 3 个空函数,保证链接通过
#include "face_tracker.h"
#include "sdkconfig.h"
#if defined(CONFIG_XIAOZHI_ENABLE_FACE_TRACKING) && defined(CONFIG_IDF_TARGET_ESP32S3)
#include "human_face_detect.hpp"
#include "dl_image_define.hpp"
#include "dl_detect_define.hpp"
#include "board.h"
#include "esp32_camera.h"
#include <esp_heap_caps.h>
#include <esp_log.h>
#include <esp_timer.h>
#include <freertos/FreeRTOS.h>
#include <freertos/task.h>
#include <list>
#include <new>
// Log tag passed to every ESP_LOGx call in this file.
static const char* TAG = "FaceTracker";
// Handle of the running tracker task; nullptr means "not running".
// Written by face_tracker_start() and cleared by the task itself on exit.
static TaskHandle_t s_handle = nullptr;
// Cooperative stop request: set by face_tracker_stop(), polled by the task loop.
// NOTE(review): shared across tasks using only `volatile`; consider std::atomic<bool>.
static volatile bool s_stop = false;
// Frames-per-second figure from the most recent stats window of the task;
// returned by face_tracker_get_fps().
static float s_last_fps = 0.0f;
// T06: uart_send_face is provided by T07 in uart_component.{h,cc}.
// It is forward-declared here as a weak symbol so that this file still builds
// and links before T07 lands; once the real implementation exists it overrides
// the weak symbol and this file needs no change. Callers must compare the
// symbol against nullptr before calling it.
extern "C" __attribute__((weak)) void uart_send_face(int x_offset, int y_offset);
// Convert one row of packed YUYV (4:2:2) to packed RGB888.
//
// Every 4 input bytes [Y0 U Y1 V] describe two pixels that share one chroma
// pair and produce 6 output bytes [R0 G0 B0 R1 G1 B1].
//
// BT.601 in 8.8 fixed point (coefficients scaled by 256):
//   R = Y + 1.402 * (V - 128)
//   G = Y - 0.344 * (U - 128) - 0.714 * (V - 128)
//   B = Y + 1.772 * (U - 128)
//
// @param yuyv    source row; 4 bytes are read per started pixel pair.
// @param rgb     destination row; exactly `pixels * 3` bytes are written.
// @param pixels  number of pixels to produce; <= 0 writes nothing.
//
// An odd `pixels` count emits only the first pixel of the last pair, so the
// function never writes past `pixels * 3` bytes (the previous version wrote a
// whole extra pixel in that case).
static inline void yuyv_to_rgb888_line(const uint8_t* yuyv, uint8_t* rgb, int pixels) {
    // Saturate an intermediate channel value into the 0..255 range.
    const auto clamp_u8 = [](int c) -> uint8_t {
        return static_cast<uint8_t>(c < 0 ? 0 : (c > 255 ? 255 : c));
    };

    for (int i = 0; i < pixels; i += 2) {
        const int y1 = yuyv[0];
        const int u = yuyv[1] - 128;
        const int y2 = yuyv[2];
        const int v = yuyv[3] - 128;
        yuyv += 4;

        // The chroma contribution is identical for both pixels of the pair.
        const int r_off = (359 * v) / 256;
        const int g_off = (88 * u + 183 * v) / 256;
        const int b_off = (454 * u) / 256;

        // Pixel 1
        *rgb++ = clamp_u8(y1 + r_off);
        *rgb++ = clamp_u8(y1 - g_off);
        *rgb++ = clamp_u8(y1 + b_off);

        // Trailing half pair: stop before overrunning the destination row.
        if (i + 1 >= pixels) {
            break;
        }

        // Pixel 2
        *rgb++ = clamp_u8(y2 + r_off);
        *rgb++ = clamp_u8(y2 - g_off);
        *rgb++ = clamp_u8(y2 + b_off);
    }
}
static void face_tracker_task(void* arg) {
(void)arg;
// 等待摄像头 ISP 预热 + 视频流启动稳定
vTaskDelay(pdMS_TO_TICKS(500));
ESP_LOGI(TAG, "face_tracker task started on core %d", xPortGetCoreID());
// [2026-04-20 重大修复] 分配 PSRAM RGB888 缓冲区,手动 YUYV→RGB888 转换
// 绕过 esp-dl ImagePreprocessor 的 YUYV 路径(疑似产生固定激活 bug
// 240*240*3 = 172800 字节PSRAM 8MB 完全够
constexpr size_t RGB_SIZE = 240 * 240 * 3;
uint8_t* rgb_buf = (uint8_t*)heap_caps_malloc(RGB_SIZE, MALLOC_CAP_SPIRAM);
if (!rgb_buf) {
ESP_LOGE(TAG, "分配 RGB888 缓冲失败");
vTaskDelete(NULL);
return;
}
ESP_LOGI(TAG, "RGB888 转换缓冲已分配 %u bytes", (unsigned)RGB_SIZE);
// 构造检测器:默认 model_type 由 CONFIG_DEFAULT_HUMAN_FACE_DETECT_MODEL 决定
// lazy_load=true默认以减少启动期内存瞬时占用
auto* detector = new(std::nothrow) HumanFaceDetect();
if (!detector) {
ESP_LOGE(TAG, "HumanFaceDetect 构造失败PSRAM 不足?)");
multi_heap_info_t info;
heap_caps_get_info(&info, MALLOC_CAP_SPIRAM);
ESP_LOGE(TAG, "PSRAM free=%u total_allocated=%u",
(unsigned)info.total_free_bytes,
(unsigned)info.total_allocated_bytes);
s_handle = nullptr;
vTaskDelete(NULL);
return;
}
// 一次性打印启动时 PSRAM 占用供诊断RESEARCH R2 风险跟踪)
{
multi_heap_info_t info;
heap_caps_get_info(&info, MALLOC_CAP_SPIRAM);
ESP_LOGI(TAG, "PSRAM after detector init: free=%u allocated=%u",
(unsigned)info.total_free_bytes,
(unsigned)info.total_allocated_bytes);
}
// 按 Kconfig 配置的 FPS 计算节拍
const TickType_t period = pdMS_TO_TICKS(1000 / CONFIG_XIAOZHI_FACE_TRACKING_FPS);
TickType_t last_wake = xTaskGetTickCount();
int hit = 0, miss = 0;
int64_t last_report_us = esp_timer_get_time();
// 实时日志限频:每秒最多 1 条INFO 级别便于排查)
int64_t last_detail_log_us = 0;
int miss_streak = 0; // 连续 miss 计数
while (!s_stop) {
vTaskDelayUntil(&last_wake, period);
auto* cam = dynamic_cast<Esp32Camera*>(Board::GetInstance().GetCamera());
if (!cam) {
continue;
}
Esp32Camera::FrameRef f;
if (!cam->CaptureForDetection(&f)) {
// [T04 策略] 拿不到 mutexMCP 拍照中)或 DQBUF 失败 → 正常跳帧
continue;
}
// [Bug 1 诊断] 首次进入循环时,打印前 32 字节 + 中心像素 + 统计,判断数据性质
// 全零 → 摄像头无数据;规律 → 字节序/格式问题;随机 → 正常但模型看不懂
static bool debug_dumped = false;
if (!debug_dumped && f.data && f.len >= 32) {
debug_dumped = true;
const uint8_t* d = (const uint8_t*)f.data;
ESP_LOGI(TAG, "frame debug: size=%u w=%u h=%u len=%u",
(unsigned)f.width * f.height * 2, f.width, f.height, (unsigned)f.len);
// 打印左上角 16 字节 + 中心附近 16 字节
size_t center = (f.width * (f.height / 2) + f.width / 2) * 2;
if (center + 16 <= f.len) {
ESP_LOGI(TAG, "top-left 16B: %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x",
d[0],d[1],d[2],d[3],d[4],d[5],d[6],d[7],d[8],d[9],d[10],d[11],d[12],d[13],d[14],d[15]);
ESP_LOGI(TAG, "center 16B: %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x",
d[center],d[center+1],d[center+2],d[center+3],d[center+4],d[center+5],d[center+6],d[center+7],
d[center+8],d[center+9],d[center+10],d[center+11],d[center+12],d[center+13],d[center+14],d[center+15]);
}
// 统计:零字节比例(判断摄像头是否真有数据)
size_t zero_cnt = 0;
for (size_t i = 0; i < f.len; i++) if (d[i] == 0) zero_cnt++;
ESP_LOGI(TAG, "zero bytes: %u / %u (%.1f%%)",
(unsigned)zero_cnt, (unsigned)f.len, 100.0f * zero_cnt / f.len);
}
// [2026-04-20 重大修复] 手动 YUYV → RGB888 转换,绕过 esp-dl 预处理黑盒
// 以前img.pix_type = YUYV让 ImagePreprocessor 内部做 YUV→RGB但它产生固定激活
// 现在:先转成 RGB888 喂给模型pix_type 标 RGB888消除预处理不确定性
{
const uint8_t* src = (const uint8_t*)f.data;
uint8_t* dst = rgb_buf;
for (uint16_t row = 0; row < f.height; row++) {
yuyv_to_rgb888_line(src, dst, f.width);
src += f.width * 2; // YUYV 每像素 2 字节
dst += f.width * 3; // RGB888 每像素 3 字节
}
}
dl::image::img_t img{};
img.data = (void*)rgb_buf;
img.width = f.width;
img.height = f.height;
img.pix_type = dl::image::DL_IMAGE_PIX_TYPE_RGB888;
int64_t t0 = esp_timer_get_time();
auto& results = detector->run(img);
int64_t t1 = esp_timer_get_time();
// 立即归还 V4L2 缓冲,避免 face_track 占用时间长
cam->ReleaseDetectionFrame(f);
int64_t now_us = esp_timer_get_time();
if (results.empty()) {
miss++;
miss_streak++;
// 连续 3 秒无人脸时提示一次(按默认 FPS=10 折算 ~30 帧)
if (miss_streak == CONFIG_XIAOZHI_FACE_TRACKING_FPS * 3) {
ESP_LOGI(TAG, "no face detected in last 3s");
}
} else {
hit++;
miss_streak = 0;
// PLAN 未明确排序策略esp-dl 内部 nms 后 list 顺序不稳定
// 为健壮性,挑 score 最高的那个(避免多脸时摇摆)
const dl::detect::result_t* best = nullptr;
for (const auto& r : results) {
if (best == nullptr || r.score > best->score) {
best = &r;
}
}
// box: [left_up_x, left_up_y, right_down_x, right_down_y]
int cx = (best->box[0] + best->box[2]) / 2;
int cy = (best->box[1] + best->box[3]) / 2;
// 坐标映射RESEARCH Pitfall 7严格保持 cx * 224 / width - 112
// 对齐 RP2040 端 deadzone=20 / x_adj_factor=10 的基准
int x_offset = (f.width > 0) ? (cx * 224 / f.width - 112) : 0;
int y_offset = (f.height > 0) ? (cy * 224 / f.height - 112) : 0;
// T07 完成后uart_send_face 弱符号会被真实实现覆盖
if (uart_send_face != nullptr) {
uart_send_face(x_offset, y_offset);
}
// INFO 级别实时日志,限频每秒 1 条避免刷屏
// 修复:%lld 在 nano newlib 下输出异常,改为 %lu + uint32infer<2s 安全)
if (now_us - last_detail_log_us > 1000000LL) {
ESP_LOGI(TAG, "face: score=%.2f box=[%d,%d,%d,%d] offset=(%+d,%+d) infer=%lums",
best->score,
best->box[0], best->box[1], best->box[2], best->box[3],
x_offset, y_offset,
(unsigned long)((t1 - t0) / 1000));
last_detail_log_us = now_us;
}
// 高频详细日志保留为 LOGD需 idf.py monitor 按 Ctrl+T Y 切换为 DEBUG
ESP_LOGD(TAG, "face score=%.2f offset=(%d,%d) infer=%luus",
best->score, x_offset, y_offset, (unsigned long)(t1 - t0));
}
// 每 10 秒汇报一次统计(加保底避免除零)
int64_t now = esp_timer_get_time();
if (now - last_report_us > 10000000LL) {
float elapsed_s = (now - last_report_us) / 1e6f;
if (elapsed_s > 0.1f) {
s_last_fps = (hit + miss) / elapsed_s;
ESP_LOGI(TAG, "face stats: hit=%d miss=%d fps=%.1f",
hit, miss, s_last_fps);
}
hit = miss = 0;
last_report_us = now;
}
}
delete detector;
if (rgb_buf) {
heap_caps_free(rgb_buf);
}
ESP_LOGI(TAG, "face_tracker task exiting");
s_handle = nullptr;
vTaskDelete(NULL);
}
extern "C" void face_tracker_start(void) {
if (s_handle != nullptr) {
ESP_LOGW(TAG, "face_tracker already running, ignore start");
return;
}
s_stop = false;
// [2026-04-20 修复 WDT 崩溃] 原绑 Core 0 + 优先级 2 会导致:
// esp-dl 推理占 150ms → 同核的 RMT LED 驱动拿不到 spinlock 超过 300ms →
// 触发 Interrupt WDT → SetDeviceState 切换时点 LED 崩溃。
// 改绑到 Core 1WiFi/RMT/LED 在 Core 0音频在 Core 1 但只 speaking 时重载)。
// 栈 8KB给 esp-dl 推理留充足空间
BaseType_t ok = xTaskCreatePinnedToCore(
face_tracker_task, "face_track",
8 * 1024, nullptr, 2, &s_handle, 1);
if (ok != pdPASS) {
ESP_LOGE(TAG, "xTaskCreatePinnedToCore failed");
s_handle = nullptr;
}
}
// Request the tracker task to stop.
//
// Only raises the stop flag; the task notices it at the top of its next loop
// iteration, releases its resources and deletes itself. This call does not
// wait for that to happen.
extern "C" void face_tracker_stop(void) {
    s_stop = true;
}
// Return the frame rate measured over the tracker's most recent statistics
// window (refreshed roughly every 10 seconds by the task); 0.0 until the
// first window has completed.
extern "C" float face_tracker_get_fps(void) {
    const float latest_fps = s_last_fps;
    return latest_fps;
}
#else // 非 S3 或功能未启用:提供空壳,保证链接通过
// Stub: face tracking is compiled out, so starting is a no-op.
extern "C" void face_tracker_start(void) {
}

// Stub: nothing is running, so there is nothing to stop.
extern "C" void face_tracker_stop(void) {
}

// Stub: no tracker, so the reported frame rate is always zero.
extern "C" float face_tracker_get_fps(void) {
    return 0.0f;
}
#endif // CONFIG_XIAOZHI_ENABLE_FACE_TRACKING && CONFIG_IDF_TARGET_ESP32S3