## 代码变更

### main/application.cc
修复 T01 probe 日志 %lld 格式 bug(改用 %lu + unsigned long)

### main/boards/common/esp32_camera.cc
- 修复 DVP V4L2 单 buffer 导致 DMA 饥饿:req.count 从 1 改为 2
- 修复 [T01] Probe 日志 elapsed=ldus 显示问题(同上格式修复)

### main/face_tracker.cc
多轮迭代:
- 新增 frame debug 诊断日志(打印 top-left/center 16B + zero_bytes 统计)
- pix_type 尝试路径:YUYV → RGB565LE → RGB565BE → YUYV → RGB888(手动转换)
- 手动实现 BT.601 公式 YUYV→RGB888 转换,绕过 ImagePreprocessor 黑盒
- face_tracker 任务从 Core 0 切换到 Core 1,避让 RMT/LED 死锁
- 新增 INFO 级限频日志(每秒 1 条 face 检测记录)
- 修复推理时长日志 %lld 格式 bug
- 连续 3 秒无人脸时打印 no face detected

### main/idf_component.yml
esp_video 升级 1.3.1 → ~1.4.1(手动 patch 修 xclk_freq bug)

### partitions/v2/16m.csv
OTA 分区扩容:3.94MB → 5MB,assets 缩到 5.875MB,支持 4.23MB 固件

### docs/phase-01-face-tracking/PROGRESS.md
更新 Phase 01 执行日志,记录实机调试细节

## 文档更新

### Coglet项目分析与开发指南.md 新增第六点五节
完整记录本轮调试的 23 个踩坑,分为:
1. 编译/配置类(5 个):板级重置、依赖冲突、bootloader 缓存、%lld 格式、xclk_freq bug
2. 摄像头数据链路(5 个):sensor driver 启用、V4L2 buffer 数量、分区扩容、镜头保护膜、光照
3. esp-dl 人脸检测(3 个):MSR letterbox 伪影、ESPDET OOD 默认输出、字节序判断
4. 任务调度(3 个):WDT 崩溃、GDMA ISR 崩溃、弱符号链接
5. RP2040 端(4 个):idle 回中、坐标累加撞限位、mpremote 阻塞、两分支代码差异
6. 硬件(3 个):飞线验证、360° 舵机误用、烧录生效验证

附调试方法论 6 条 + 未解决遗留问题 3 条

## 已解决问题
- ✅ ESP-IDF 编译链路(依赖/分区/格式)
- ✅ ESP32 + RP2040 端到端协议(face:x,y UART)
- ✅ WDT 崩溃(face_tracker 切到 Core 1)
- ✅ RP2040 眼球回中机制(idle 时回正)
- ✅ V4L2 双 buffer(DMA 数据更新正常)

## 遗留问题(待解决)
- ❌ face 检测 box 固定伪激活(无论 pix_type / 画面内容 / 模型选择)
- ❌ GDMA ISR 每 ~30s 触发 InstrFetchProhibited 崩溃
- ⚠️ 端到端验收:眼球未真正跟随人脸

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
280 lines
12 KiB
C++
280 lines
12 KiB
C++
// [T05/T06] 人脸追踪任务
|
||
// 只有 ESP32-S3 + CONFIG_XIAOZHI_ENABLE_FACE_TRACKING=y 才编译完整实现
|
||
// 其他情况编译 3 个空函数,保证链接通过
|
||
|
||
#include "face_tracker.h"
|
||
#include "sdkconfig.h"
|
||
|
||
#if defined(CONFIG_XIAOZHI_ENABLE_FACE_TRACKING) && defined(CONFIG_IDF_TARGET_ESP32S3)
|
||
|
||
#include "human_face_detect.hpp"
|
||
#include "dl_image_define.hpp"
|
||
#include "dl_detect_define.hpp"
|
||
#include "board.h"
|
||
#include "esp32_camera.h"
|
||
|
||
#include <esp_heap_caps.h>
|
||
#include <esp_log.h>
|
||
#include <esp_timer.h>
|
||
#include <freertos/FreeRTOS.h>
|
||
#include <freertos/task.h>
|
||
#include <list>
|
||
#include <new>
|
||
|
||
static const char* TAG = "FaceTracker";
|
||
static TaskHandle_t s_handle = nullptr;
|
||
static volatile bool s_stop = false;
|
||
static float s_last_fps = 0.0f;
|
||
|
||
// T06: uart_send_face 由 T07 在 uart_component.{h,cc} 中提供
|
||
// 此处用前向声明 + 弱符号,让 T07 完成前 face_tracker.cc 仍能通过编译
|
||
// T07 完成后该弱符号被真实实现覆盖,无需改动本文件
|
||
extern "C" __attribute__((weak)) void uart_send_face(int x_offset, int y_offset);
|
||
|
||
// Clamp an intermediate colour value to the 0..255 range of one RGB888 channel.
static inline uint8_t yuyv_clamp_u8(int value) {
    if (value < 0) {
        return 0;
    }
    if (value > 255) {
        return 255;
    }
    return (uint8_t)value;
}

// Convert one row of packed YUYV (Y0 U Y1 V, 4 bytes per 2 pixels) to RGB888
// (3 bytes per pixel) using integer BT.601 coefficients scaled by 256:
//   R = Y + 1.402*(V-128)
//   G = Y - 0.344*(U-128) - 0.714*(V-128)
//   B = Y + 1.772*(U-128)
//
// yuyv   : input row, at least 2*pixels bytes.
// rgb    : output row, at least 3*pixels bytes.
// pixels : number of pixels in the row.
//
// Null pointers or a non-positive pixel count make the call a no-op.
// An odd pixel count is handled without touching memory outside either row:
// the trailing pixel reads only its own Y and U bytes and reuses the V sample
// of the preceding pair (neutral chroma when there is no preceding pair).
static inline void yuyv_to_rgb888_line(const uint8_t* yuyv, uint8_t* rgb, int pixels) {
    if (!yuyv || !rgb || pixels <= 0) {
        return;
    }

    int v = 0;  // last V sample seen; carried into the odd-tail pixel
    int i = 0;
    for (; i + 1 < pixels; i += 2) {
        const int y1 = yuyv[0];
        const int u = yuyv[1] - 128;
        const int y2 = yuyv[2];
        v = yuyv[3] - 128;
        yuyv += 4;

        // Both pixels of a macropixel share the same chroma, so the chroma
        // contributions are computed once. Integer division truncates toward
        // zero, exactly as in the per-pixel formulation.
        const int r_off = (359 * v) / 256;
        const int g_off = (88 * u + 183 * v) / 256;
        const int b_off = (454 * u) / 256;

        *rgb++ = yuyv_clamp_u8(y1 + r_off);
        *rgb++ = yuyv_clamp_u8(y1 - g_off);
        *rgb++ = yuyv_clamp_u8(y1 + b_off);
        *rgb++ = yuyv_clamp_u8(y2 + r_off);
        *rgb++ = yuyv_clamp_u8(y2 - g_off);
        *rgb++ = yuyv_clamp_u8(y2 + b_off);
    }

    if (i < pixels) {
        // Odd tail: only Y and U of this pixel lie inside a 2*pixels-byte row.
        const int y = yuyv[0];
        const int u = yuyv[1] - 128;
        *rgb++ = yuyv_clamp_u8(y + (359 * v) / 256);
        *rgb++ = yuyv_clamp_u8(y - (88 * u + 183 * v) / 256);
        *rgb++ = yuyv_clamp_u8(y + (454 * u) / 256);
    }
}
|
||
|
||
static void face_tracker_task(void* arg) {
|
||
(void)arg;
|
||
// 等待摄像头 ISP 预热 + 视频流启动稳定
|
||
vTaskDelay(pdMS_TO_TICKS(500));
|
||
|
||
ESP_LOGI(TAG, "face_tracker task started on core %d", xPortGetCoreID());
|
||
|
||
// [2026-04-20 重大修复] 分配 PSRAM RGB888 缓冲区,手动 YUYV→RGB888 转换
|
||
// 绕过 esp-dl ImagePreprocessor 的 YUYV 路径(疑似产生固定激活 bug)
|
||
// 240*240*3 = 172800 字节,PSRAM 8MB 完全够
|
||
constexpr size_t RGB_SIZE = 240 * 240 * 3;
|
||
uint8_t* rgb_buf = (uint8_t*)heap_caps_malloc(RGB_SIZE, MALLOC_CAP_SPIRAM);
|
||
if (!rgb_buf) {
|
||
ESP_LOGE(TAG, "分配 RGB888 缓冲失败");
|
||
vTaskDelete(NULL);
|
||
return;
|
||
}
|
||
ESP_LOGI(TAG, "RGB888 转换缓冲已分配 %u bytes", (unsigned)RGB_SIZE);
|
||
|
||
// 构造检测器:默认 model_type 由 CONFIG_DEFAULT_HUMAN_FACE_DETECT_MODEL 决定
|
||
// lazy_load=true(默认)以减少启动期内存瞬时占用
|
||
auto* detector = new(std::nothrow) HumanFaceDetect();
|
||
if (!detector) {
|
||
ESP_LOGE(TAG, "HumanFaceDetect 构造失败(PSRAM 不足?)");
|
||
multi_heap_info_t info;
|
||
heap_caps_get_info(&info, MALLOC_CAP_SPIRAM);
|
||
ESP_LOGE(TAG, "PSRAM free=%u total_allocated=%u",
|
||
(unsigned)info.total_free_bytes,
|
||
(unsigned)info.total_allocated_bytes);
|
||
s_handle = nullptr;
|
||
vTaskDelete(NULL);
|
||
return;
|
||
}
|
||
|
||
// 一次性打印启动时 PSRAM 占用供诊断(RESEARCH R2 风险跟踪)
|
||
{
|
||
multi_heap_info_t info;
|
||
heap_caps_get_info(&info, MALLOC_CAP_SPIRAM);
|
||
ESP_LOGI(TAG, "PSRAM after detector init: free=%u allocated=%u",
|
||
(unsigned)info.total_free_bytes,
|
||
(unsigned)info.total_allocated_bytes);
|
||
}
|
||
|
||
// 按 Kconfig 配置的 FPS 计算节拍
|
||
const TickType_t period = pdMS_TO_TICKS(1000 / CONFIG_XIAOZHI_FACE_TRACKING_FPS);
|
||
TickType_t last_wake = xTaskGetTickCount();
|
||
int hit = 0, miss = 0;
|
||
int64_t last_report_us = esp_timer_get_time();
|
||
// 实时日志限频:每秒最多 1 条(INFO 级别便于排查)
|
||
int64_t last_detail_log_us = 0;
|
||
int miss_streak = 0; // 连续 miss 计数
|
||
|
||
while (!s_stop) {
|
||
vTaskDelayUntil(&last_wake, period);
|
||
|
||
auto* cam = dynamic_cast<Esp32Camera*>(Board::GetInstance().GetCamera());
|
||
if (!cam) {
|
||
continue;
|
||
}
|
||
|
||
Esp32Camera::FrameRef f;
|
||
if (!cam->CaptureForDetection(&f)) {
|
||
// [T04 策略] 拿不到 mutex(MCP 拍照中)或 DQBUF 失败 → 正常跳帧
|
||
continue;
|
||
}
|
||
|
||
// [Bug 1 诊断] 首次进入循环时,打印前 32 字节 + 中心像素 + 统计,判断数据性质
|
||
// 全零 → 摄像头无数据;规律 → 字节序/格式问题;随机 → 正常但模型看不懂
|
||
static bool debug_dumped = false;
|
||
if (!debug_dumped && f.data && f.len >= 32) {
|
||
debug_dumped = true;
|
||
const uint8_t* d = (const uint8_t*)f.data;
|
||
ESP_LOGI(TAG, "frame debug: size=%u w=%u h=%u len=%u",
|
||
(unsigned)f.width * f.height * 2, f.width, f.height, (unsigned)f.len);
|
||
// 打印左上角 16 字节 + 中心附近 16 字节
|
||
size_t center = (f.width * (f.height / 2) + f.width / 2) * 2;
|
||
if (center + 16 <= f.len) {
|
||
ESP_LOGI(TAG, "top-left 16B: %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x",
|
||
d[0],d[1],d[2],d[3],d[4],d[5],d[6],d[7],d[8],d[9],d[10],d[11],d[12],d[13],d[14],d[15]);
|
||
ESP_LOGI(TAG, "center 16B: %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x",
|
||
d[center],d[center+1],d[center+2],d[center+3],d[center+4],d[center+5],d[center+6],d[center+7],
|
||
d[center+8],d[center+9],d[center+10],d[center+11],d[center+12],d[center+13],d[center+14],d[center+15]);
|
||
}
|
||
// 统计:零字节比例(判断摄像头是否真有数据)
|
||
size_t zero_cnt = 0;
|
||
for (size_t i = 0; i < f.len; i++) if (d[i] == 0) zero_cnt++;
|
||
ESP_LOGI(TAG, "zero bytes: %u / %u (%.1f%%)",
|
||
(unsigned)zero_cnt, (unsigned)f.len, 100.0f * zero_cnt / f.len);
|
||
}
|
||
|
||
// [2026-04-20 重大修复] 手动 YUYV → RGB888 转换,绕过 esp-dl 预处理黑盒
|
||
// 以前:img.pix_type = YUYV,让 ImagePreprocessor 内部做 YUV→RGB,但它产生固定激活
|
||
// 现在:先转成 RGB888 喂给模型,pix_type 标 RGB888,消除预处理不确定性
|
||
{
|
||
const uint8_t* src = (const uint8_t*)f.data;
|
||
uint8_t* dst = rgb_buf;
|
||
for (uint16_t row = 0; row < f.height; row++) {
|
||
yuyv_to_rgb888_line(src, dst, f.width);
|
||
src += f.width * 2; // YUYV 每像素 2 字节
|
||
dst += f.width * 3; // RGB888 每像素 3 字节
|
||
}
|
||
}
|
||
|
||
dl::image::img_t img{};
|
||
img.data = (void*)rgb_buf;
|
||
img.width = f.width;
|
||
img.height = f.height;
|
||
img.pix_type = dl::image::DL_IMAGE_PIX_TYPE_RGB888;
|
||
|
||
int64_t t0 = esp_timer_get_time();
|
||
auto& results = detector->run(img);
|
||
int64_t t1 = esp_timer_get_time();
|
||
|
||
// 立即归还 V4L2 缓冲,避免 face_track 占用时间长
|
||
cam->ReleaseDetectionFrame(f);
|
||
|
||
int64_t now_us = esp_timer_get_time();
|
||
if (results.empty()) {
|
||
miss++;
|
||
miss_streak++;
|
||
// 连续 3 秒无人脸时提示一次(按默认 FPS=10 折算 ~30 帧)
|
||
if (miss_streak == CONFIG_XIAOZHI_FACE_TRACKING_FPS * 3) {
|
||
ESP_LOGI(TAG, "no face detected in last 3s");
|
||
}
|
||
} else {
|
||
hit++;
|
||
miss_streak = 0;
|
||
// PLAN 未明确排序策略,esp-dl 内部 nms 后 list 顺序不稳定
|
||
// 为健壮性,挑 score 最高的那个(避免多脸时摇摆)
|
||
const dl::detect::result_t* best = nullptr;
|
||
for (const auto& r : results) {
|
||
if (best == nullptr || r.score > best->score) {
|
||
best = &r;
|
||
}
|
||
}
|
||
// box: [left_up_x, left_up_y, right_down_x, right_down_y]
|
||
int cx = (best->box[0] + best->box[2]) / 2;
|
||
int cy = (best->box[1] + best->box[3]) / 2;
|
||
// 坐标映射(RESEARCH Pitfall 7):严格保持 cx * 224 / width - 112
|
||
// 对齐 RP2040 端 deadzone=20 / x_adj_factor=10 的基准
|
||
int x_offset = (f.width > 0) ? (cx * 224 / f.width - 112) : 0;
|
||
int y_offset = (f.height > 0) ? (cy * 224 / f.height - 112) : 0;
|
||
|
||
// T07 完成后,uart_send_face 弱符号会被真实实现覆盖
|
||
if (uart_send_face != nullptr) {
|
||
uart_send_face(x_offset, y_offset);
|
||
}
|
||
// INFO 级别实时日志,限频每秒 1 条避免刷屏
|
||
// 修复:%lld 在 nano newlib 下输出异常,改为 %lu + uint32(infer<2s 安全)
|
||
if (now_us - last_detail_log_us > 1000000LL) {
|
||
ESP_LOGI(TAG, "face: score=%.2f box=[%d,%d,%d,%d] offset=(%+d,%+d) infer=%lums",
|
||
best->score,
|
||
best->box[0], best->box[1], best->box[2], best->box[3],
|
||
x_offset, y_offset,
|
||
(unsigned long)((t1 - t0) / 1000));
|
||
last_detail_log_us = now_us;
|
||
}
|
||
// 高频详细日志保留为 LOGD(需 idf.py monitor 按 Ctrl+T Y 切换为 DEBUG)
|
||
ESP_LOGD(TAG, "face score=%.2f offset=(%d,%d) infer=%luus",
|
||
best->score, x_offset, y_offset, (unsigned long)(t1 - t0));
|
||
}
|
||
|
||
// 每 10 秒汇报一次统计(加保底避免除零)
|
||
int64_t now = esp_timer_get_time();
|
||
if (now - last_report_us > 10000000LL) {
|
||
float elapsed_s = (now - last_report_us) / 1e6f;
|
||
if (elapsed_s > 0.1f) {
|
||
s_last_fps = (hit + miss) / elapsed_s;
|
||
ESP_LOGI(TAG, "face stats: hit=%d miss=%d fps=%.1f",
|
||
hit, miss, s_last_fps);
|
||
}
|
||
hit = miss = 0;
|
||
last_report_us = now;
|
||
}
|
||
}
|
||
|
||
delete detector;
|
||
if (rgb_buf) {
|
||
heap_caps_free(rgb_buf);
|
||
}
|
||
ESP_LOGI(TAG, "face_tracker task exiting");
|
||
s_handle = nullptr;
|
||
vTaskDelete(NULL);
|
||
}
|
||
|
||
extern "C" void face_tracker_start(void) {
|
||
if (s_handle != nullptr) {
|
||
ESP_LOGW(TAG, "face_tracker already running, ignore start");
|
||
return;
|
||
}
|
||
s_stop = false;
|
||
// [2026-04-20 修复 WDT 崩溃] 原绑 Core 0 + 优先级 2 会导致:
|
||
// esp-dl 推理占 150ms → 同核的 RMT LED 驱动拿不到 spinlock 超过 300ms →
|
||
// 触发 Interrupt WDT → SetDeviceState 切换时点 LED 崩溃。
|
||
// 改绑到 Core 1(WiFi/RMT/LED 在 Core 0,音频在 Core 1 但只 speaking 时重载)。
|
||
// 栈 8KB:给 esp-dl 推理留充足空间
|
||
BaseType_t ok = xTaskCreatePinnedToCore(
|
||
face_tracker_task, "face_track",
|
||
8 * 1024, nullptr, 2, &s_handle, 1);
|
||
if (ok != pdPASS) {
|
||
ESP_LOGE(TAG, "xTaskCreatePinnedToCore failed");
|
||
s_handle = nullptr;
|
||
}
|
||
}
|
||
|
||
// Requests the face-tracking task to stop by raising the s_stop flag.
// The call only sets the flag and returns immediately; the task loop tests the
// flag at the top of each iteration and performs its own cleanup.
extern "C" void face_tracker_stop(void)
{
    s_stop = true;
}
|
||
|
||
extern "C" float face_tracker_get_fps(void) {
|
||
return s_last_fps;
|
||
}
|
||
|
||
#else // 非 S3 或功能未启用:提供空壳,保证链接通过
|
||
|
||
// No-op implementations used when face tracking is compiled out, so callers
// of the face_tracker API still link.
extern "C" {

// Nothing to start in this configuration.
void face_tracker_start(void)
{
}

// Nothing to stop in this configuration.
void face_tracker_stop(void)
{
}

// No frames are ever processed, so the reported rate is always zero.
float face_tracker_get_fps(void)
{
    return 0.0f;
}

}  // extern "C"
|
||
|
||
#endif // CONFIG_XIAOZHI_ENABLE_FACE_TRACKING && CONFIG_IDF_TARGET_ESP32S3
|