// [T05/T06] 人脸追踪任务 // 只有 ESP32-S3 + CONFIG_XIAOZHI_ENABLE_FACE_TRACKING=y 才编译完整实现 // 其他情况编译 3 个空函数,保证链接通过 #include "face_tracker.h" #include "sdkconfig.h" #if defined(CONFIG_XIAOZHI_ENABLE_FACE_TRACKING) && defined(CONFIG_IDF_TARGET_ESP32S3) #include "human_face_detect.hpp" #include "dl_image_define.hpp" #include "dl_detect_define.hpp" #include "board.h" #include "esp32_camera.h" #include #include #include #include #include #include #include static const char* TAG = "FaceTracker"; static TaskHandle_t s_handle = nullptr; static volatile bool s_stop = false; static float s_last_fps = 0.0f; // T06: uart_send_face 由 T07 在 uart_component.{h,cc} 中提供 // 此处用前向声明 + 弱符号,让 T07 完成前 face_tracker.cc 仍能通过编译 // T07 完成后该弱符号被真实实现覆盖,无需改动本文件 extern "C" __attribute__((weak)) void uart_send_face(int x_offset, int y_offset); // YUYV → RGB888 手动转换(每 4 字节 YUYV 生成 2 像素 6 字节 RGB) // 公式(BT.601):R = Y + 1.402*(V-128); G = Y - 0.344*(U-128) - 0.714*(V-128); B = Y + 1.772*(U-128) static inline void yuyv_to_rgb888_line(const uint8_t* yuyv, uint8_t* rgb, int pixels) { for (int i = 0; i < pixels; i += 2) { int y1 = yuyv[0]; int u = yuyv[1] - 128; int y2 = yuyv[2]; int v = yuyv[3] - 128; yuyv += 4; // 像素 1 int r1 = y1 + (359 * v) / 256; int g1 = y1 - (88 * u + 183 * v) / 256; int b1 = y1 + (454 * u) / 256; // 像素 2 int r2 = y2 + (359 * v) / 256; int g2 = y2 - (88 * u + 183 * v) / 256; int b2 = y2 + (454 * u) / 256; *rgb++ = (uint8_t)(r1 < 0 ? 0 : r1 > 255 ? 255 : r1); *rgb++ = (uint8_t)(g1 < 0 ? 0 : g1 > 255 ? 255 : g1); *rgb++ = (uint8_t)(b1 < 0 ? 0 : b1 > 255 ? 255 : b1); *rgb++ = (uint8_t)(r2 < 0 ? 0 : r2 > 255 ? 255 : r2); *rgb++ = (uint8_t)(g2 < 0 ? 0 : g2 > 255 ? 255 : g2); *rgb++ = (uint8_t)(b2 < 0 ? 0 : b2 > 255 ? 255 : b2); } } static void face_tracker_task(void* arg) { (void)arg; // 等待摄像头 ISP 预热 + 视频流启动稳定 vTaskDelay(pdMS_TO_TICKS(500)); ESP_LOGI(TAG, "face_tracker task started on core %d", xPortGetCoreID()); // [2026-04-20 重大修复] 分配 PSRAM RGB888 缓冲区,手动 YUYV→RGB888 转换 // 绕过 esp-dl ImagePreprocessor 的 YUYV 路径(疑似产生固定激活 bug) // 240*240*3 = 172800 字节,PSRAM 8MB 完全够 constexpr size_t RGB_SIZE = 240 * 240 * 3; uint8_t* rgb_buf = (uint8_t*)heap_caps_malloc(RGB_SIZE, MALLOC_CAP_SPIRAM); if (!rgb_buf) { ESP_LOGE(TAG, "分配 RGB888 缓冲失败"); vTaskDelete(NULL); return; } ESP_LOGI(TAG, "RGB888 转换缓冲已分配 %u bytes", (unsigned)RGB_SIZE); // 构造检测器:默认 model_type 由 CONFIG_DEFAULT_HUMAN_FACE_DETECT_MODEL 决定 // lazy_load=true(默认)以减少启动期内存瞬时占用 auto* detector = new(std::nothrow) HumanFaceDetect(); if (!detector) { ESP_LOGE(TAG, "HumanFaceDetect 构造失败(PSRAM 不足?)"); multi_heap_info_t info; heap_caps_get_info(&info, MALLOC_CAP_SPIRAM); ESP_LOGE(TAG, "PSRAM free=%u total_allocated=%u", (unsigned)info.total_free_bytes, (unsigned)info.total_allocated_bytes); s_handle = nullptr; vTaskDelete(NULL); return; } // 一次性打印启动时 PSRAM 占用供诊断(RESEARCH R2 风险跟踪) { multi_heap_info_t info; heap_caps_get_info(&info, MALLOC_CAP_SPIRAM); ESP_LOGI(TAG, "PSRAM after detector init: free=%u allocated=%u", (unsigned)info.total_free_bytes, (unsigned)info.total_allocated_bytes); } // 按 Kconfig 配置的 FPS 计算节拍 const TickType_t period = pdMS_TO_TICKS(1000 / CONFIG_XIAOZHI_FACE_TRACKING_FPS); TickType_t last_wake = xTaskGetTickCount(); int hit = 0, miss = 0; int64_t last_report_us = esp_timer_get_time(); // 实时日志限频:每秒最多 1 条(INFO 级别便于排查) int64_t last_detail_log_us = 0; int miss_streak = 0; // 连续 miss 计数 while (!s_stop) { vTaskDelayUntil(&last_wake, period); auto* cam = dynamic_cast(Board::GetInstance().GetCamera()); if (!cam) { continue; } Esp32Camera::FrameRef f; if (!cam->CaptureForDetection(&f)) { // [T04 策略] 拿不到 mutex(MCP 拍照中)或 DQBUF 失败 → 正常跳帧 continue; } // [Bug 1 诊断] 首次进入循环时,打印前 32 字节 + 中心像素 + 统计,判断数据性质 // 全零 → 摄像头无数据;规律 → 字节序/格式问题;随机 → 正常但模型看不懂 static bool debug_dumped = false; if (!debug_dumped && f.data && f.len >= 32) { debug_dumped = true; const uint8_t* d = (const uint8_t*)f.data; ESP_LOGI(TAG, "frame debug: size=%u w=%u h=%u len=%u", (unsigned)f.width * f.height * 2, f.width, f.height, (unsigned)f.len); // 打印左上角 16 字节 + 中心附近 16 字节 size_t center = (f.width * (f.height / 2) + f.width / 2) * 2; if (center + 16 <= f.len) { ESP_LOGI(TAG, "top-left 16B: %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x", d[0],d[1],d[2],d[3],d[4],d[5],d[6],d[7],d[8],d[9],d[10],d[11],d[12],d[13],d[14],d[15]); ESP_LOGI(TAG, "center 16B: %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x", d[center],d[center+1],d[center+2],d[center+3],d[center+4],d[center+5],d[center+6],d[center+7], d[center+8],d[center+9],d[center+10],d[center+11],d[center+12],d[center+13],d[center+14],d[center+15]); } // 统计:零字节比例(判断摄像头是否真有数据) size_t zero_cnt = 0; for (size_t i = 0; i < f.len; i++) if (d[i] == 0) zero_cnt++; ESP_LOGI(TAG, "zero bytes: %u / %u (%.1f%%)", (unsigned)zero_cnt, (unsigned)f.len, 100.0f * zero_cnt / f.len); } // [2026-04-20 重大修复] 手动 YUYV → RGB888 转换,绕过 esp-dl 预处理黑盒 // 以前:img.pix_type = YUYV,让 ImagePreprocessor 内部做 YUV→RGB,但它产生固定激活 // 现在:先转成 RGB888 喂给模型,pix_type 标 RGB888,消除预处理不确定性 { const uint8_t* src = (const uint8_t*)f.data; uint8_t* dst = rgb_buf; for (uint16_t row = 0; row < f.height; row++) { yuyv_to_rgb888_line(src, dst, f.width); src += f.width * 2; // YUYV 每像素 2 字节 dst += f.width * 3; // RGB888 每像素 3 字节 } } dl::image::img_t img{}; img.data = (void*)rgb_buf; img.width = f.width; img.height = f.height; img.pix_type = dl::image::DL_IMAGE_PIX_TYPE_RGB888; int64_t t0 = esp_timer_get_time(); auto& results = detector->run(img); int64_t t1 = esp_timer_get_time(); // 立即归还 V4L2 缓冲,避免 face_track 占用时间长 cam->ReleaseDetectionFrame(f); int64_t now_us = esp_timer_get_time(); if (results.empty()) { miss++; miss_streak++; // 连续 3 秒无人脸时提示一次(按默认 FPS=10 折算 ~30 帧) if (miss_streak == CONFIG_XIAOZHI_FACE_TRACKING_FPS * 3) { ESP_LOGI(TAG, "no face detected in last 3s"); } } else { hit++; miss_streak = 0; // PLAN 未明确排序策略,esp-dl 内部 nms 后 list 顺序不稳定 // 为健壮性,挑 score 最高的那个(避免多脸时摇摆) const dl::detect::result_t* best = nullptr; for (const auto& r : results) { if (best == nullptr || r.score > best->score) { best = &r; } } // box: [left_up_x, left_up_y, right_down_x, right_down_y] int cx = (best->box[0] + best->box[2]) / 2; int cy = (best->box[1] + best->box[3]) / 2; // 坐标映射(RESEARCH Pitfall 7):严格保持 cx * 224 / width - 112 // 对齐 RP2040 端 deadzone=20 / x_adj_factor=10 的基准 int x_offset = (f.width > 0) ? (cx * 224 / f.width - 112) : 0; int y_offset = (f.height > 0) ? (cy * 224 / f.height - 112) : 0; // T07 完成后,uart_send_face 弱符号会被真实实现覆盖 if (uart_send_face != nullptr) { uart_send_face(x_offset, y_offset); } // INFO 级别实时日志,限频每秒 1 条避免刷屏 // 修复:%lld 在 nano newlib 下输出异常,改为 %lu + uint32(infer<2s 安全) if (now_us - last_detail_log_us > 1000000LL) { ESP_LOGI(TAG, "face: score=%.2f box=[%d,%d,%d,%d] offset=(%+d,%+d) infer=%lums", best->score, best->box[0], best->box[1], best->box[2], best->box[3], x_offset, y_offset, (unsigned long)((t1 - t0) / 1000)); last_detail_log_us = now_us; } // 高频详细日志保留为 LOGD(需 idf.py monitor 按 Ctrl+T Y 切换为 DEBUG) ESP_LOGD(TAG, "face score=%.2f offset=(%d,%d) infer=%luus", best->score, x_offset, y_offset, (unsigned long)(t1 - t0)); } // 每 10 秒汇报一次统计(加保底避免除零) int64_t now = esp_timer_get_time(); if (now - last_report_us > 10000000LL) { float elapsed_s = (now - last_report_us) / 1e6f; if (elapsed_s > 0.1f) { s_last_fps = (hit + miss) / elapsed_s; ESP_LOGI(TAG, "face stats: hit=%d miss=%d fps=%.1f", hit, miss, s_last_fps); } hit = miss = 0; last_report_us = now; } } delete detector; if (rgb_buf) { heap_caps_free(rgb_buf); } ESP_LOGI(TAG, "face_tracker task exiting"); s_handle = nullptr; vTaskDelete(NULL); } extern "C" void face_tracker_start(void) { if (s_handle != nullptr) { ESP_LOGW(TAG, "face_tracker already running, ignore start"); return; } s_stop = false; // [2026-04-20 修复 WDT 崩溃] 原绑 Core 0 + 优先级 2 会导致: // esp-dl 推理占 150ms → 同核的 RMT LED 驱动拿不到 spinlock 超过 300ms → // 触发 Interrupt WDT → SetDeviceState 切换时点 LED 崩溃。 // 改绑到 Core 1(WiFi/RMT/LED 在 Core 0,音频在 Core 1 但只 speaking 时重载)。 // 栈 8KB:给 esp-dl 推理留充足空间 BaseType_t ok = xTaskCreatePinnedToCore( face_tracker_task, "face_track", 8 * 1024, nullptr, 2, &s_handle, 1); if (ok != pdPASS) { ESP_LOGE(TAG, "xTaskCreatePinnedToCore failed"); s_handle = nullptr; } } extern "C" void face_tracker_stop(void) { s_stop = true; } extern "C" float face_tracker_get_fps(void) { return s_last_fps; } #else // 非 S3 或功能未启用:提供空壳,保证链接通过 extern "C" void face_tracker_start(void) {} extern "C" void face_tracker_stop(void) {} extern "C" float face_tracker_get_fps(void) { return 0.0f; } #endif // CONFIG_XIAOZHI_ENABLE_FACE_TRACKING && CONFIG_IDF_TARGET_ESP32S3