feat(self-report): 日志中台自身接入错误上报
Some checks failed
Build and Deploy Log Center / build-and-deploy (push) Failing after 1m19s

- 新增 app/self_report.py:后端运行时异常直接写入自身数据库
- main.py:添加全局异常处理器 + 启动时注册 log_center_api/web 项目
- web/api.ts:添加 reportError 函数 + Axios 5xx 拦截器
- web/main.tsx:添加 window.onerror / onunhandledrejection 全局捕获
- deploy.yaml:CI/CD 流水线各步骤失败时上报(build/deploy)
- 重写 integration_guide.md:按三类上报(runtime/cicd/deployment)重新组织

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
zyc 2026-02-26 10:08:26 +08:00
parent b178d24e73
commit c8204b6d47
6 changed files with 847 additions and 185 deletions

View File

@ -6,6 +6,9 @@ on:
- main
- master
env:
LOG_CENTER_URL: https://qiyuan-log-center-api.airlabs.art
jobs:
build-and-deploy:
runs-on: ubuntu-latest
@ -29,6 +32,7 @@ jobs:
# Build API Image
- name: Build and Push API
id: build-api
uses: docker/build-push-action@v4
with:
context: .
@ -39,6 +43,7 @@ jobs:
# Build Web Image
- name: Build and Push Web
id: build-web
uses: docker/build-push-action@v4
with:
context: ./web
@ -51,6 +56,7 @@ jobs:
# Build K8s Monitor Image
- name: Build and Push K8s Monitor
id: build-monitor
uses: docker/build-push-action@v4
with:
context: ./k8s-monitor
@ -72,9 +78,10 @@ jobs:
kubeconfig: ${{ secrets.KUBE_CONFIG }}
- name: Update K8s Manifests
id: deploy
run: |
echo "Environment: Production"
# Replace image placeholders
sed -i "s|\${CI_REGISTRY_IMAGE}/log-center-api:latest|${{ secrets.SWR_SERVER }}/${{ secrets.SWR_ORG }}/log-center-api:latest|g" k8s/api-deployment-prod.yaml
sed -i "s|\${CI_REGISTRY_IMAGE}/log-center-web:latest|${{ secrets.SWR_SERVER }}/${{ secrets.SWR_ORG }}/log-center-web:latest|g" k8s/web-deployment-prod.yaml
@ -89,3 +96,123 @@ jobs:
# Restart deployments
kubectl rollout restart deployment/log-center-api
kubectl rollout restart deployment/log-center-web
# ==================== CI/CD error reporting ====================
# Runs only when the job has failed AND the step with id `build-api` is the one
# whose outcome is `failure`. POSTs a JSON error report (source "cicd",
# project_id "log_center_api") to ${LOG_CENTER_URL}/api/v1/logs/report.
# The `'"$VAR"'` sequences close the single-quoted JSON, splice in the shell
# variable, and reopen the quote. The curl timeouts plus `|| true` keep an
# unreachable Log Center from failing this step.
- name: Report API Build Failure
if: failure() && steps.build-api.outcome == 'failure'
run: |
curl -s -X POST "${LOG_CENTER_URL}/api/v1/logs/report" \
-H "Content-Type: application/json" \
-d '{
"project_id": "log_center_api",
"environment": "cicd",
"level": "ERROR",
"source": "cicd",
"commit_hash": "'"$GITHUB_SHA"'",
"error": {
"type": "DockerBuildError",
"message": "Log Center API Docker build failed",
"file_path": null,
"line_number": null,
"stack_trace": ["API Docker build step failed. Check CI logs for details."]
},
"context": {
"workflow_name": "'"$GITHUB_WORKFLOW"'",
"job_name": "'"$GITHUB_JOB"'",
"step_name": "Build and Push API",
"run_id": "'"$GITHUB_RUN_ID"'",
"branch": "'"$GITHUB_REF_NAME"'",
"repository": "'"$GITHUB_REPOSITORY"'",
"run_url": "'"$GITHUB_SERVER_URL"'/'"$GITHUB_REPOSITORY"'/actions/runs/'"$GITHUB_RUN_ID"'"
}
}' --connect-timeout 5 --max-time 10 || true
# Runs only when the job has failed AND the step with id `build-web` is the one
# whose outcome is `failure`. POSTs a "cicd" error report for project
# "log_center_web" to ${LOG_CENTER_URL}/api/v1/logs/report.
# The curl timeouts plus `|| true` keep a failed report from failing this step.
- name: Report Web Build Failure
if: failure() && steps.build-web.outcome == 'failure'
run: |
curl -s -X POST "${LOG_CENTER_URL}/api/v1/logs/report" \
-H "Content-Type: application/json" \
-d '{
"project_id": "log_center_web",
"environment": "cicd",
"level": "ERROR",
"source": "cicd",
"commit_hash": "'"$GITHUB_SHA"'",
"error": {
"type": "DockerBuildError",
"message": "Log Center Web Docker build failed",
"file_path": null,
"line_number": null,
"stack_trace": ["Web Docker build step failed. Check CI logs for details."]
},
"context": {
"workflow_name": "'"$GITHUB_WORKFLOW"'",
"job_name": "'"$GITHUB_JOB"'",
"step_name": "Build and Push Web",
"run_id": "'"$GITHUB_RUN_ID"'",
"branch": "'"$GITHUB_REF_NAME"'",
"repository": "'"$GITHUB_REPOSITORY"'",
"run_url": "'"$GITHUB_SERVER_URL"'/'"$GITHUB_REPOSITORY"'/actions/runs/'"$GITHUB_RUN_ID"'"
}
}' --connect-timeout 5 --max-time 10 || true
# Runs only when the job has failed AND the step with id `build-monitor` is the
# one whose outcome is `failure`. POSTs a "cicd" error report to
# ${LOG_CENTER_URL}/api/v1/logs/report; `|| true` keeps a failed report from
# failing this step.
# NOTE(review): the K8s Monitor build is reported under project_id
# "log_center_api" — presumably because the monitor has no project of its own;
# confirm this is intended.
- name: Report Monitor Build Failure
if: failure() && steps.build-monitor.outcome == 'failure'
run: |
curl -s -X POST "${LOG_CENTER_URL}/api/v1/logs/report" \
-H "Content-Type: application/json" \
-d '{
"project_id": "log_center_api",
"environment": "cicd",
"level": "ERROR",
"source": "cicd",
"commit_hash": "'"$GITHUB_SHA"'",
"error": {
"type": "DockerBuildError",
"message": "K8s Monitor Docker build failed",
"file_path": null,
"line_number": null,
"stack_trace": ["K8s Monitor Docker build step failed. Check CI logs for details."]
},
"context": {
"workflow_name": "'"$GITHUB_WORKFLOW"'",
"job_name": "'"$GITHUB_JOB"'",
"step_name": "Build and Push K8s Monitor",
"run_id": "'"$GITHUB_RUN_ID"'",
"branch": "'"$GITHUB_REF_NAME"'",
"repository": "'"$GITHUB_REPOSITORY"'",
"run_url": "'"$GITHUB_SERVER_URL"'/'"$GITHUB_REPOSITORY"'/actions/runs/'"$GITHUB_RUN_ID"'"
}
}' --connect-timeout 5 --max-time 10 || true
# Runs only when the job has failed AND the step with id `deploy` is the one
# whose outcome is `failure`. POSTs an error report with source "deployment"
# (environment stays "cicd") to ${LOG_CENTER_URL}/api/v1/logs/report. The
# context additionally carries `namespace` and `deployment_name`.
# The curl timeouts plus `|| true` keep a failed report from failing this step.
- name: Report Deploy Failure
if: failure() && steps.deploy.outcome == 'failure'
run: |
curl -s -X POST "${LOG_CENTER_URL}/api/v1/logs/report" \
-H "Content-Type: application/json" \
-d '{
"project_id": "log_center_api",
"environment": "cicd",
"level": "ERROR",
"source": "deployment",
"commit_hash": "'"$GITHUB_SHA"'",
"error": {
"type": "DeployError",
"message": "Log Center K8s deployment failed",
"file_path": null,
"line_number": null,
"stack_trace": ["K8s deployment step failed. Check CI logs for details."]
},
"context": {
"workflow_name": "'"$GITHUB_WORKFLOW"'",
"job_name": "'"$GITHUB_JOB"'",
"step_name": "Update K8s Manifests",
"run_id": "'"$GITHUB_RUN_ID"'",
"branch": "'"$GITHUB_REF_NAME"'",
"repository": "'"$GITHUB_REPOSITORY"'",
"namespace": "default",
"deployment_name": "log-center",
"run_url": "'"$GITHUB_SERVER_URL"'/'"$GITHUB_REPOSITORY"'/actions/runs/'"$GITHUB_RUN_ID"'"
}
}' --connect-timeout 5 --max-time 10 || true

View File

@ -1,10 +1,12 @@
from fastapi import FastAPI, Depends, HTTPException, Query
from fastapi import FastAPI, Depends, HTTPException, Query, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from sqlmodel.ext.asyncio.session import AsyncSession
from sqlmodel import select, func, text
from .database import init_db, get_session
from .database import init_db, get_session, engine
from .models import ErrorLog, ErrorLogCreate, LogStatus, TaskStatusUpdate, RepairTask, RepairTaskCreate, Project, ProjectUpdate
from .gitea_client import GiteaClient
from .self_report import self_report_error
from datetime import datetime, timedelta
from typing import Optional, List
from pydantic import BaseModel
@ -25,6 +27,58 @@ app.add_middleware(
@app.on_event("startup")
async def on_startup():
    """Startup hook: initialise the database, then register Log Center's own projects."""
    # Order matters: the registration step queries the Project table.
    await init_db()
    await _register_self_projects()
async def _register_self_projects():
    """Create or refresh the Project rows that describe Log Center itself.

    For each built-in project (``log_center_api`` and ``log_center_web``) the
    row is inserted when missing; otherwise every field except ``project_id``
    is overwritten and ``updated_at`` is bumped. All changes are committed in
    a single transaction.
    """
    from sqlalchemy.orm import sessionmaker as sa_sessionmaker

    # NOTE(review): the local_path values are absolute paths on a developer
    # workstation — confirm they are valid wherever the Repair Agent runs.
    self_projects = (
        dict(
            project_id="log_center_api",
            name="Log Center API",
            repo_url="https://gitea.airlabs.art/zyc/qy_gitlab.git",
            local_path="/Users/maidong/Desktop/zyc/qy_gitlab/log_center",
            description="日志中台 FastAPI 后端服务",
        ),
        dict(
            project_id="log_center_web",
            name="Log Center Web",
            repo_url="https://gitea.airlabs.art/zyc/qy_gitlab.git",
            local_path="/Users/maidong/Desktop/zyc/qy_gitlab/log_center/web",
            description="日志中台 React 管理端",
        ),
    )

    session_factory = sa_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
    async with session_factory() as session:
        for spec in self_projects:
            lookup = select(Project).where(Project.project_id == spec["project_id"])
            record = (await session.exec(lookup)).first()
            if record:
                # Refresh metadata (repo URL, path, etc. may have changed).
                for field, value in spec.items():
                    if field == "project_id":
                        continue
                    setattr(record, field, value)
                record.updated_at = datetime.utcnow()
            else:
                record = Project(**spec)
            session.add(record)
        await session.commit()
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Record any otherwise-unhandled exception via self_report_error, then answer 500.

    The request URL and HTTP method are passed along as the report context.
    The client only ever sees a generic "Internal Server Error" body.
    """
    request_info = {
        "url": str(request.url),
        "method": request.method,
    }
    await self_report_error(exc, context=request_info)
    return JSONResponse(status_code=500, content={"detail": "Internal Server Error"})
def generate_fingerprint(log: ErrorLogCreate) -> str:
source = log.source

80
app/self_report.py Normal file
View File

@ -0,0 +1,80 @@
"""Log Center 自身错误上报:将 API 运行时异常写入自己的数据库。"""
import os
import sys
import traceback
import hashlib
from datetime import datetime
from sqlmodel import select
from sqlmodel.ext.asyncio.session import AsyncSession
from sqlalchemy.orm import sessionmaker
from .database import engine
from .models import ErrorLog, LogStatus, Project
PROJECT_ID = "log_center_api"
ENVIRONMENT = os.getenv("ENVIRONMENT", "production")
async def self_report_error(exc: Exception, context: dict = None):
    """Write an exception raised inside the Log Center API into its own database.

    The record is written directly through the database engine rather than via
    the HTTP report endpoint, which avoids a circular dependency and extra
    overhead. Any failure while reporting is caught and printed to stderr so
    that reporting can never affect the request being served.

    Args:
        exc: The exception to record. Its traceback supplies the file path and
            line number of the innermost frame ("unknown" / 0 when absent).
        context: Optional extra data stored with the log; ``{}`` when omitted.

    Returns:
        None. Nothing is written when an open log with the same fingerprint
        already exists; a log in a fixed/verified/deployed state is reopened.
    """
    try:
        tb = traceback.extract_tb(exc.__traceback__)
        last_frame = tb[-1] if tb else None

        error_type = type(exc).__name__
        file_path = last_frame.filename if last_frame else "unknown"
        line_number = last_frame.lineno if last_frame else 0
        # Use the three-argument form: the single-argument call only exists on
        # Python 3.10+, and on older interpreters it raised TypeError, which
        # the handler below swallowed — so no error was ever recorded.
        stack_trace = traceback.format_exception(type(exc), exc, exc.__traceback__)

        # Fingerprint used for de-duplication.
        # NOTE(review): intended to mirror the runtime fingerprint in main.py —
        # confirm the two stay in sync.
        raw = f"{PROJECT_ID}|{error_type}|{file_path}|{line_number}"
        fingerprint = hashlib.md5(raw.encode()).hexdigest()

        async_session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
        async with async_session() as session:
            # De-duplication check.
            stmt = select(ErrorLog).where(ErrorLog.fingerprint == fingerprint)
            result = await session.exec(stmt)
            existing = result.first()

            if existing:
                if existing.status not in [LogStatus.DEPLOYED, LogStatus.FIXED, LogStatus.VERIFIED]:
                    return  # Already being tracked; skip.
                # Regression: a previously fixed error has reappeared.
                existing.status = LogStatus.NEW
                existing.timestamp = datetime.utcnow()
                existing.retry_count = 0
                session.add(existing)
                await session.commit()
                return

            # Make sure the owning project row exists.
            proj_stmt = select(Project).where(Project.project_id == PROJECT_ID)
            proj_result = await session.exec(proj_stmt)
            if not proj_result.first():
                session.add(Project(project_id=PROJECT_ID, name="Log Center API"))

            new_log = ErrorLog(
                project_id=PROJECT_ID,
                environment=ENVIRONMENT,
                level="ERROR",
                source="runtime",
                error_type=error_type,
                error_message=str(exc),
                file_path=file_path,
                line_number=line_number,
                stack_trace=stack_trace,
                context=context or {},
                fingerprint=fingerprint,
                timestamp=datetime.utcnow(),
            )
            session.add(new_log)
            await session.commit()
    except Exception:
        # Self-reporting must never crash the service; surface the failure on stderr only.
        traceback.print_exc(file=sys.stderr)

View File

@ -2,19 +2,28 @@
## 概述
Log Center 是一个集中式错误日志收集与 AI 自动修复平台,提供 REST API 供各项目接入,实现运行时错误的统一收集、去重、追踪、分析和自动修复
Log Center 是一个集中式错误日志收集与 AI 自动修复平台,提供 REST API 供各项目接入。
接入流程
接入后覆盖三类错误上报
1. 项目首次上报日志时自动注册到 Log Center
2. 在 Web 管理端配置项目的**仓库地址**和**本地路径**
3. Repair Agent 根据配置自动拉取代码并修复 Bug
| 类型 | `source` 值 | 说明 | 触发方式 |
|------|-------------|------|----------|
| 日常运行错误 | `runtime` | 应用运行时的异常(Python/JS/Dart) | 代码中全局捕获异常自动上报 |
| CI/CD 错误 | `cicd` | 构建、测试、Lint 等流水线失败 | Gitea Actions 步骤失败时上报 |
| K8s 部署错误 | `deployment` | Pod 异常状态(CrashLoopBackOff、OOMKilled 等) | K8s CronJob 定时扫描上报 |
**完整接入流程:**
1. **注册项目信息** — 调用 API 提交项目元信息(名称、仓库地址、本地路径)
2. **接入日常运行错误上报** — 在应用代码中集成全局异常捕获
3. **接入 CI/CD 错误上报** — 在 Gitea Actions 流水线中添加失败上报步骤
4. **接入 K8s 部署错误上报** — 在 K8s Pod 健康监控中添加项目映射
> **重要**: 必须先完成步骤 1,否则 Repair Agent 无法定位代码仓库和本地路径。
---
## 快速开始
### 服务地址
## 服务地址
| 环境 | API 地址 | 仪表盘 |
|------|----------|--------|
@ -23,23 +32,141 @@ Log Center 是一个集中式错误日志收集与 AI 自动修复平台,提
---
## API 接口
## 步骤 1:注册项目信息
### 上报错误日志
首次接入 Log Center 时,**必须先注册项目信息**。这是 Repair Agent 正常工作的前提。
**POST** `/api/v1/logs/report`
### 注册方式
#### 请求体 (JSON)
先上报一条初始化日志(触发项目自动创建),再调用 PUT 接口补全元信息:
```bash
# 1. 上报初始化日志,触发项目自动创建
curl -X POST "${LOG_CENTER_URL}/api/v1/logs/report" \
-H "Content-Type: application/json" \
-d '{
"project_id": "your_project_id",
"environment": "production",
"level": "WARNING",
"error": {
"type": "ProjectInit",
"message": "Project registered to Log Center",
"stack_trace": ["Project initialization"]
},
"repo_url": "https://gitea.airlabs.art/team/your_project.git"
}'
# 2. 补全项目元信息
curl -X PUT "${LOG_CENTER_URL}/api/v1/projects/your_project_id" \
-H "Content-Type: application/json" \
-d '{
"name": "项目显示名称",
"repo_url": "https://gitea.airlabs.art/team/your_project.git",
"local_path": "/absolute/path/to/project",
"description": "项目描述"
}'
```
### 各语言注册示例
#### Python
```python
import requests
import os
LOG_CENTER_URL = os.getenv("LOG_CENTER_URL", "http://localhost:8002")
def register_project():
"""首次接入时调用,注册项目到 Log Center。"""
project_id = "your_project_id"
# 1. 上报初始化日志触发项目创建
requests.post(f"{LOG_CENTER_URL}/api/v1/logs/report", json={
"project_id": project_id,
"environment": os.getenv("ENVIRONMENT", "production"),
"level": "WARNING",
"error": {
"type": "ProjectInit",
"message": "Project registered to Log Center",
"stack_trace": ["Project initialization"],
},
"repo_url": "https://gitea.airlabs.art/team/your_project.git",
}, timeout=5)
# 2. 补全项目元信息
requests.put(f"{LOG_CENTER_URL}/api/v1/projects/{project_id}", json={
"name": "项目显示名称",
"repo_url": "https://gitea.airlabs.art/team/your_project.git",
"local_path": "/absolute/path/to/project",
"description": "项目描述",
}, timeout=5)
```
#### JavaScript / TypeScript
```typescript
const LOG_CENTER_URL = import.meta.env.VITE_LOG_CENTER_URL || 'http://localhost:8002';
async function registerProject() {
const projectId = 'your_project_id';
// 1. 上报初始化日志触发项目创建
await fetch(`${LOG_CENTER_URL}/api/v1/logs/report`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
project_id: projectId,
environment: import.meta.env.MODE,
level: 'WARNING',
error: {
type: 'ProjectInit',
message: 'Project registered to Log Center',
stack_trace: ['Project initialization'],
},
repo_url: 'https://gitea.airlabs.art/team/your_project.git',
}),
});
// 2. 补全项目元信息
await fetch(`${LOG_CENTER_URL}/api/v1/projects/${projectId}`, {
method: 'PUT',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
name: '项目显示名称',
repo_url: 'https://gitea.airlabs.art/team/your_project.git',
local_path: '/absolute/path/to/project',
description: '项目描述',
}),
});
}
```
### 项目元信息字段
| 字段 | 类型 | 必填 | 说明 |
|------|------|------|------|
| `project_id` | string | ✅ | 项目唯一标识,如 `rtc_backend`, `rtc_web` |
| `name` | string | ✅ | 项目显示名称 |
| `repo_url` | string | ✅ | Git 仓库地址(Repair Agent 克隆/推送代码用) |
| `local_path` | string | ✅ | 本地项目绝对路径(Repair Agent 在此目录执行修复) |
| `description` | string | ❌ | 项目描述 |
---
## 步骤 2:接入日常运行错误上报
> `source: "runtime"`(默认值,可不传)
在应用代码中集成全局异常捕获,运行时发生未处理异常时自动上报到 Log Center。
### 上报格式
```json
{
"project_id": "rtc_backend",
"environment": "production",
"level": "ERROR",
"timestamp": "2026-01-30T10:30:00Z",
"version": "1.2.3",
"commit_hash": "abc1234",
"repo_url": "https://gitea.example.com/team/rtc_backend.git",
"error": {
"type": "ValueError",
"message": "invalid literal for int() with base 10: 'abc'",
@ -48,117 +175,34 @@ Log Center 是一个集中式错误日志收集与 AI 自动修复平台,提
"stack_trace": [
"Traceback (most recent call last):",
" File \"apps/users/views.py\", line 42, in get_user",
" user_id = int(request.GET['id'])",
"ValueError: invalid literal for int() with base 10: 'abc'"
]
},
"context": {
"url": "/api/users/123",
"method": "GET",
"user_id": "u_12345",
"request_id": "req_abc123"
"user_id": "u_12345"
}
}
```
#### 字段说明
### Runtime 字段说明
| 字段 | 类型 | 必填 | 说明 |
|------|------|------|------|
| `project_id` | string | ✅ | 项目标识,如 `rtc_backend`, `rtc_web`, `airhub_app` |
| `project_id` | string | ✅ | 项目标识 |
| `environment` | string | ✅ | 环境:`development`, `staging`, `production` |
| `level` | string | ✅ | 日志级别:`ERROR`, `WARNING`, `CRITICAL` |
| `source` | string | ❌ | 来源:`runtime`(默认), `cicd`, `deployment` |
| `timestamp` | string | ❌ | ISO 8601 格式,不传则使用服务器时间 |
| `source` | string | ❌ | 默认 `runtime`,无需传 |
| `timestamp` | string | ❌ | ISO 8601 格式,不传则用服务器时间 |
| `version` | string | ❌ | 应用版本号 |
| `commit_hash` | string | ❌ | Git commit hash |
| `repo_url` | string | ❌ | 项目仓库地址,首次上报时传入可自动关联到项目 |
| `error.type` | string | ✅ | 异常类型,如 `ValueError`, `TypeError` |
| `error.message` | string | ✅ | 错误消息 |
| `error.file_path` | string | ❌ | 出错文件路径runtime 必填cicd/deployment 可选) |
| `error.line_number` | int | ❌ | 出错行号runtime 必填cicd/deployment 可选) |
| `error.file_path` | string | ✅ | 出错文件路径 |
| `error.line_number` | int | ✅ | 出错行号 |
| `error.stack_trace` | array | ✅ | 堆栈信息(数组或字符串) |
| `context` | object | ❌ | 额外上下文信息URL、用户ID等 |
> **项目自动注册**: 首次上报日志时,系统会根据 `project_id` 自动创建项目记录。如果同时传入 `repo_url`,会自动关联仓库地址,供 Repair Agent 使用。
#### 响应
**成功 (200)**
```json
{
"message": "Log reported",
"id": 123
}
```
**已存在 (200)** - 重复错误自动去重
```json
{
"message": "Log deduplicated",
"id": 123,
"status": "NEW"
}
```
---
### 项目管理 API
项目在首次日志上报时自动创建,之后可通过 API 或 Web 管理端编辑配置。
#### 获取项目列表
**GET** `/api/v1/projects`
```json
{
"projects": [
{
"id": 1,
"project_id": "rtc_backend",
"name": "RTC 后端",
"repo_url": "https://gitea.example.com/team/rtc_backend.git",
"local_path": "/home/dev/projects/rtc_backend",
"description": "Django 后端服务",
"created_at": "2026-01-15T08:00:00",
"updated_at": "2026-02-20T10:30:00"
}
]
}
```
#### 获取项目详情
**GET** `/api/v1/projects/{project_id}`
返回单个项目的完整信息。
#### 编辑项目配置
**PUT** `/api/v1/projects/{project_id}`
```json
{
"name": "RTC 后端",
"repo_url": "https://gitea.example.com/team/rtc_backend.git",
"local_path": "/home/dev/projects/rtc_backend",
"description": "Django 后端服务"
}
```
| 字段 | 类型 | 说明 |
|------|------|------|
| `name` | string | 项目显示名称 |
| `repo_url` | string | Git 仓库地址Repair Agent 克隆/推送代码用) |
| `local_path` | string | 本地项目路径Repair Agent 在此目录执行修复) |
| `description` | string | 项目描述 |
> **注意**: `repo_url``local_path` 是 Repair Agent 正常工作的关键配置。未配置时 Agent 将无法执行 Git 操作或定位项目代码。可在 Web 管理端的「项目管理」页面中配置。
---
## 接入示例
| `context` | object | ❌ | 额外上下文信息 |
### Python (Django / FastAPI)
@ -170,7 +214,7 @@ import os
LOG_CENTER_URL = os.getenv("LOG_CENTER_URL", "http://localhost:8002")
def report_error(exc, context=None):
"""上报错误到 Log Center"""
"""上报运行时错误到 Log Center"""
tb = traceback.extract_tb(exc.__traceback__)
last_frame = tb[-1] if tb else None
@ -178,7 +222,6 @@ def report_error(exc, context=None):
"project_id": "rtc_backend",
"environment": os.getenv("ENVIRONMENT", "development"),
"level": "ERROR",
"repo_url": os.getenv("REPO_URL", ""), # 可选:关联仓库地址
"error": {
"type": type(exc).__name__,
"message": str(exc),
@ -193,55 +236,49 @@ def report_error(exc, context=None):
requests.post(
f"{LOG_CENTER_URL}/api/v1/logs/report",
json=payload,
timeout=3 # 快速失败,不影响主业务
timeout=3
)
except Exception:
pass # 静默失败,不影响主业务
```
#### Django 集成位置
修改 `utils/exceptions.py``custom_exception_handler`:
**Django 集成位置** — 修改 `utils/exceptions.py` 中的 `custom_exception_handler`:
```python
def custom_exception_handler(exc, context):
# 上报到 Log Center (异步,不阻塞响应)
# 上报到 Log Center
report_error(exc, {
"view": str(context.get("view")),
"request_path": context.get("request").path if context.get("request") else None,
})
# ... 原有逻辑不变 ...
```
---
**FastAPI 集成位置** — 添加全局异常处理器:
```python
from fastapi import Request
from fastapi.responses import JSONResponse
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
report_error(exc, context={  # report_error above is a plain (synchronous) function; awaiting its None result would raise TypeError
"url": str(request.url),
"method": request.method,
})
return JSONResponse(status_code=500, content={"detail": "Internal Server Error"})
```
### JavaScript / TypeScript (React / Vue)
```typescript
const LOG_CENTER_URL = import.meta.env.VITE_LOG_CENTER_URL || 'http://localhost:8002';
interface ErrorPayload {
project_id: string;
environment: string;
level: string;
repo_url?: string;
error: {
type: string;
message: string;
file_path: string;
line_number: number;
stack_trace: string[];
};
context?: Record<string, unknown>;
}
export function reportError(error: Error, context?: Record<string, unknown>) {
// 解析堆栈信息
const stackLines = error.stack?.split('\n') || [];
const match = stackLines[1]?.match(/at\s+.*\s+\((.+):(\d+):\d+\)/);
const payload: ErrorPayload = {
const payload = {
project_id: 'rtc_web',
environment: import.meta.env.MODE,
level: 'ERROR',
@ -259,12 +296,9 @@ export function reportError(error: Error, context?: Record<string, unknown>) {
},
};
// 使用 sendBeacon 确保页面关闭时也能发送
const blob = new Blob([JSON.stringify(payload)], { type: 'application/json' });
if (navigator.sendBeacon) {
navigator.sendBeacon(
`${LOG_CENTER_URL}/api/v1/logs/report`,
JSON.stringify(payload)
);
navigator.sendBeacon(`${LOG_CENTER_URL}/api/v1/logs/report`, blob);
} else {
fetch(`${LOG_CENTER_URL}/api/v1/logs/report`, {
method: 'POST',
@ -276,27 +310,40 @@ export function reportError(error: Error, context?: Record<string, unknown>) {
}
```
#### Axios 拦截器集成
修改 `src/api/request.ts`:
**全局错误捕获** — 在 `main.tsx` / `main.ts` 入口文件中:
```typescript
request.interceptors.response.use(
(response) => { /* ... */ },
(error: AxiosError) => {
// 上报到 Log Center
reportError(error, {
url: error.config?.url,
method: error.config?.method,
status: error.response?.status,
});
// JS 运行时异常
window.onerror = (_message, source, lineno, colno, error) => {
if (error) reportError(error, { source, lineno, colno });
};
// ... 原有逻辑不变 ...
}
);
// 未处理的 Promise rejection
window.onunhandledrejection = (event: PromiseRejectionEvent) => {
const error = event.reason instanceof Error
? event.reason
: new Error(String(event.reason));
reportError(error, { type: 'unhandledrejection' });
};
```
---
**Axios 拦截器** — 在 `api.ts` / `request.ts` 中(仅上报 5xx 服务端错误):
```typescript
api.interceptors.response.use(
(response) => response,
(error: AxiosError) => {
if (error.response && error.response.status >= 500) {
reportError(error, {
api_url: error.config?.url,
method: error.config?.method,
status: error.response.status,
});
}
return Promise.reject(error);
},
);
```
### Flutter (Dart)
@ -311,14 +358,12 @@ const logCenterUrl = String.fromEnvironment(
Future<void> reportError(dynamic error, StackTrace stackTrace, {Map<String, dynamic>? context}) async {
final stackLines = stackTrace.toString().split('\n');
// 解析第一行获取文件和行号
final match = RegExp(r'#0\s+.*\((.+):(\d+):\d+\)').firstMatch(stackLines.first);
final payload = {
'project_id': 'airhub_app',
'environment': const String.fromEnvironment('ENVIRONMENT', defaultValue: 'development'),
'level': 'ERROR',
'repo_url': 'https://gitea.example.com/team/airhub_app.git',
'error': {
'type': error.runtimeType.toString(),
'message': error.toString(),
@ -341,7 +386,7 @@ Future<void> reportError(dynamic error, StackTrace stackTrace, {Map<String, dyna
}
```
`main.dart` 中全局捕获:
**全局捕获** — 在 `main.dart` 中:
```dart
void main() {
@ -359,9 +404,284 @@ void main() {
---
## 步骤 3:接入 CI/CD 错误上报
> `source: "cicd"`
在 Gitea Actions 流水线中,为每个关键步骤添加失败上报,构建/测试/Lint 失败时自动上报到 Log Center。
### 上报格式
```json
{
"project_id": "rtc_backend",
"environment": "cicd",
"level": "ERROR",
"source": "cicd",
"commit_hash": "abc1234",
"error": {
"type": "DockerBuildError",
"message": "Docker build failed",
"file_path": null,
"line_number": null,
"stack_trace": ["Build step failed. Check CI logs for details."]
},
"context": {
"workflow_name": "Build and Deploy",
"job_name": "build",
"step_name": "Build Docker Image",
"run_id": "123",
"branch": "main",
"repository": "team/rtc_backend",
"run_url": "https://gitea.airlabs.art/team/rtc_backend/actions/runs/123"
}
}
```
### CI/CD 特有字段
| 字段 | 说明 |
|------|------|
| `source` | **必须**设为 `"cicd"` |
| `environment` | 设为 `"cicd"` |
| `error.type` | 推荐值:`DockerBuildError`, `NpmBuildError`, `TestFailure`, `LintError`, `CIBuildError` |
| `error.file_path` | 可为 `null` |
| `error.line_number` | 可为 `null` |
| `context.workflow_name` | 工作流名称 |
| `context.job_name` | Job 名称 |
| `context.step_name` | 失败的步骤名称 |
| `context.run_id` | 运行 ID |
| `context.run_url` | CI 运行详情链接 |
| `context.branch` | 分支名 |
### Gitea Actions 集成方式
为每个关键步骤添加 `id`,然后在末尾添加条件上报步骤:
```yaml
name: Build and Deploy
on:
push:
branches: [main]
env:
LOG_CENTER_URL: https://qiyuan-log-center-api.airlabs.art
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
# 关键步骤:添加 id
- name: Build Docker Image
id: build
run: docker build -t myapp:latest .
- name: Run Tests
id: test
run: docker run myapp:latest python -m pytest
- name: Deploy
id: deploy
run: kubectl apply -f k8s/
# ===== 失败上报步骤(放在所有关键步骤之后) =====
- name: Report Build Failure
if: failure() && steps.build.outcome == 'failure'
run: |
curl -s -X POST "${LOG_CENTER_URL}/api/v1/logs/report" \
-H "Content-Type: application/json" \
-d '{
"project_id": "'"${GITHUB_REPOSITORY##*/}"'",
"environment": "cicd",
"level": "ERROR",
"source": "cicd",
"commit_hash": "'"$GITHUB_SHA"'",
"error": {
"type": "DockerBuildError",
"message": "Docker build failed",
"file_path": null,
"line_number": null,
"stack_trace": ["Docker build step failed. Check CI logs."]
},
"context": {
"workflow_name": "'"$GITHUB_WORKFLOW"'",
"job_name": "'"$GITHUB_JOB"'",
"step_name": "Build Docker Image",
"run_id": "'"$GITHUB_RUN_ID"'",
"branch": "'"$GITHUB_REF_NAME"'",
"repository": "'"$GITHUB_REPOSITORY"'",
"run_url": "'"$GITHUB_SERVER_URL"'/'"$GITHUB_REPOSITORY"'/actions/runs/'"$GITHUB_RUN_ID"'"
}
}' --connect-timeout 5 --max-time 10 || true
- name: Report Test Failure
if: failure() && steps.test.outcome == 'failure'
run: |
curl -s -X POST "${LOG_CENTER_URL}/api/v1/logs/report" \
-H "Content-Type: application/json" \
-d '{
"project_id": "'"${GITHUB_REPOSITORY##*/}"'",
"environment": "cicd",
"level": "ERROR",
"source": "cicd",
"commit_hash": "'"$GITHUB_SHA"'",
"error": {
"type": "TestFailure",
"message": "Tests failed in CI pipeline",
"file_path": null,
"line_number": null,
"stack_trace": ["Test step failed. Check CI logs."]
},
"context": {
"workflow_name": "'"$GITHUB_WORKFLOW"'",
"job_name": "'"$GITHUB_JOB"'",
"step_name": "Run Tests",
"run_id": "'"$GITHUB_RUN_ID"'",
"branch": "'"$GITHUB_REF_NAME"'",
"repository": "'"$GITHUB_REPOSITORY"'",
"run_url": "'"$GITHUB_SERVER_URL"'/'"$GITHUB_REPOSITORY"'/actions/runs/'"$GITHUB_RUN_ID"'"
}
}' --connect-timeout 5 --max-time 10 || true
```
### 使用 report-cicd-error.sh 脚本
项目提供了通用上报脚本 `scripts/report-cicd-error.sh`(需要 `jq`),可在 CI 步骤中使用:
```bash
# 用法: ./scripts/report-cicd-error.sh <project_id> <step_name> <error_message_or_file>
./scripts/report-cicd-error.sh rtc_backend "Build Docker Image" "Docker build failed: exit code 1"
./scripts/report-cicd-error.sh rtc_backend "Run Tests" /tmp/test-output.log
```
脚本会自动:
- 根据步骤名推断 `error_type`(DockerBuildError / NpmBuildError / TestFailure / LintError)
- 读取 Gitea Actions 环境变量填充 context
- 如果传入文件路径,读取最后 100 行作为 stack_trace
---
## 步骤 4:接入 K8s 部署错误上报
> `source: "deployment"`
通过 K8s Pod 健康监控 CronJob,定时扫描集群中异常 Pod 并上报到 Log Center。
### 上报格式
```json
{
"project_id": "rtc_backend",
"environment": "production",
"level": "CRITICAL",
"source": "deployment",
"error": {
"type": "CrashLoopBackOff",
"message": "CrashLoopBackOff: back-off restarting failed container (pod: rtc-backend-xxx, container: api)",
"file_path": null,
"line_number": null,
"stack_trace": ["...容器崩溃前的日志(最后 50 行)..."]
},
"context": {
"namespace": "default",
"pod_name": "rtc-backend-xxx-yyy",
"container_name": "api",
"deployment_name": "rtc-backend",
"restart_count": 5,
"node_name": "node-1"
}
}
```
### Deployment 特有字段
| 字段 | 说明 |
|------|------|
| `source` | **必须**设为 `"deployment"` |
| `level` | 建议 `"CRITICAL"`,Pod 异常通常较严重 |
| `error.type` | 取自 K8s 状态:`CrashLoopBackOff`, `OOMKilled`, `ImagePullBackOff`, `ErrImagePull` 等 |
| `error.file_path` | 可为 `null` |
| `error.line_number` | 可为 `null` |
| `error.stack_trace` | 容器崩溃前的日志输出 |
| `context.namespace` | K8s 命名空间 |
| `context.pod_name` | Pod 名称 |
| `context.deployment_name` | Deployment 名称(用于指纹去重) |
| `context.restart_count` | 重启次数 |
| `context.node_name` | 节点名 |
### 监控的异常状态
| 状态 | 说明 |
|------|------|
| `CrashLoopBackOff` | 容器反复崩溃重启 |
| `OOMKilled` | 内存溢出被杀 |
| `ImagePullBackOff` / `ErrImagePull` | 拉取镜像失败 |
| `CreateContainerConfigError` | 容器配置错误 |
| `RunContainerError` | 容器启动失败 |
### 接入方式:添加 Pod label 映射
K8s Monitor CronJob 已在集群中运行,每 5 分钟扫描一次。新项目接入只需在 `k8s-monitor/monitor.py` 的 `APP_TO_PROJECT` 字典中添加映射:
```python
# k8s-monitor/monitor.py
APP_TO_PROJECT = {
"rtc-backend": "rtc_backend", # Pod 的 app label -> project_id
"rtc-backend-dev": "rtc_backend",
"rtc-web": "rtc_web",
"rtc-web-dev": "rtc_web",
"log-center-api": "log_center_api",
"log-center-web": "log_center_web",
# 新项目在此添加映射
"your-app": "your_project_id",
}
```
确保你的 K8s Deployment 有 `app` label
```yaml
metadata:
labels:
app: your-app # 与 APP_TO_PROJECT 中的 key 一致
```
### CronJob 部署配置
如果集群中尚未部署 Monitor,使用以下配置:
```yaml
# k8s/monitor-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: pod-health-monitor
spec:
schedule: "*/5 * * * *"
jobTemplate:
spec:
template:
spec:
serviceAccountName: pod-monitor
containers:
- name: monitor
image: your-registry/k8s-pod-monitor:latest
env:
- name: LOG_CENTER_URL
value: "https://qiyuan-log-center-api.airlabs.art"
- name: MONITOR_NAMESPACE
value: "default"
restartPolicy: OnFailure
```
---
## 错误去重机制
Log Center 使用 **指纹(fingerprint)** 对错误进行去重,按来源使用不同的指纹策略:
Log Center 使用 **指纹(fingerprint)** 对错误进行去重,三类来源使用不同的指纹策略:
| 来源 | 指纹组成 |
|------|----------|
@ -369,7 +689,7 @@ Log Center 使用 **指纹(fingerprint)** 对错误进行去重,按来源使
| `cicd` | `MD5(project_id \| cicd \| error_type \| job_name \| step_name)` |
| `deployment` | `MD5(project_id \| deployment \| error_type \| namespace \| deployment_name)` |
相同指纹的错误只记录一次。如果已修复的错误再次出现,系统会自动重新打开(回归检测)。
相同指纹的错误只记录一次。已修复的错误再次出现会自动重新打开(回归检测)。
---
@ -391,39 +711,47 @@ NEW → VERIFYING → PENDING_FIX → FIXING → FIXED → VERIFIED → DEPLOYED
| `FIXED` | 已修复,待验证 |
| `VERIFIED` | 已验证修复 |
| `DEPLOYED` | 已部署上线 |
| `FIX_FAILED` | 修复失败(失败原因会记录到数据库并在 Web 端展示) |
| `FIX_FAILED` | 修复失败 |
---
## Web 管理端
## API 参考
### 项目管理
### 上报错误日志
访问 Web 管理端的「项目管理」页面,可以:
**POST** `/api/v1/logs/report`
- 查看所有已注册项目及其配置状态
- 编辑项目的**仓库地址**`repo_url`)和**本地路径**`local_path`
- 未配置的字段会标红提示
**响应:**
> Repair Agent 依赖这两个配置来定位项目代码和执行 Git 操作。请确保在接入后及时配置。
```json
// 新错误
{"message": "Log reported", "id": 123}
### 缺陷追踪
// 重复错误(去重)
{"message": "Log deduplicated", "id": 123, "status": "NEW"}
- **缺陷列表**: 按项目、来源、状态筛选,修复失败的缺陷会直接显示失败原因
- **缺陷详情**: 查看完整错误信息、堆栈、上下文,以及修复历史记录
- **修复报告**: 查看每轮 AI 修复的详细过程(分析、代码变更、测试结果、失败原因)
// 回归(已修复的错误再次出现)
{"message": "Regression detected, reopened", "id": 123}
```
### 项目管理 API
| 方法 | 路径 | 说明 |
|------|------|------|
| GET | `/api/v1/projects` | 获取项目列表 |
| GET | `/api/v1/projects/{project_id}` | 获取项目详情 |
| PUT | `/api/v1/projects/{project_id}` | 编辑项目配置 |
---
## 最佳实践
1. **首次接入时传入 `repo_url`**: 在日志上报中包含仓库地址,省去手动配置步骤
2. **设置超时**: 上报请求设置 3 秒超时,避免影响主业务
3. **静默失败**: 上报失败不应影响用户体验
4. **异步上报**: 使用异步方式上报,不阻塞主流程
5. **添加上下文**: 尽量添加有用的上下文信息用户ID、请求URL等
6. **环境区分**: 正确设置 `environment` 字段区分开发/生产
7. **配置本地路径**: 接入后在 Web 端配置 `local_path`,使 Repair Agent 能正确定位代码
1. **设置超时**: 上报请求设置 3 秒超时,避免影响主业务
2. **静默失败**: 上报失败不应影响用户体验,所有 catch 块静默处理
3. **异步上报**: 使用异步方式上报,不阻塞主流程
4. **添加上下文**: 尽量添加有用的上下文信息(用户ID、请求URL等)
5. **环境区分**: 正确设置 `environment` 字段区分开发/生产
6. **CI/CD 用 `|| true`**: 上报步骤失败不应阻断流水线
---
@ -434,7 +762,6 @@ NEW → VERIFYING → PENDING_FIX → FIXING → FIXED → VERIFIED → DEPLOYED
# .env
LOG_CENTER_URL=http://localhost:8002
ENVIRONMENT=development
REPO_URL=https://gitea.example.com/team/rtc_backend.git # 可选
```
### JavaScript 项目
@ -450,8 +777,14 @@ flutter run --dart-define=LOG_CENTER_URL=http://localhost:8002
flutter run --dart-define=ENVIRONMENT=development
```
### Gitea Actions
```yaml
env:
LOG_CENTER_URL: https://qiyuan-log-center-api.airlabs.art
```
---
## API 文档
## 完整 API 文档
完整 API 文档请访问: [http://localhost:8002/docs](http://localhost:8002/docs)
访问: [http://localhost:8002/docs](http://localhost:8002/docs)

View File

@ -1,4 +1,4 @@
import axios from 'axios';
import axios, { AxiosError } from 'axios';
const API_BASE = import.meta.env.VITE_API_BASE_URL || 'https://qiyuan-log-center-api.airlabs.art';
@ -7,6 +7,58 @@ const api = axios.create({
timeout: 10000,
});
// ==================== 自身错误上报 ====================
/**
 * Report a front-end error to Log Center under project `log_center_web`.
 *
 * The payload carries the error name, message, full stack and the first
 * stack frame's file path and line number, plus the current page URL and
 * user agent merged with the caller-supplied `context`.
 *
 * Delivery is best-effort: `navigator.sendBeacon` is tried first and `fetch`
 * (with `keepalive`) is used when the beacon is unavailable or not queued.
 * This function never throws, so it is safe to call from global error
 * handlers and HTTP interceptors.
 *
 * @param error - The error to report.
 * @param context - Extra key/value data merged into the report context.
 */
export function reportError(error: Error, context?: Record<string, unknown>): void {
  try {
    const stackLines = error.stack?.split('\n') ?? [];

    // Find the first line that ends in `file:line:column`. This covers V8
    // frames ("at fn (file:1:2)", "at file:1:2") as well as Firefox/Safari
    // frames ("fn@file:1:2"), whose stacks have no leading message line.
    const framePattern = /(?:\(|@|at\s+)([^\s()]+):(\d+):\d+\)?\s*$/;
    let filePath = 'unknown';
    let lineNumber = 0;
    for (const line of stackLines) {
      const frame = framePattern.exec(line);
      if (frame) {
        filePath = frame[1] ?? 'unknown';
        lineNumber = Number.parseInt(frame[2] ?? '0', 10);
        break;
      }
    }

    const payload = {
      project_id: 'log_center_web',
      environment: import.meta.env.MODE,
      level: 'ERROR',
      error: {
        type: error.name,
        message: error.message,
        file_path: filePath,
        line_number: lineNumber,
        stack_trace: stackLines,
      },
      context: {
        url: window.location.href,
        userAgent: navigator.userAgent,
        ...context,
      },
    };

    const endpoint = `${API_BASE}/api/v1/logs/report`;
    const body = JSON.stringify(payload);

    // sendBeacon returns false when the browser could not queue the request,
    // so fall through to fetch in that case instead of dropping the report.
    const queued =
      typeof navigator.sendBeacon === 'function' &&
      navigator.sendBeacon(endpoint, new Blob([body], { type: 'application/json' }));

    if (!queued) {
      fetch(endpoint, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body,
        keepalive: true,
      }).catch(() => {});
    }
  } catch {
    // Deliberately silent: a failure while reporting (e.g. a context that
    // cannot be serialised) must not mask the error being reported.
  }
}
// Axios response interceptor: report server-side (5xx) failures to Log Center,
// then re-reject so callers still receive the original error.
api.interceptors.response.use(
  (response) => response,
  (error: AxiosError) => {
    const status = error.response?.status;
    const isServerError = status !== undefined && status >= 500;
    if (isServerError) {
      const requestInfo = {
        api_url: error.config?.url,
        method: error.config?.method,
        status,
      };
      reportError(error, requestInfo);
    }
    return Promise.reject(error);
  },
);
// Types
export interface ErrorLog {
id: number;

View File

@ -2,6 +2,22 @@ import { StrictMode } from 'react'
import { createRoot } from 'react-dom/client'
import './index.css'
import App from './App.tsx'
import { reportError } from './api'
// Global capture: uncaught JS runtime errors.
// Events that arrive without an Error object are ignored.
window.onerror = (_message, source, lineno, colno, error) => {
  if (!error) return
  reportError(error, { source, lineno, colno })
}
// Global capture: unhandled Promise rejections.
// Non-Error rejection reasons are wrapped in an Error built from String(reason).
window.onunhandledrejection = (event: PromiseRejectionEvent) => {
  const { reason } = event
  let error: Error
  if (reason instanceof Error) {
    error = reason
  } else {
    error = new Error(String(reason))
  }
  reportError(error, { type: 'unhandledrejection' })
}
createRoot(document.getElementById('root')!).render(
<StrictMode>