feat(self-report): 日志中台自身接入错误上报
Some checks failed
Build and Deploy Log Center / build-and-deploy (push) Failing after 1m19s
Some checks failed
Build and Deploy Log Center / build-and-deploy (push) Failing after 1m19s
- 新增 app/self_report.py:后端运行时异常直接写入自身数据库 - main.py:添加全局异常处理器 + 启动时注册 log_center_api/web 项目 - web/api.ts:添加 reportError 函数 + Axios 5xx 拦截器 - web/main.tsx:添加 window.onerror / onunhandledrejection 全局捕获 - deploy.yaml:CI/CD 流水线各步骤失败时上报(build/deploy) - 重写 integration_guide.md:按三类上报(runtime/cicd/deployment)重新组织 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
b178d24e73
commit
c8204b6d47
@ -6,6 +6,9 @@ on:
|
||||
- main
|
||||
- master
|
||||
|
||||
env:
|
||||
LOG_CENTER_URL: https://qiyuan-log-center-api.airlabs.art
|
||||
|
||||
jobs:
|
||||
build-and-deploy:
|
||||
runs-on: ubuntu-latest
|
||||
@ -29,6 +32,7 @@ jobs:
|
||||
|
||||
# Build API Image
|
||||
- name: Build and Push API
|
||||
id: build-api
|
||||
uses: docker/build-push-action@v4
|
||||
with:
|
||||
context: .
|
||||
@ -39,6 +43,7 @@ jobs:
|
||||
|
||||
# Build Web Image
|
||||
- name: Build and Push Web
|
||||
id: build-web
|
||||
uses: docker/build-push-action@v4
|
||||
with:
|
||||
context: ./web
|
||||
@ -51,6 +56,7 @@ jobs:
|
||||
|
||||
# Build K8s Monitor Image
|
||||
- name: Build and Push K8s Monitor
|
||||
id: build-monitor
|
||||
uses: docker/build-push-action@v4
|
||||
with:
|
||||
context: ./k8s-monitor
|
||||
@ -72,9 +78,10 @@ jobs:
|
||||
kubeconfig: ${{ secrets.KUBE_CONFIG }}
|
||||
|
||||
- name: Update K8s Manifests
|
||||
id: deploy
|
||||
run: |
|
||||
echo "Environment: Production"
|
||||
|
||||
|
||||
# Replace image placeholders
|
||||
sed -i "s|\${CI_REGISTRY_IMAGE}/log-center-api:latest|${{ secrets.SWR_SERVER }}/${{ secrets.SWR_ORG }}/log-center-api:latest|g" k8s/api-deployment-prod.yaml
|
||||
sed -i "s|\${CI_REGISTRY_IMAGE}/log-center-web:latest|${{ secrets.SWR_SERVER }}/${{ secrets.SWR_ORG }}/log-center-web:latest|g" k8s/web-deployment-prod.yaml
|
||||
@ -89,3 +96,123 @@ jobs:
|
||||
# Restart deployments
|
||||
kubectl rollout restart deployment/log-center-api
|
||||
kubectl rollout restart deployment/log-center-web
|
||||
|
||||
# ==================== CI/CD 错误上报 ====================
|
||||
|
||||
- name: Report API Build Failure
|
||||
if: failure() && steps.build-api.outcome == 'failure'
|
||||
run: |
|
||||
curl -s -X POST "${LOG_CENTER_URL}/api/v1/logs/report" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"project_id": "log_center_api",
|
||||
"environment": "cicd",
|
||||
"level": "ERROR",
|
||||
"source": "cicd",
|
||||
"commit_hash": "'"$GITHUB_SHA"'",
|
||||
"error": {
|
||||
"type": "DockerBuildError",
|
||||
"message": "Log Center API Docker build failed",
|
||||
"file_path": null,
|
||||
"line_number": null,
|
||||
"stack_trace": ["API Docker build step failed. Check CI logs for details."]
|
||||
},
|
||||
"context": {
|
||||
"workflow_name": "'"$GITHUB_WORKFLOW"'",
|
||||
"job_name": "'"$GITHUB_JOB"'",
|
||||
"step_name": "Build and Push API",
|
||||
"run_id": "'"$GITHUB_RUN_ID"'",
|
||||
"branch": "'"$GITHUB_REF_NAME"'",
|
||||
"repository": "'"$GITHUB_REPOSITORY"'",
|
||||
"run_url": "'"$GITHUB_SERVER_URL"'/'"$GITHUB_REPOSITORY"'/actions/runs/'"$GITHUB_RUN_ID"'"
|
||||
}
|
||||
}' --connect-timeout 5 --max-time 10 || true
|
||||
|
||||
- name: Report Web Build Failure
|
||||
if: failure() && steps.build-web.outcome == 'failure'
|
||||
run: |
|
||||
curl -s -X POST "${LOG_CENTER_URL}/api/v1/logs/report" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"project_id": "log_center_web",
|
||||
"environment": "cicd",
|
||||
"level": "ERROR",
|
||||
"source": "cicd",
|
||||
"commit_hash": "'"$GITHUB_SHA"'",
|
||||
"error": {
|
||||
"type": "DockerBuildError",
|
||||
"message": "Log Center Web Docker build failed",
|
||||
"file_path": null,
|
||||
"line_number": null,
|
||||
"stack_trace": ["Web Docker build step failed. Check CI logs for details."]
|
||||
},
|
||||
"context": {
|
||||
"workflow_name": "'"$GITHUB_WORKFLOW"'",
|
||||
"job_name": "'"$GITHUB_JOB"'",
|
||||
"step_name": "Build and Push Web",
|
||||
"run_id": "'"$GITHUB_RUN_ID"'",
|
||||
"branch": "'"$GITHUB_REF_NAME"'",
|
||||
"repository": "'"$GITHUB_REPOSITORY"'",
|
||||
"run_url": "'"$GITHUB_SERVER_URL"'/'"$GITHUB_REPOSITORY"'/actions/runs/'"$GITHUB_RUN_ID"'"
|
||||
}
|
||||
}' --connect-timeout 5 --max-time 10 || true
|
||||
|
||||
- name: Report Monitor Build Failure
|
||||
if: failure() && steps.build-monitor.outcome == 'failure'
|
||||
run: |
|
||||
curl -s -X POST "${LOG_CENTER_URL}/api/v1/logs/report" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"project_id": "log_center_api",
|
||||
"environment": "cicd",
|
||||
"level": "ERROR",
|
||||
"source": "cicd",
|
||||
"commit_hash": "'"$GITHUB_SHA"'",
|
||||
"error": {
|
||||
"type": "DockerBuildError",
|
||||
"message": "K8s Monitor Docker build failed",
|
||||
"file_path": null,
|
||||
"line_number": null,
|
||||
"stack_trace": ["K8s Monitor Docker build step failed. Check CI logs for details."]
|
||||
},
|
||||
"context": {
|
||||
"workflow_name": "'"$GITHUB_WORKFLOW"'",
|
||||
"job_name": "'"$GITHUB_JOB"'",
|
||||
"step_name": "Build and Push K8s Monitor",
|
||||
"run_id": "'"$GITHUB_RUN_ID"'",
|
||||
"branch": "'"$GITHUB_REF_NAME"'",
|
||||
"repository": "'"$GITHUB_REPOSITORY"'",
|
||||
"run_url": "'"$GITHUB_SERVER_URL"'/'"$GITHUB_REPOSITORY"'/actions/runs/'"$GITHUB_RUN_ID"'"
|
||||
}
|
||||
}' --connect-timeout 5 --max-time 10 || true
|
||||
|
||||
- name: Report Deploy Failure
|
||||
if: failure() && steps.deploy.outcome == 'failure'
|
||||
run: |
|
||||
curl -s -X POST "${LOG_CENTER_URL}/api/v1/logs/report" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"project_id": "log_center_api",
|
||||
"environment": "cicd",
|
||||
"level": "ERROR",
|
||||
"source": "deployment",
|
||||
"commit_hash": "'"$GITHUB_SHA"'",
|
||||
"error": {
|
||||
"type": "DeployError",
|
||||
"message": "Log Center K8s deployment failed",
|
||||
"file_path": null,
|
||||
"line_number": null,
|
||||
"stack_trace": ["K8s deployment step failed. Check CI logs for details."]
|
||||
},
|
||||
"context": {
|
||||
"workflow_name": "'"$GITHUB_WORKFLOW"'",
|
||||
"job_name": "'"$GITHUB_JOB"'",
|
||||
"step_name": "Update K8s Manifests",
|
||||
"run_id": "'"$GITHUB_RUN_ID"'",
|
||||
"branch": "'"$GITHUB_REF_NAME"'",
|
||||
"repository": "'"$GITHUB_REPOSITORY"'",
|
||||
"namespace": "default",
|
||||
"deployment_name": "log-center",
|
||||
"run_url": "'"$GITHUB_SERVER_URL"'/'"$GITHUB_REPOSITORY"'/actions/runs/'"$GITHUB_RUN_ID"'"
|
||||
}
|
||||
}' --connect-timeout 5 --max-time 10 || true
|
||||
|
||||
58
app/main.py
58
app/main.py
@ -1,10 +1,12 @@
|
||||
from fastapi import FastAPI, Depends, HTTPException, Query
|
||||
from fastapi import FastAPI, Depends, HTTPException, Query, Request
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse
|
||||
from sqlmodel.ext.asyncio.session import AsyncSession
|
||||
from sqlmodel import select, func, text
|
||||
from .database import init_db, get_session
|
||||
from .database import init_db, get_session, engine
|
||||
from .models import ErrorLog, ErrorLogCreate, LogStatus, TaskStatusUpdate, RepairTask, RepairTaskCreate, Project, ProjectUpdate
|
||||
from .gitea_client import GiteaClient
|
||||
from .self_report import self_report_error
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional, List
|
||||
from pydantic import BaseModel
|
||||
@ -25,6 +27,58 @@ app.add_middleware(
|
||||
@app.on_event("startup")
async def on_startup():
    """Startup hook: run ``init_db()`` and then register Log Center's own projects.

    Database initialisation is still allowed to fail loudly. Registration of
    the self-monitoring project rows is auxiliary bookkeeping, so a failure
    there is printed to stderr instead of preventing the service from starting.
    """
    await init_db()
    try:
        await _register_self_projects()
    except Exception:
        # Local imports: only the first lines of this module's import block are
        # visible, so do not assume sys/traceback are imported at file level.
        import sys
        import traceback

        # Self-reporting support must never take the main service down.
        traceback.print_exc(file=sys.stderr)
|
||||
|
||||
|
||||
async def _register_self_projects():
    """Insert or refresh the Project rows that describe Log Center itself.

    Called from the startup hook. For each of the two built-in projects
    (``log_center_api`` and ``log_center_web``): if no row with that
    ``project_id`` exists, one is created; otherwise every metadata field
    except ``project_id`` is overwritten with the values below and
    ``updated_at`` is set to the current UTC time. Everything is committed in
    a single transaction at the end.
    """
    from sqlalchemy.orm import sessionmaker as sa_sessionmaker

    self_projects = (
        dict(
            project_id="log_center_api",
            name="Log Center API",
            repo_url="https://gitea.airlabs.art/zyc/qy_gitlab.git",
            local_path="/Users/maidong/Desktop/zyc/qy_gitlab/log_center",
            description="日志中台 FastAPI 后端服务",
        ),
        dict(
            project_id="log_center_web",
            name="Log Center Web",
            repo_url="https://gitea.airlabs.art/zyc/qy_gitlab.git",
            local_path="/Users/maidong/Desktop/zyc/qy_gitlab/log_center/web",
            description="日志中台 React 管理端",
        ),
    )

    session_factory = sa_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
    async with session_factory() as db:
        for spec in self_projects:
            lookup = select(Project).where(Project.project_id == spec["project_id"])
            row = (await db.exec(lookup)).first()
            if row:
                # Refresh metadata: repo URL, path, etc. may have changed
                # since the row was first written.
                for field, value in spec.items():
                    if field == "project_id":
                        continue
                    setattr(row, field, value)
                row.updated_at = datetime.utcnow()
                db.add(row)
            else:
                db.add(Project(**spec))
        await db.commit()
|
||||
|
||||
|
||||
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Catch-all handler: record the unhandled exception, then reply with HTTP 500.

    The exception is passed to ``self_report_error`` together with the
    request URL and HTTP method. The client receives only a generic
    ``Internal Server Error`` body.
    """
    request_info = {
        "url": str(request.url),
        "method": request.method,
    }
    await self_report_error(exc, context=request_info)

    body = {"detail": "Internal Server Error"}
    return JSONResponse(content=body, status_code=500)
|
||||
|
||||
|
||||
def generate_fingerprint(log: ErrorLogCreate) -> str:
|
||||
source = log.source
|
||||
|
||||
80
app/self_report.py
Normal file
80
app/self_report.py
Normal file
@ -0,0 +1,80 @@
|
||||
"""Log Center 自身错误上报:将 API 运行时异常写入自己的数据库。"""
|
||||
import os
|
||||
import sys
|
||||
import traceback
|
||||
import hashlib
|
||||
from datetime import datetime
|
||||
|
||||
from sqlmodel import select
|
||||
from sqlmodel.ext.asyncio.session import AsyncSession
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
from .database import engine
|
||||
from .models import ErrorLog, LogStatus, Project
|
||||
|
||||
PROJECT_ID = "log_center_api"
|
||||
ENVIRONMENT = os.getenv("ENVIRONMENT", "production")
|
||||
|
||||
|
||||
async def self_report_error(exc: Exception, context: dict = None):
    """Write an exception raised inside the Log Center API into its own database.

    The record is written through the shared ``engine`` directly rather than
    via the HTTP report endpoint, to avoid a circular dependency and extra
    overhead. Any failure inside this function is printed to stderr and
    swallowed, so reporting can never break the request being served.

    Args:
        exc: The exception to record. Its traceback supplies the file path,
            line number and stack trace.
        context: Optional extra information stored with the log (for example
            request URL and method). ``None`` is stored as an empty dict.

    Returns:
        None. The function returns early without inserting when a log with
        the same fingerprint already exists.
    """
    try:
        frames = traceback.extract_tb(exc.__traceback__)
        last_frame = frames[-1] if frames else None

        error_type = type(exc).__name__
        file_path = last_frame.filename if last_frame else "unknown"
        line_number = last_frame.lineno if last_frame else 0
        # Use the three-argument form: calling format_exception(exc) with a
        # single argument is only accepted on Python 3.10+. On older
        # interpreters it raises TypeError, which the blanket except below
        # would swallow, silently dropping every self-report.
        stack_trace = traceback.format_exception(type(exc), exc, exc.__traceback__)

        # Fingerprint used for de-duplication. Per the original comment this is
        # meant to match the runtime-source logic in main.py (not visible here).
        raw = f"{PROJECT_ID}|{error_type}|{file_path}|{line_number}"
        fingerprint = hashlib.md5(raw.encode()).hexdigest()

        async_session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
        async with async_session() as session:
            # De-duplication: look for an existing log with the same fingerprint.
            stmt = select(ErrorLog).where(ErrorLog.fingerprint == fingerprint)
            result = await session.exec(stmt)
            existing = result.first()

            if existing:
                if existing.status not in [LogStatus.DEPLOYED, LogStatus.FIXED, LogStatus.VERIFIED]:
                    return  # Already being tracked; nothing to do.
                # Regression: an error that was marked fixed has occurred
                # again, so reopen the existing record.
                existing.status = LogStatus.NEW
                existing.timestamp = datetime.utcnow()
                existing.retry_count = 0
                session.add(existing)
                await session.commit()
                return

            # Make sure the owning project row exists before inserting the log.
            proj_stmt = select(Project).where(Project.project_id == PROJECT_ID)
            proj_result = await session.exec(proj_stmt)
            if not proj_result.first():
                session.add(Project(project_id=PROJECT_ID, name="Log Center API"))

            new_log = ErrorLog(
                project_id=PROJECT_ID,
                environment=ENVIRONMENT,
                level="ERROR",
                source="runtime",
                error_type=error_type,
                error_message=str(exc),
                file_path=file_path,
                line_number=line_number,
                stack_trace=stack_trace,
                context=context or {},
                fingerprint=fingerprint,
                timestamp=datetime.utcnow(),
            )
            session.add(new_log)
            await session.commit()
    except Exception:
        # Self-reporting must never crash the service: log and carry on.
        traceback.print_exc(file=sys.stderr)
|
||||
@ -2,19 +2,28 @@
|
||||
|
||||
## 概述
|
||||
|
||||
Log Center 是一个集中式错误日志收集与 AI 自动修复平台,提供 REST API 供各项目接入,实现运行时错误的统一收集、去重、追踪、分析和自动修复。
|
||||
Log Center 是一个集中式错误日志收集与 AI 自动修复平台,提供 REST API 供各项目接入。
|
||||
|
||||
接入流程:
|
||||
接入后覆盖三类错误上报:
|
||||
|
||||
1. 项目首次上报日志时自动注册到 Log Center
|
||||
2. 在 Web 管理端配置项目的**仓库地址**和**本地路径**
|
||||
3. Repair Agent 根据配置自动拉取代码并修复 Bug
|
||||
| 类型 | `source` 值 | 说明 | 触发方式 |
|
||||
|------|-------------|------|----------|
|
||||
| 日常运行错误 | `runtime` | 应用运行时的异常(Python/JS/Dart) | 代码中全局捕获异常自动上报 |
|
||||
| CI/CD 错误 | `cicd` | 构建、测试、Lint 等流水线失败 | Gitea Actions 步骤失败时上报 |
|
||||
| K8s 部署错误 | `deployment` | Pod 异常状态(CrashLoopBackOff、OOMKilled 等) | K8s CronJob 定时扫描上报 |
|
||||
|
||||
**完整接入流程:**
|
||||
|
||||
1. **注册项目信息** — 调用 API 提交项目元信息(名称、仓库地址、本地路径)
|
||||
2. **接入日常运行错误上报** — 在应用代码中集成全局异常捕获
|
||||
3. **接入 CI/CD 错误上报** — 在 Gitea Actions 流水线中添加失败上报步骤
|
||||
4. **接入 K8s 部署错误上报** — 在 K8s Pod 健康监控中添加项目映射
|
||||
|
||||
> **重要**: 必须先完成步骤 1,否则 Repair Agent 无法定位代码仓库和本地路径。
|
||||
|
||||
---
|
||||
|
||||
## 快速开始
|
||||
|
||||
### 服务地址
|
||||
## 服务地址
|
||||
|
||||
| 环境 | API 地址 | 仪表盘 |
|
||||
|------|----------|--------|
|
||||
@ -23,23 +32,141 @@ Log Center 是一个集中式错误日志收集与 AI 自动修复平台,提
|
||||
|
||||
---
|
||||
|
||||
## API 接口
|
||||
## 步骤 1:注册项目信息
|
||||
|
||||
### 上报错误日志
|
||||
首次接入 Log Center 时,**必须先注册项目信息**。这是 Repair Agent 正常工作的前提。
|
||||
|
||||
**POST** `/api/v1/logs/report`
|
||||
### 注册方式
|
||||
|
||||
#### 请求体 (JSON)
|
||||
先上报一条初始化日志(触发项目自动创建),再调用 PUT 接口补全元信息:
|
||||
|
||||
```bash
|
||||
# 1. 上报初始化日志,触发项目自动创建
|
||||
curl -X POST "${LOG_CENTER_URL}/api/v1/logs/report" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"project_id": "your_project_id",
|
||||
"environment": "production",
|
||||
"level": "WARNING",
|
||||
"error": {
|
||||
"type": "ProjectInit",
|
||||
"message": "Project registered to Log Center",
|
||||
"stack_trace": ["Project initialization"]
|
||||
},
|
||||
"repo_url": "https://gitea.airlabs.art/team/your_project.git"
|
||||
}'
|
||||
|
||||
# 2. 补全项目元信息
|
||||
curl -X PUT "${LOG_CENTER_URL}/api/v1/projects/your_project_id" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"name": "项目显示名称",
|
||||
"repo_url": "https://gitea.airlabs.art/team/your_project.git",
|
||||
"local_path": "/absolute/path/to/project",
|
||||
"description": "项目描述"
|
||||
}'
|
||||
```
|
||||
|
||||
### 各语言注册示例
|
||||
|
||||
#### Python
|
||||
|
||||
```python
|
||||
import requests
|
||||
import os
|
||||
|
||||
LOG_CENTER_URL = os.getenv("LOG_CENTER_URL", "http://localhost:8002")
|
||||
|
||||
def register_project():
|
||||
"""首次接入时调用,注册项目到 Log Center。"""
|
||||
project_id = "your_project_id"
|
||||
|
||||
# 1. 上报初始化日志触发项目创建
|
||||
requests.post(f"{LOG_CENTER_URL}/api/v1/logs/report", json={
|
||||
"project_id": project_id,
|
||||
"environment": os.getenv("ENVIRONMENT", "production"),
|
||||
"level": "WARNING",
|
||||
"error": {
|
||||
"type": "ProjectInit",
|
||||
"message": "Project registered to Log Center",
|
||||
"stack_trace": ["Project initialization"],
|
||||
},
|
||||
"repo_url": "https://gitea.airlabs.art/team/your_project.git",
|
||||
}, timeout=5)
|
||||
|
||||
# 2. 补全项目元信息
|
||||
requests.put(f"{LOG_CENTER_URL}/api/v1/projects/{project_id}", json={
|
||||
"name": "项目显示名称",
|
||||
"repo_url": "https://gitea.airlabs.art/team/your_project.git",
|
||||
"local_path": "/absolute/path/to/project",
|
||||
"description": "项目描述",
|
||||
}, timeout=5)
|
||||
```
|
||||
|
||||
#### JavaScript / TypeScript
|
||||
|
||||
```typescript
|
||||
const LOG_CENTER_URL = import.meta.env.VITE_LOG_CENTER_URL || 'http://localhost:8002';
|
||||
|
||||
async function registerProject() {
|
||||
const projectId = 'your_project_id';
|
||||
|
||||
// 1. 上报初始化日志触发项目创建
|
||||
await fetch(`${LOG_CENTER_URL}/api/v1/logs/report`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
project_id: projectId,
|
||||
environment: import.meta.env.MODE,
|
||||
level: 'WARNING',
|
||||
error: {
|
||||
type: 'ProjectInit',
|
||||
message: 'Project registered to Log Center',
|
||||
stack_trace: ['Project initialization'],
|
||||
},
|
||||
repo_url: 'https://gitea.airlabs.art/team/your_project.git',
|
||||
}),
|
||||
});
|
||||
|
||||
// 2. 补全项目元信息
|
||||
await fetch(`${LOG_CENTER_URL}/api/v1/projects/${projectId}`, {
|
||||
method: 'PUT',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
name: '项目显示名称',
|
||||
repo_url: 'https://gitea.airlabs.art/team/your_project.git',
|
||||
local_path: '/absolute/path/to/project',
|
||||
description: '项目描述',
|
||||
}),
|
||||
});
|
||||
}
|
||||
```
|
||||
|
||||
### 项目元信息字段
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| `project_id` | string | ✅ | 项目唯一标识,如 `rtc_backend`, `rtc_web` |
|
||||
| `name` | string | ✅ | 项目显示名称 |
|
||||
| `repo_url` | string | ✅ | Git 仓库地址(Repair Agent 克隆/推送代码用) |
|
||||
| `local_path` | string | ✅ | 本地项目绝对路径(Repair Agent 在此目录执行修复) |
|
||||
| `description` | string | ❌ | 项目描述 |
|
||||
|
||||
---
|
||||
|
||||
## 步骤 2:接入日常运行错误上报
|
||||
|
||||
> `source: "runtime"`(默认值,可不传)
|
||||
|
||||
在应用代码中集成全局异常捕获,运行时发生未处理异常时自动上报到 Log Center。
|
||||
|
||||
### 上报格式
|
||||
|
||||
```json
|
||||
{
|
||||
"project_id": "rtc_backend",
|
||||
"environment": "production",
|
||||
"level": "ERROR",
|
||||
"timestamp": "2026-01-30T10:30:00Z",
|
||||
"version": "1.2.3",
|
||||
"commit_hash": "abc1234",
|
||||
"repo_url": "https://gitea.example.com/team/rtc_backend.git",
|
||||
"error": {
|
||||
"type": "ValueError",
|
||||
"message": "invalid literal for int() with base 10: 'abc'",
|
||||
@ -48,117 +175,34 @@ Log Center 是一个集中式错误日志收集与 AI 自动修复平台,提
|
||||
"stack_trace": [
|
||||
"Traceback (most recent call last):",
|
||||
" File \"apps/users/views.py\", line 42, in get_user",
|
||||
" user_id = int(request.GET['id'])",
|
||||
"ValueError: invalid literal for int() with base 10: 'abc'"
|
||||
]
|
||||
},
|
||||
"context": {
|
||||
"url": "/api/users/123",
|
||||
"method": "GET",
|
||||
"user_id": "u_12345",
|
||||
"request_id": "req_abc123"
|
||||
"user_id": "u_12345"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### 字段说明
|
||||
### Runtime 字段说明
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| `project_id` | string | ✅ | 项目标识,如 `rtc_backend`, `rtc_web`, `airhub_app` |
|
||||
| `project_id` | string | ✅ | 项目标识 |
|
||||
| `environment` | string | ✅ | 环境:`development`, `staging`, `production` |
|
||||
| `level` | string | ✅ | 日志级别:`ERROR`, `WARNING`, `CRITICAL` |
|
||||
| `source` | string | ❌ | 来源:`runtime`(默认), `cicd`, `deployment` |
|
||||
| `timestamp` | string | ❌ | ISO 8601 格式,不传则使用服务器时间 |
|
||||
| `source` | string | ❌ | 默认 `runtime`,无需传 |
|
||||
| `timestamp` | string | ❌ | ISO 8601 格式,不传则用服务器时间 |
|
||||
| `version` | string | ❌ | 应用版本号 |
|
||||
| `commit_hash` | string | ❌ | Git commit hash |
|
||||
| `repo_url` | string | ❌ | 项目仓库地址,首次上报时传入可自动关联到项目 |
|
||||
| `error.type` | string | ✅ | 异常类型,如 `ValueError`, `TypeError` |
|
||||
| `error.message` | string | ✅ | 错误消息 |
|
||||
| `error.file_path` | string | ❌ | 出错文件路径(runtime 必填,cicd/deployment 可选) |
|
||||
| `error.line_number` | int | ❌ | 出错行号(runtime 必填,cicd/deployment 可选) |
|
||||
| `error.file_path` | string | ✅ | 出错文件路径 |
|
||||
| `error.line_number` | int | ✅ | 出错行号 |
|
||||
| `error.stack_trace` | array | ✅ | 堆栈信息(数组或字符串) |
|
||||
| `context` | object | ❌ | 额外上下文信息(URL、用户ID等) |
|
||||
|
||||
> **项目自动注册**: 首次上报日志时,系统会根据 `project_id` 自动创建项目记录。如果同时传入 `repo_url`,会自动关联仓库地址,供 Repair Agent 使用。
|
||||
|
||||
#### 响应
|
||||
|
||||
**成功 (200)**
|
||||
```json
|
||||
{
|
||||
"message": "Log reported",
|
||||
"id": 123
|
||||
}
|
||||
```
|
||||
|
||||
**已存在 (200)** - 重复错误自动去重
|
||||
```json
|
||||
{
|
||||
"message": "Log deduplicated",
|
||||
"id": 123,
|
||||
"status": "NEW"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 项目管理 API
|
||||
|
||||
项目在首次日志上报时自动创建,之后可通过 API 或 Web 管理端编辑配置。
|
||||
|
||||
#### 获取项目列表
|
||||
|
||||
**GET** `/api/v1/projects`
|
||||
|
||||
```json
|
||||
{
|
||||
"projects": [
|
||||
{
|
||||
"id": 1,
|
||||
"project_id": "rtc_backend",
|
||||
"name": "RTC 后端",
|
||||
"repo_url": "https://gitea.example.com/team/rtc_backend.git",
|
||||
"local_path": "/home/dev/projects/rtc_backend",
|
||||
"description": "Django 后端服务",
|
||||
"created_at": "2026-01-15T08:00:00",
|
||||
"updated_at": "2026-02-20T10:30:00"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
#### 获取项目详情
|
||||
|
||||
**GET** `/api/v1/projects/{project_id}`
|
||||
|
||||
返回单个项目的完整信息。
|
||||
|
||||
#### 编辑项目配置
|
||||
|
||||
**PUT** `/api/v1/projects/{project_id}`
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "RTC 后端",
|
||||
"repo_url": "https://gitea.example.com/team/rtc_backend.git",
|
||||
"local_path": "/home/dev/projects/rtc_backend",
|
||||
"description": "Django 后端服务"
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| `name` | string | 项目显示名称 |
|
||||
| `repo_url` | string | Git 仓库地址(Repair Agent 克隆/推送代码用) |
|
||||
| `local_path` | string | 本地项目路径(Repair Agent 在此目录执行修复) |
|
||||
| `description` | string | 项目描述 |
|
||||
|
||||
> **注意**: `repo_url` 和 `local_path` 是 Repair Agent 正常工作的关键配置。未配置时 Agent 将无法执行 Git 操作或定位项目代码。可在 Web 管理端的「项目管理」页面中配置。
|
||||
|
||||
---
|
||||
|
||||
## 接入示例
|
||||
| `context` | object | ❌ | 额外上下文信息 |
|
||||
|
||||
### Python (Django / FastAPI)
|
||||
|
||||
@ -170,7 +214,7 @@ import os
|
||||
LOG_CENTER_URL = os.getenv("LOG_CENTER_URL", "http://localhost:8002")
|
||||
|
||||
def report_error(exc, context=None):
|
||||
"""上报错误到 Log Center"""
|
||||
"""上报运行时错误到 Log Center"""
|
||||
tb = traceback.extract_tb(exc.__traceback__)
|
||||
last_frame = tb[-1] if tb else None
|
||||
|
||||
@ -178,7 +222,6 @@ def report_error(exc, context=None):
|
||||
"project_id": "rtc_backend",
|
||||
"environment": os.getenv("ENVIRONMENT", "development"),
|
||||
"level": "ERROR",
|
||||
"repo_url": os.getenv("REPO_URL", ""), # 可选:关联仓库地址
|
||||
"error": {
|
||||
"type": type(exc).__name__,
|
||||
"message": str(exc),
|
||||
@ -193,55 +236,49 @@ def report_error(exc, context=None):
|
||||
requests.post(
|
||||
f"{LOG_CENTER_URL}/api/v1/logs/report",
|
||||
json=payload,
|
||||
timeout=3 # 快速失败,不影响主业务
|
||||
timeout=3
|
||||
)
|
||||
except Exception:
|
||||
pass # 静默失败,不影响主业务
|
||||
```
|
||||
|
||||
#### Django 集成位置
|
||||
|
||||
修改 `utils/exceptions.py` 的 `custom_exception_handler`:
|
||||
**Django 集成位置** — 修改 `utils/exceptions.py` 的 `custom_exception_handler`:
|
||||
|
||||
```python
|
||||
def custom_exception_handler(exc, context):
|
||||
# 上报到 Log Center (异步,不阻塞响应)
|
||||
# 上报到 Log Center
|
||||
report_error(exc, {
|
||||
"view": str(context.get("view")),
|
||||
"request_path": context.get("request").path if context.get("request") else None,
|
||||
})
|
||||
|
||||
# ... 原有逻辑不变 ...
|
||||
```
|
||||
|
||||
---
|
||||
**FastAPI 集成位置** — 添加全局异常处理器:
|
||||
|
||||
```python
|
||||
from fastapi import Request
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
@app.exception_handler(Exception)
|
||||
async def global_exception_handler(request: Request, exc: Exception):
|
||||
report_error(exc, context={  # report_error 是同步函数(内部使用 requests.post),不能 await
|
||||
"url": str(request.url),
|
||||
"method": request.method,
|
||||
})
|
||||
return JSONResponse(status_code=500, content={"detail": "Internal Server Error"})
|
||||
```
|
||||
|
||||
### JavaScript / TypeScript (React / Vue)
|
||||
|
||||
```typescript
|
||||
const LOG_CENTER_URL = import.meta.env.VITE_LOG_CENTER_URL || 'http://localhost:8002';
|
||||
|
||||
interface ErrorPayload {
|
||||
project_id: string;
|
||||
environment: string;
|
||||
level: string;
|
||||
repo_url?: string;
|
||||
error: {
|
||||
type: string;
|
||||
message: string;
|
||||
file_path: string;
|
||||
line_number: number;
|
||||
stack_trace: string[];
|
||||
};
|
||||
context?: Record<string, unknown>;
|
||||
}
|
||||
|
||||
export function reportError(error: Error, context?: Record<string, unknown>) {
|
||||
// 解析堆栈信息
|
||||
const stackLines = error.stack?.split('\n') || [];
|
||||
const match = stackLines[1]?.match(/at\s+.*\s+\((.+):(\d+):\d+\)/);
|
||||
|
||||
const payload: ErrorPayload = {
|
||||
const payload = {
|
||||
project_id: 'rtc_web',
|
||||
environment: import.meta.env.MODE,
|
||||
level: 'ERROR',
|
||||
@ -259,12 +296,9 @@ export function reportError(error: Error, context?: Record<string, unknown>) {
|
||||
},
|
||||
};
|
||||
|
||||
// 使用 sendBeacon 确保页面关闭时也能发送
|
||||
const blob = new Blob([JSON.stringify(payload)], { type: 'application/json' });
|
||||
if (navigator.sendBeacon) {
|
||||
navigator.sendBeacon(
|
||||
`${LOG_CENTER_URL}/api/v1/logs/report`,
|
||||
JSON.stringify(payload)
|
||||
);
|
||||
navigator.sendBeacon(`${LOG_CENTER_URL}/api/v1/logs/report`, blob);
|
||||
} else {
|
||||
fetch(`${LOG_CENTER_URL}/api/v1/logs/report`, {
|
||||
method: 'POST',
|
||||
@ -276,27 +310,40 @@ export function reportError(error: Error, context?: Record<string, unknown>) {
|
||||
}
|
||||
```
|
||||
|
||||
#### Axios 拦截器集成
|
||||
|
||||
修改 `src/api/request.ts`:
|
||||
**全局错误捕获** — 在 `main.tsx` / `main.ts` 入口文件中:
|
||||
|
||||
```typescript
|
||||
request.interceptors.response.use(
|
||||
(response) => { /* ... */ },
|
||||
(error: AxiosError) => {
|
||||
// 上报到 Log Center
|
||||
reportError(error, {
|
||||
url: error.config?.url,
|
||||
method: error.config?.method,
|
||||
status: error.response?.status,
|
||||
});
|
||||
// JS 运行时异常
|
||||
window.onerror = (_message, source, lineno, colno, error) => {
|
||||
if (error) reportError(error, { source, lineno, colno });
|
||||
};
|
||||
|
||||
// ... 原有逻辑不变 ...
|
||||
}
|
||||
);
|
||||
// 未处理的 Promise rejection
|
||||
window.onunhandledrejection = (event: PromiseRejectionEvent) => {
|
||||
const error = event.reason instanceof Error
|
||||
? event.reason
|
||||
: new Error(String(event.reason));
|
||||
reportError(error, { type: 'unhandledrejection' });
|
||||
};
|
||||
```
|
||||
|
||||
---
|
||||
**Axios 拦截器** — 在 `api.ts` / `request.ts` 中(仅上报 5xx 服务端错误):
|
||||
|
||||
```typescript
|
||||
api.interceptors.response.use(
|
||||
(response) => response,
|
||||
(error: AxiosError) => {
|
||||
if (error.response && error.response.status >= 500) {
|
||||
reportError(error, {
|
||||
api_url: error.config?.url,
|
||||
method: error.config?.method,
|
||||
status: error.response.status,
|
||||
});
|
||||
}
|
||||
return Promise.reject(error);
|
||||
},
|
||||
);
|
||||
```
|
||||
|
||||
### Flutter (Dart)
|
||||
|
||||
@ -311,14 +358,12 @@ const logCenterUrl = String.fromEnvironment(
|
||||
|
||||
Future<void> reportError(dynamic error, StackTrace stackTrace, {Map<String, dynamic>? context}) async {
|
||||
final stackLines = stackTrace.toString().split('\n');
|
||||
// 解析第一行获取文件和行号
|
||||
final match = RegExp(r'#0\s+.*\((.+):(\d+):\d+\)').firstMatch(stackLines.first);
|
||||
|
||||
final payload = {
|
||||
'project_id': 'airhub_app',
|
||||
'environment': const String.fromEnvironment('ENVIRONMENT', defaultValue: 'development'),
|
||||
'level': 'ERROR',
|
||||
'repo_url': 'https://gitea.example.com/team/airhub_app.git',
|
||||
'error': {
|
||||
'type': error.runtimeType.toString(),
|
||||
'message': error.toString(),
|
||||
@ -341,7 +386,7 @@ Future<void> reportError(dynamic error, StackTrace stackTrace, {Map<String, dyna
|
||||
}
|
||||
```
|
||||
|
||||
在 `main.dart` 中全局捕获:
|
||||
**全局捕获** — 在 `main.dart` 中:
|
||||
|
||||
```dart
|
||||
void main() {
|
||||
@ -359,9 +404,284 @@ void main() {
|
||||
|
||||
---
|
||||
|
||||
## 步骤 3:接入 CI/CD 错误上报
|
||||
|
||||
> `source: "cicd"`
|
||||
|
||||
在 Gitea Actions 流水线中,为每个关键步骤添加失败上报,构建/测试/Lint 失败时自动上报到 Log Center。
|
||||
|
||||
### 上报格式
|
||||
|
||||
```json
|
||||
{
|
||||
"project_id": "rtc_backend",
|
||||
"environment": "cicd",
|
||||
"level": "ERROR",
|
||||
"source": "cicd",
|
||||
"commit_hash": "abc1234",
|
||||
"error": {
|
||||
"type": "DockerBuildError",
|
||||
"message": "Docker build failed",
|
||||
"file_path": null,
|
||||
"line_number": null,
|
||||
"stack_trace": ["Build step failed. Check CI logs for details."]
|
||||
},
|
||||
"context": {
|
||||
"workflow_name": "Build and Deploy",
|
||||
"job_name": "build",
|
||||
"step_name": "Build Docker Image",
|
||||
"run_id": "123",
|
||||
"branch": "main",
|
||||
"repository": "team/rtc_backend",
|
||||
"run_url": "https://gitea.airlabs.art/team/rtc_backend/actions/runs/123"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### CI/CD 特有字段
|
||||
|
||||
| 字段 | 说明 |
|
||||
|------|------|
|
||||
| `source` | **必须**设为 `"cicd"` |
|
||||
| `environment` | 设为 `"cicd"` |
|
||||
| `error.type` | 推荐值:`DockerBuildError`, `NpmBuildError`, `TestFailure`, `LintError`, `CIBuildError` |
|
||||
| `error.file_path` | 可为 `null` |
|
||||
| `error.line_number` | 可为 `null` |
|
||||
| `context.workflow_name` | 工作流名称 |
|
||||
| `context.job_name` | Job 名称 |
|
||||
| `context.step_name` | 失败的步骤名称 |
|
||||
| `context.run_id` | 运行 ID |
|
||||
| `context.run_url` | CI 运行详情链接 |
|
||||
| `context.branch` | 分支名 |
|
||||
|
||||
### Gitea Actions 集成方式
|
||||
|
||||
为每个关键步骤添加 `id`,然后在末尾添加条件上报步骤:
|
||||
|
||||
```yaml
|
||||
name: Build and Deploy
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
|
||||
env:
|
||||
LOG_CENTER_URL: https://qiyuan-log-center-api.airlabs.art
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
# 关键步骤:添加 id
|
||||
- name: Build Docker Image
|
||||
id: build
|
||||
run: docker build -t myapp:latest .
|
||||
|
||||
- name: Run Tests
|
||||
id: test
|
||||
run: docker run myapp:latest python -m pytest
|
||||
|
||||
- name: Deploy
|
||||
id: deploy
|
||||
run: kubectl apply -f k8s/
|
||||
|
||||
# ===== 失败上报步骤(放在所有关键步骤之后) =====
|
||||
|
||||
- name: Report Build Failure
|
||||
if: failure() && steps.build.outcome == 'failure'
|
||||
run: |
|
||||
curl -s -X POST "${LOG_CENTER_URL}/api/v1/logs/report" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"project_id": "'"${GITHUB_REPOSITORY##*/}"'",
|
||||
"environment": "cicd",
|
||||
"level": "ERROR",
|
||||
"source": "cicd",
|
||||
"commit_hash": "'"$GITHUB_SHA"'",
|
||||
"error": {
|
||||
"type": "DockerBuildError",
|
||||
"message": "Docker build failed",
|
||||
"file_path": null,
|
||||
"line_number": null,
|
||||
"stack_trace": ["Docker build step failed. Check CI logs."]
|
||||
},
|
||||
"context": {
|
||||
"workflow_name": "'"$GITHUB_WORKFLOW"'",
|
||||
"job_name": "'"$GITHUB_JOB"'",
|
||||
"step_name": "Build Docker Image",
|
||||
"run_id": "'"$GITHUB_RUN_ID"'",
|
||||
"branch": "'"$GITHUB_REF_NAME"'",
|
||||
"repository": "'"$GITHUB_REPOSITORY"'",
|
||||
"run_url": "'"$GITHUB_SERVER_URL"'/'"$GITHUB_REPOSITORY"'/actions/runs/'"$GITHUB_RUN_ID"'"
|
||||
}
|
||||
}' --connect-timeout 5 --max-time 10 || true
|
||||
|
||||
- name: Report Test Failure
|
||||
if: failure() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
curl -s -X POST "${LOG_CENTER_URL}/api/v1/logs/report" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"project_id": "'"${GITHUB_REPOSITORY##*/}"'",
|
||||
"environment": "cicd",
|
||||
"level": "ERROR",
|
||||
"source": "cicd",
|
||||
"commit_hash": "'"$GITHUB_SHA"'",
|
||||
"error": {
|
||||
"type": "TestFailure",
|
||||
"message": "Tests failed in CI pipeline",
|
||||
"file_path": null,
|
||||
"line_number": null,
|
||||
"stack_trace": ["Test step failed. Check CI logs."]
|
||||
},
|
||||
"context": {
|
||||
"workflow_name": "'"$GITHUB_WORKFLOW"'",
|
||||
"job_name": "'"$GITHUB_JOB"'",
|
||||
"step_name": "Run Tests",
|
||||
"run_id": "'"$GITHUB_RUN_ID"'",
|
||||
"branch": "'"$GITHUB_REF_NAME"'",
|
||||
"repository": "'"$GITHUB_REPOSITORY"'",
|
||||
"run_url": "'"$GITHUB_SERVER_URL"'/'"$GITHUB_REPOSITORY"'/actions/runs/'"$GITHUB_RUN_ID"'"
|
||||
}
|
||||
}' --connect-timeout 5 --max-time 10 || true
|
||||
```
|
||||
|
||||
### 使用 report-cicd-error.sh 脚本
|
||||
|
||||
项目提供了通用上报脚本 `scripts/report-cicd-error.sh`(需要 `jq`),可在 CI 步骤中使用:
|
||||
|
||||
```bash
|
||||
# 用法: ./scripts/report-cicd-error.sh <project_id> <step_name> <error_message_or_file>
|
||||
./scripts/report-cicd-error.sh rtc_backend "Build Docker Image" "Docker build failed: exit code 1"
|
||||
./scripts/report-cicd-error.sh rtc_backend "Run Tests" /tmp/test-output.log
|
||||
```
|
||||
|
||||
脚本会自动:
|
||||
- 根据步骤名推断 `error_type`(DockerBuildError / NpmBuildError / TestFailure / LintError)
|
||||
- 读取 Gitea Actions 环境变量填充 context
|
||||
- 如果传入文件路径,读取最后 100 行作为 stack_trace
|
||||
|
||||
---
|
||||
|
||||
## 步骤 4:接入 K8s 部署错误上报
|
||||
|
||||
> `source: "deployment"`
|
||||
|
||||
通过 K8s Pod 健康监控 CronJob,定时扫描集群中异常 Pod 并上报到 Log Center。
|
||||
|
||||
### 上报格式
|
||||
|
||||
```json
|
||||
{
|
||||
"project_id": "rtc_backend",
|
||||
"environment": "production",
|
||||
"level": "CRITICAL",
|
||||
"source": "deployment",
|
||||
"error": {
|
||||
"type": "CrashLoopBackOff",
|
||||
"message": "CrashLoopBackOff: back-off restarting failed container (pod: rtc-backend-xxx, container: api)",
|
||||
"file_path": null,
|
||||
"line_number": null,
|
||||
"stack_trace": ["...容器崩溃前的日志(最后 50 行)..."]
|
||||
},
|
||||
"context": {
|
||||
"namespace": "default",
|
||||
"pod_name": "rtc-backend-xxx-yyy",
|
||||
"container_name": "api",
|
||||
"deployment_name": "rtc-backend",
|
||||
"restart_count": 5,
|
||||
"node_name": "node-1"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Deployment 特有字段
|
||||
|
||||
| 字段 | 说明 |
|
||||
|------|------|
|
||||
| `source` | **必须**设为 `"deployment"` |
|
||||
| `level` | 建议 `"CRITICAL"`,Pod 异常通常较严重 |
|
||||
| `error.type` | 取自 K8s 状态:`CrashLoopBackOff`, `OOMKilled`, `ImagePullBackOff`, `ErrImagePull` 等 |
|
||||
| `error.file_path` | 可为 `null` |
|
||||
| `error.line_number` | 可为 `null` |
|
||||
| `error.stack_trace` | 容器崩溃前的日志输出 |
|
||||
| `context.namespace` | K8s 命名空间 |
|
||||
| `context.pod_name` | Pod 名称 |
|
||||
| `context.deployment_name` | Deployment 名称(用于指纹去重) |
|
||||
| `context.restart_count` | 重启次数 |
|
||||
| `context.node_name` | 节点名 |
|
||||
|
||||
### 监控的异常状态
|
||||
|
||||
| 状态 | 说明 |
|
||||
|------|------|
|
||||
| `CrashLoopBackOff` | 容器反复崩溃重启 |
|
||||
| `OOMKilled` | 内存溢出被杀 |
|
||||
| `ImagePullBackOff` / `ErrImagePull` | 拉取镜像失败 |
|
||||
| `CreateContainerConfigError` | 容器配置错误 |
|
||||
| `RunContainerError` | 容器启动失败 |
|
||||
|
||||
### 接入方式:添加 Pod label 映射
|
||||
|
||||
K8s Monitor CronJob 已在集群中运行,每 5 分钟扫描一次。新项目接入只需在 `k8s-monitor/monitor.py` 的 `APP_TO_PROJECT` 字典中添加映射:
|
||||
|
||||
```python
|
||||
# k8s-monitor/monitor.py
|
||||
APP_TO_PROJECT = {
|
||||
"rtc-backend": "rtc_backend", # Pod 的 app label -> project_id
|
||||
"rtc-backend-dev": "rtc_backend",
|
||||
"rtc-web": "rtc_web",
|
||||
"rtc-web-dev": "rtc_web",
|
||||
"log-center-api": "log_center_api",
|
||||
"log-center-web": "log_center_web",
|
||||
# 新项目在此添加映射
|
||||
"your-app": "your_project_id",
|
||||
}
|
||||
```
|
||||
|
||||
确保你的 K8s Deployment 的 Pod 模板(`spec.template.metadata.labels`)带有 `app` label,Monitor 按 Pod 上的 label 进行匹配:
|
||||
|
||||
```yaml
|
||||
metadata:
|
||||
labels:
|
||||
app: your-app # 与 APP_TO_PROJECT 中的 key 一致
|
||||
```
|
||||
|
||||
### CronJob 部署配置
|
||||
|
||||
如果集群中尚未部署 Monitor,使用以下配置:
|
||||
|
||||
```yaml
|
||||
# k8s/monitor-cronjob.yaml
|
||||
apiVersion: batch/v1
|
||||
kind: CronJob
|
||||
metadata:
|
||||
name: pod-health-monitor
|
||||
spec:
|
||||
schedule: "*/5 * * * *"
|
||||
jobTemplate:
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
serviceAccountName: pod-monitor
|
||||
containers:
|
||||
- name: monitor
|
||||
image: your-registry/k8s-pod-monitor:latest
|
||||
env:
|
||||
- name: LOG_CENTER_URL
|
||||
value: "https://qiyuan-log-center-api.airlabs.art"
|
||||
- name: MONITOR_NAMESPACE
|
||||
value: "default"
|
||||
restartPolicy: OnFailure
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 错误去重机制
|
||||
|
||||
Log Center 使用 **指纹(fingerprint)** 对错误进行去重,按来源使用不同的指纹策略:
|
||||
Log Center 使用 **指纹(fingerprint)** 对错误进行去重,三类来源使用不同的指纹策略:
|
||||
|
||||
| 来源 | 指纹组成 |
|
||||
|------|----------|
|
||||
@ -369,7 +689,7 @@ Log Center 使用 **指纹(fingerprint)** 对错误进行去重,按来源使
|
||||
| `cicd` | `MD5(project_id \| cicd \| error_type \| job_name \| step_name)` |
|
||||
| `deployment` | `MD5(project_id \| deployment \| error_type \| namespace \| deployment_name)` |
|
||||
|
||||
相同指纹的错误只会记录一次。如果已修复的错误再次出现,系统会自动重新打开(回归检测)。
|
||||
相同指纹的错误只记录一次。已修复的错误再次出现会自动重新打开(回归检测)。
|
||||
|
||||
---
|
||||
|
||||
@ -391,39 +711,47 @@ NEW → VERIFYING → PENDING_FIX → FIXING → FIXED → VERIFIED → DEPLOYED
|
||||
| `FIXED` | 已修复,待验证 |
|
||||
| `VERIFIED` | 已验证修复 |
|
||||
| `DEPLOYED` | 已部署上线 |
|
||||
| `FIX_FAILED` | 修复失败(失败原因会记录到数据库并在 Web 端展示) |
|
||||
| `FIX_FAILED` | 修复失败 |
|
||||
|
||||
---
|
||||
|
||||
## Web 管理端
|
||||
## API 参考
|
||||
|
||||
### 项目管理
|
||||
### 上报错误日志
|
||||
|
||||
访问 Web 管理端的「项目管理」页面,可以:
|
||||
**POST** `/api/v1/logs/report`
|
||||
|
||||
- 查看所有已注册项目及其配置状态
|
||||
- 编辑项目的**仓库地址**(`repo_url`)和**本地路径**(`local_path`)
|
||||
- 未配置的字段会标红提示
|
||||
**响应:**
|
||||
|
||||
> Repair Agent 依赖这两个配置来定位项目代码和执行 Git 操作。请确保在接入后及时配置。
|
||||
```json
|
||||
// 新错误
|
||||
{"message": "Log reported", "id": 123}
|
||||
|
||||
### 缺陷追踪
|
||||
// 重复错误(去重)
|
||||
{"message": "Log deduplicated", "id": 123, "status": "NEW"}
|
||||
|
||||
- **缺陷列表**: 按项目、来源、状态筛选,修复失败的缺陷会直接显示失败原因
|
||||
- **缺陷详情**: 查看完整错误信息、堆栈、上下文,以及修复历史记录
|
||||
- **修复报告**: 查看每轮 AI 修复的详细过程(分析、代码变更、测试结果、失败原因)
|
||||
// 回归(已修复的错误再次出现)
|
||||
{"message": "Regression detected, reopened", "id": 123}
|
||||
```
|
||||
|
||||
### 项目管理 API
|
||||
|
||||
| 方法 | 路径 | 说明 |
|
||||
|------|------|------|
|
||||
| GET | `/api/v1/projects` | 获取项目列表 |
|
||||
| GET | `/api/v1/projects/{project_id}` | 获取项目详情 |
|
||||
| PUT | `/api/v1/projects/{project_id}` | 编辑项目配置 |
|
||||
|
||||
---
|
||||
|
||||
## 最佳实践
|
||||
|
||||
1. **首次接入时传入 `repo_url`**: 在日志上报中包含仓库地址,省去手动配置步骤
|
||||
2. **设置超时**: 上报请求设置 3 秒超时,避免影响主业务
|
||||
3. **静默失败**: 上报失败不应影响用户体验
|
||||
4. **异步上报**: 使用异步方式上报,不阻塞主流程
|
||||
5. **添加上下文**: 尽量添加有用的上下文信息(用户ID、请求URL等)
|
||||
6. **环境区分**: 正确设置 `environment` 字段区分开发/生产
|
||||
7. **配置本地路径**: 接入后在 Web 端配置 `local_path`,使 Repair Agent 能正确定位代码
|
||||
1. **设置超时**: 上报请求设置 3 秒超时,避免影响主业务
|
||||
2. **静默失败**: 上报失败不应影响用户体验,所有 catch 块静默处理
|
||||
3. **异步上报**: 使用异步方式上报,不阻塞主流程
|
||||
4. **添加上下文**: 尽量添加有用的上下文信息(用户ID、请求URL等)
|
||||
5. **环境区分**: 正确设置 `environment` 字段区分开发/生产
|
||||
6. **CI/CD 用 `|| true`**: 上报步骤失败不应阻断流水线
|
||||
|
||||
---
|
||||
|
||||
@ -434,7 +762,6 @@ NEW → VERIFYING → PENDING_FIX → FIXING → FIXED → VERIFIED → DEPLOYED
|
||||
# .env
|
||||
LOG_CENTER_URL=http://localhost:8002
|
||||
ENVIRONMENT=development
|
||||
REPO_URL=https://gitea.example.com/team/rtc_backend.git # 可选
|
||||
```
|
||||
|
||||
### JavaScript 项目
|
||||
@ -450,8 +777,14 @@ flutter run --dart-define=LOG_CENTER_URL=http://localhost:8002
|
||||
flutter run --dart-define=ENVIRONMENT=development
|
||||
```
|
||||
|
||||
### Gitea Actions
|
||||
```yaml
|
||||
env:
|
||||
LOG_CENTER_URL: https://qiyuan-log-center-api.airlabs.art
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## API 文档
|
||||
## 完整 API 文档
|
||||
|
||||
完整 API 文档请访问: [http://localhost:8002/docs](http://localhost:8002/docs)
|
||||
访问: [http://localhost:8002/docs](http://localhost:8002/docs)
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import axios from 'axios';
|
||||
import axios, { AxiosError } from 'axios';
|
||||
|
||||
const API_BASE = import.meta.env.VITE_API_BASE_URL || 'https://qiyuan-log-center-api.airlabs.art';
|
||||
|
||||
@ -7,6 +7,58 @@ const api = axios.create({
|
||||
timeout: 10000,
|
||||
});
|
||||
|
||||
// ==================== 自身错误上报 ====================
|
||||
|
||||
/** Matches a frame whose location is parenthesised, e.g. `at fn (location:line:column)`. */
const STACK_FRAME_IN_PARENS = /\((.+):(\d+):\d+\)\s*$/;

/** Matches a frame with a bare location, e.g. `at location:line:column` or `fn@location:line:column`. */
const STACK_FRAME_BARE = /(?:^\s*at\s+|@)(.+):(\d+):\d+\s*$/;

/**
 * Finds the first stack line that carries a `location:line:column` suffix.
 *
 * @param stackLines - The error's stack split into individual lines.
 * @returns The location and line number of the first recognisable frame, or
 *   `undefined` when no line matches either frame pattern.
 */
function findStackLocation(
  stackLines: readonly string[],
): { filePath: string; lineNumber: number } | undefined {
  for (const line of stackLines) {
    // Try the parenthesised form first so `at fn (loc:1:2)` does not get
    // captured by the bare pattern as "fn (loc".
    const match = STACK_FRAME_IN_PARENS.exec(line) ?? STACK_FRAME_BARE.exec(line);
    if (!match) continue;
    const [, filePath, lineText] = match;
    if (filePath !== undefined && lineText !== undefined) {
      return { filePath, lineNumber: Number.parseInt(lineText, 10) };
    }
  }
  return undefined;
}

/**
 * Reports a front-end error to the Log Center `/api/v1/logs/report` endpoint.
 *
 * The report is best-effort: it prefers `navigator.sendBeacon`, falls back to
 * a keep-alive `fetch` when the beacon is unavailable or refused, and never
 * throws, so it is safe to call from global error handlers.
 *
 * @param error - The error to report; its name, message and stack are sent.
 * @param context - Extra key/value pairs merged into the reported context.
 *   Keys named `url` or `userAgent` override the automatically collected ones.
 */
export function reportError(error: Error, context?: Record<string, unknown>): void {
  try {
    const stackLines = error.stack?.split('\n') ?? [];
    const location = findStackLocation(stackLines);

    const payload = {
      project_id: 'log_center_web',
      environment: import.meta.env.MODE,
      level: 'ERROR',
      error: {
        type: error.name,
        message: error.message,
        file_path: location?.filePath ?? 'unknown',
        line_number: location?.lineNumber ?? 0,
        stack_trace: stackLines,
      },
      context: {
        url: window.location.href,
        userAgent: navigator.userAgent,
        ...context,
      },
    };

    const endpoint = `${API_BASE}/api/v1/logs/report`;
    const body = JSON.stringify(payload);

    if (typeof navigator.sendBeacon === 'function') {
      const blob = new Blob([body], { type: 'application/json' });
      // sendBeacon returns false when the browser could not queue the request;
      // only stop here when the beacon was actually accepted.
      if (navigator.sendBeacon(endpoint, blob)) {
        return;
      }
    }

    void fetch(endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body,
      keepalive: true,
    }).catch(() => {});
  } catch {
    // Deliberately silent: a failure while reporting (e.g. an unserialisable
    // context value) must not raise a new error from inside an error handler.
  }
}
|
||||
|
||||
// Axios interceptor: report 5xx server-side errors, then pass the rejection on.
api.interceptors.response.use(
  (response) => response,
  (error: AxiosError) => {
    const status = error.response?.status;
    const isServerError = status !== undefined && status >= 500;

    if (isServerError) {
      const requestConfig = error.config;
      reportError(error, {
        api_url: requestConfig?.url,
        method: requestConfig?.method,
        status,
      });
    }

    // Always re-reject so callers still see the original failure.
    return Promise.reject(error);
  },
);
|
||||
|
||||
// Types
|
||||
export interface ErrorLog {
|
||||
id: number;
|
||||
|
||||
@ -2,6 +2,22 @@ import { StrictMode } from 'react'
|
||||
import { createRoot } from 'react-dom/client'
|
||||
import './index.css'
|
||||
import App from './App.tsx'
|
||||
import { reportError } from './api'
|
||||
|
||||
// Global error capture: uncaught JS runtime exceptions.
// Events that arrive without an Error object are ignored.
window.onerror = function handleUncaughtError(_message, source, lineno, colno, error) {
  if (!error) return
  reportError(error, { source, lineno, colno })
}
|
||||
|
||||
// Global error capture: unhandled Promise rejections.
// Non-Error rejection reasons are wrapped in an Error built from their string form.
window.onunhandledrejection = (event: PromiseRejectionEvent) => {
  const { reason } = event
  let error: Error
  if (reason instanceof Error) {
    error = reason
  } else {
    error = new Error(String(reason))
  }
  reportError(error, { type: 'unhandledrejection' })
}
|
||||
|
||||
createRoot(document.getElementById('root')!).render(
|
||||
<StrictMode>
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user