From c8204b6d478c9b236f748e5cb0d451ed73cd6f1c Mon Sep 17 00:00:00 2001 From: zyc <1439655764@qq.com> Date: Thu, 26 Feb 2026 10:08:26 +0800 Subject: [PATCH] =?UTF-8?q?feat(self-report):=20=E6=97=A5=E5=BF=97?= =?UTF-8?q?=E4=B8=AD=E5=8F=B0=E8=87=AA=E8=BA=AB=E6=8E=A5=E5=85=A5=E9=94=99?= =?UTF-8?q?=E8=AF=AF=E4=B8=8A=E6=8A=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增 app/self_report.py:后端运行时异常直接写入自身数据库 - main.py:添加全局异常处理器 + 启动时注册 log_center_api/web 项目 - web/api.ts:添加 reportError 函数 + Axios 5xx 拦截器 - web/main.tsx:添加 window.onerror / onunhandledrejection 全局捕获 - deploy.yaml:CI/CD 流水线各步骤失败时上报(build/deploy) - 重写 integration_guide.md:按三类上报(runtime/cicd/deployment)重新组织 Co-Authored-By: Claude Opus 4.6 --- .gitea/workflows/deploy.yaml | 129 ++++++- app/main.py | 58 ++- app/self_report.py | 80 ++++ docs/integration_guide.md | 695 ++++++++++++++++++++++++++--------- web/src/api.ts | 54 ++- web/src/main.tsx | 16 + 6 files changed, 847 insertions(+), 185 deletions(-) create mode 100644 app/self_report.py diff --git a/.gitea/workflows/deploy.yaml b/.gitea/workflows/deploy.yaml index 11bddae..d2492c2 100644 --- a/.gitea/workflows/deploy.yaml +++ b/.gitea/workflows/deploy.yaml @@ -6,6 +6,9 @@ on: - main - master +env: + LOG_CENTER_URL: https://qiyuan-log-center-api.airlabs.art + jobs: build-and-deploy: runs-on: ubuntu-latest @@ -29,6 +32,7 @@ jobs: # Build API Image - name: Build and Push API + id: build-api uses: docker/build-push-action@v4 with: context: . @@ -39,6 +43,7 @@ jobs: # Build Web Image - name: Build and Push Web + id: build-web uses: docker/build-push-action@v4 with: context: ./web @@ -51,6 +56,7 @@ jobs: # Build K8s Monitor Image - name: Build and Push K8s Monitor + id: build-monitor uses: docker/build-push-action@v4 with: context: ./k8s-monitor @@ -72,9 +78,10 @@ jobs: kubeconfig: ${{ secrets.KUBE_CONFIG }} - name: Update K8s Manifests + id: deploy run: | echo "Environment: Production" - + # Replace image placeholders sed -i "s|\${CI_REGISTRY_IMAGE}/log-center-api:latest|${{ secrets.SWR_SERVER }}/${{ secrets.SWR_ORG }}/log-center-api:latest|g" k8s/api-deployment-prod.yaml sed -i "s|\${CI_REGISTRY_IMAGE}/log-center-web:latest|${{ secrets.SWR_SERVER }}/${{ secrets.SWR_ORG }}/log-center-web:latest|g" k8s/web-deployment-prod.yaml @@ -89,3 +96,123 @@ jobs: # Restart deployments kubectl rollout restart deployment/log-center-api kubectl rollout restart deployment/log-center-web + + # ==================== CI/CD 错误上报 ==================== + + - name: Report API Build Failure + if: failure() && steps.build-api.outcome == 'failure' + run: | + curl -s -X POST "${LOG_CENTER_URL}/api/v1/logs/report" \ + -H "Content-Type: application/json" \ + -d '{ + "project_id": "log_center_api", + "environment": "cicd", + "level": "ERROR", + "source": "cicd", + "commit_hash": "'"$GITHUB_SHA"'", + "error": { + "type": "DockerBuildError", + "message": "Log Center API Docker build failed", + "file_path": null, + "line_number": null, + "stack_trace": ["API Docker build step failed. Check CI logs for details."] + }, + "context": { + "workflow_name": "'"$GITHUB_WORKFLOW"'", + "job_name": "'"$GITHUB_JOB"'", + "step_name": "Build and Push API", + "run_id": "'"$GITHUB_RUN_ID"'", + "branch": "'"$GITHUB_REF_NAME"'", + "repository": "'"$GITHUB_REPOSITORY"'", + "run_url": "'"$GITHUB_SERVER_URL"'/'"$GITHUB_REPOSITORY"'/actions/runs/'"$GITHUB_RUN_ID"'" + } + }' --connect-timeout 5 --max-time 10 || true + + - name: Report Web Build Failure + if: failure() && steps.build-web.outcome == 'failure' + run: | + curl -s -X POST "${LOG_CENTER_URL}/api/v1/logs/report" \ + -H "Content-Type: application/json" \ + -d '{ + "project_id": "log_center_web", + "environment": "cicd", + "level": "ERROR", + "source": "cicd", + "commit_hash": "'"$GITHUB_SHA"'", + "error": { + "type": "DockerBuildError", + "message": "Log Center Web Docker build failed", + "file_path": null, + "line_number": null, + "stack_trace": ["Web Docker build step failed. Check CI logs for details."] + }, + "context": { + "workflow_name": "'"$GITHUB_WORKFLOW"'", + "job_name": "'"$GITHUB_JOB"'", + "step_name": "Build and Push Web", + "run_id": "'"$GITHUB_RUN_ID"'", + "branch": "'"$GITHUB_REF_NAME"'", + "repository": "'"$GITHUB_REPOSITORY"'", + "run_url": "'"$GITHUB_SERVER_URL"'/'"$GITHUB_REPOSITORY"'/actions/runs/'"$GITHUB_RUN_ID"'" + } + }' --connect-timeout 5 --max-time 10 || true + + - name: Report Monitor Build Failure + if: failure() && steps.build-monitor.outcome == 'failure' + run: | + curl -s -X POST "${LOG_CENTER_URL}/api/v1/logs/report" \ + -H "Content-Type: application/json" \ + -d '{ + "project_id": "log_center_api", + "environment": "cicd", + "level": "ERROR", + "source": "cicd", + "commit_hash": "'"$GITHUB_SHA"'", + "error": { + "type": "DockerBuildError", + "message": "K8s Monitor Docker build failed", + "file_path": null, + "line_number": null, + "stack_trace": ["K8s Monitor Docker build step failed. Check CI logs for details."] + }, + "context": { + "workflow_name": "'"$GITHUB_WORKFLOW"'", + "job_name": "'"$GITHUB_JOB"'", + "step_name": "Build and Push K8s Monitor", + "run_id": "'"$GITHUB_RUN_ID"'", + "branch": "'"$GITHUB_REF_NAME"'", + "repository": "'"$GITHUB_REPOSITORY"'", + "run_url": "'"$GITHUB_SERVER_URL"'/'"$GITHUB_REPOSITORY"'/actions/runs/'"$GITHUB_RUN_ID"'" + } + }' --connect-timeout 5 --max-time 10 || true + + - name: Report Deploy Failure + if: failure() && steps.deploy.outcome == 'failure' + run: | + curl -s -X POST "${LOG_CENTER_URL}/api/v1/logs/report" \ + -H "Content-Type: application/json" \ + -d '{ + "project_id": "log_center_api", + "environment": "cicd", + "level": "ERROR", + "source": "deployment", + "commit_hash": "'"$GITHUB_SHA"'", + "error": { + "type": "DeployError", + "message": "Log Center K8s deployment failed", + "file_path": null, + "line_number": null, + "stack_trace": ["K8s deployment step failed. Check CI logs for details."] + }, + "context": { + "workflow_name": "'"$GITHUB_WORKFLOW"'", + "job_name": "'"$GITHUB_JOB"'", + "step_name": "Update K8s Manifests", + "run_id": "'"$GITHUB_RUN_ID"'", + "branch": "'"$GITHUB_REF_NAME"'", + "repository": "'"$GITHUB_REPOSITORY"'", + "namespace": "default", + "deployment_name": "log-center", + "run_url": "'"$GITHUB_SERVER_URL"'/'"$GITHUB_REPOSITORY"'/actions/runs/'"$GITHUB_RUN_ID"'" + } + }' --connect-timeout 5 --max-time 10 || true diff --git a/app/main.py b/app/main.py index df528d6..bd44ef6 100644 --- a/app/main.py +++ b/app/main.py @@ -1,10 +1,12 @@ -from fastapi import FastAPI, Depends, HTTPException, Query +from fastapi import FastAPI, Depends, HTTPException, Query, Request from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import JSONResponse from sqlmodel.ext.asyncio.session import AsyncSession from sqlmodel import select, func, text -from .database import init_db, get_session +from .database import init_db, get_session, engine from .models import ErrorLog, ErrorLogCreate, LogStatus, TaskStatusUpdate, RepairTask, RepairTaskCreate, Project, ProjectUpdate from .gitea_client import GiteaClient +from .self_report import self_report_error from datetime import datetime, timedelta from typing import Optional, List from pydantic import BaseModel @@ -25,6 +27,58 @@ app.add_middleware( @app.on_event("startup") async def on_startup(): await init_db() + await _register_self_projects() + + +async def _register_self_projects(): + """启动时注册日志中台自身的项目信息。""" + from sqlalchemy.orm import sessionmaker as sa_sessionmaker + projects = [ + { + "project_id": "log_center_api", + "name": "Log Center API", + "repo_url": "https://gitea.airlabs.art/zyc/qy_gitlab.git", + "local_path": "/Users/maidong/Desktop/zyc/qy_gitlab/log_center", + "description": "日志中台 FastAPI 后端服务", + }, + { + "project_id": "log_center_web", + "name": "Log Center Web", + "repo_url": "https://gitea.airlabs.art/zyc/qy_gitlab.git", + "local_path": "/Users/maidong/Desktop/zyc/qy_gitlab/log_center/web", + "description": "日志中台 React 管理端", + }, + ] + async_session = sa_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False) + async with async_session() as session: + for proj_data in projects: + stmt = select(Project).where(Project.project_id == proj_data["project_id"]) + result = await session.exec(stmt) + existing = result.first() + if not existing: + session.add(Project(**proj_data)) + else: + # 更新元信息(仓库地址、路径等可能变更) + for key, value in proj_data.items(): + if key != "project_id": + setattr(existing, key, value) + existing.updated_at = datetime.utcnow() + session.add(existing) + await session.commit() + + +@app.exception_handler(Exception) +async def global_exception_handler(request: Request, exc: Exception): + """捕获所有未处理异常,上报到自身数据库后返回 500。""" + await self_report_error(exc, context={ + "url": str(request.url), + "method": request.method, + }) + return JSONResponse( + status_code=500, + content={"detail": "Internal Server Error"}, + ) + def generate_fingerprint(log: ErrorLogCreate) -> str: source = log.source diff --git a/app/self_report.py b/app/self_report.py new file mode 100644 index 0000000..29cbabf --- /dev/null +++ b/app/self_report.py @@ -0,0 +1,80 @@ +"""Log Center 自身错误上报:将 API 运行时异常写入自己的数据库。""" +import os +import sys +import traceback +import hashlib +from datetime import datetime + +from sqlmodel import select +from sqlmodel.ext.asyncio.session import AsyncSession +from sqlalchemy.orm import sessionmaker + +from .database import engine +from .models import ErrorLog, LogStatus, Project + +PROJECT_ID = "log_center_api" +ENVIRONMENT = os.getenv("ENVIRONMENT", "production") + + +async def self_report_error(exc: Exception, context: dict = None): + """将 Log Center API 自身的异常写入数据库。 + + 直接操作数据库而非走 HTTP,避免循环依赖和额外开销。 + 任何内部错误都静默处理,绝不影响主业务。 + """ + try: + tb = traceback.extract_tb(exc.__traceback__) + last_frame = tb[-1] if tb else None + + error_type = type(exc).__name__ + file_path = last_frame.filename if last_frame else "unknown" + line_number = last_frame.lineno if last_frame else 0 + stack_trace = traceback.format_exception(exc) + + # 生成指纹(与 main.py 中 runtime 类型的逻辑一致) + raw = f"{PROJECT_ID}|{error_type}|{file_path}|{line_number}" + fingerprint = hashlib.md5(raw.encode()).hexdigest() + + async_session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False) + async with async_session() as session: + # 去重检查 + stmt = select(ErrorLog).where(ErrorLog.fingerprint == fingerprint) + result = await session.exec(stmt) + existing = result.first() + + if existing: + if existing.status not in [LogStatus.DEPLOYED, LogStatus.FIXED, LogStatus.VERIFIED]: + return # 已在追踪中,跳过 + # 回归:已修复的错误再次出现 + existing.status = LogStatus.NEW + existing.timestamp = datetime.utcnow() + existing.retry_count = 0 + session.add(existing) + await session.commit() + return + + # 确保 project 记录存在 + proj_stmt = select(Project).where(Project.project_id == PROJECT_ID) + proj_result = await session.exec(proj_stmt) + if not proj_result.first(): + session.add(Project(project_id=PROJECT_ID, name="Log Center API")) + + new_log = ErrorLog( + project_id=PROJECT_ID, + environment=ENVIRONMENT, + level="ERROR", + source="runtime", + error_type=error_type, + error_message=str(exc), + file_path=file_path, + line_number=line_number, + stack_trace=stack_trace, + context=context or {}, + fingerprint=fingerprint, + timestamp=datetime.utcnow(), + ) + session.add(new_log) + await session.commit() + except Exception: + # 自身上报绝不能导致服务崩溃 + traceback.print_exc(file=sys.stderr) diff --git a/docs/integration_guide.md b/docs/integration_guide.md index 03ec46a..da69f9a 100644 --- a/docs/integration_guide.md +++ b/docs/integration_guide.md @@ -2,19 +2,28 @@ ## 概述 -Log Center 是一个集中式错误日志收集与 AI 自动修复平台,提供 REST API 供各项目接入,实现运行时错误的统一收集、去重、追踪、分析和自动修复。 +Log Center 是一个集中式错误日志收集与 AI 自动修复平台,提供 REST API 供各项目接入。 -接入流程: +接入后覆盖三类错误上报: -1. 项目首次上报日志时自动注册到 Log Center -2. 在 Web 管理端配置项目的**仓库地址**和**本地路径** -3. Repair Agent 根据配置自动拉取代码并修复 Bug +| 类型 | `source` 值 | 说明 | 触发方式 | +|------|-------------|------|----------| +| 日常运行错误 | `runtime` | 应用运行时的异常(Python/JS/Dart) | 代码中全局捕获异常自动上报 | +| CI/CD 错误 | `cicd` | 构建、测试、Lint 等流水线失败 | Gitea Actions 步骤失败时上报 | +| K8s 部署错误 | `deployment` | Pod 异常状态(CrashLoopBackOff、OOMKilled 等) | K8s CronJob 定时扫描上报 | + +**完整接入流程:** + +1. **注册项目信息** — 调用 API 提交项目元信息(名称、仓库地址、本地路径) +2. **接入日常运行错误上报** — 在应用代码中集成全局异常捕获 +3. **接入 CI/CD 错误上报** — 在 Gitea Actions 流水线中添加失败上报步骤 +4. **接入 K8s 部署错误上报** — 在 K8s Pod 健康监控中添加项目映射 + +> **重要**: 必须先完成步骤 1,否则 Repair Agent 无法定位代码仓库和本地路径。 --- -## 快速开始 - -### 服务地址 +## 服务地址 | 环境 | API 地址 | 仪表盘 | |------|----------|--------| @@ -23,23 +32,141 @@ Log Center 是一个集中式错误日志收集与 AI 自动修复平台,提 --- -## API 接口 +## 步骤 1:注册项目信息 -### 上报错误日志 +首次接入 Log Center 时,**必须先注册项目信息**。这是 Repair Agent 正常工作的前提。 -**POST** `/api/v1/logs/report` +### 注册方式 -#### 请求体 (JSON) +先上报一条初始化日志(触发项目自动创建),再调用 PUT 接口补全元信息: + +```bash +# 1. 上报初始化日志,触发项目自动创建 +curl -X POST "${LOG_CENTER_URL}/api/v1/logs/report" \ + -H "Content-Type: application/json" \ + -d '{ + "project_id": "your_project_id", + "environment": "production", + "level": "WARNING", + "error": { + "type": "ProjectInit", + "message": "Project registered to Log Center", + "stack_trace": ["Project initialization"] + }, + "repo_url": "https://gitea.airlabs.art/team/your_project.git" + }' + +# 2. 补全项目元信息 +curl -X PUT "${LOG_CENTER_URL}/api/v1/projects/your_project_id" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "项目显示名称", + "repo_url": "https://gitea.airlabs.art/team/your_project.git", + "local_path": "/absolute/path/to/project", + "description": "项目描述" + }' +``` + +### 各语言注册示例 + +#### Python + +```python +import requests +import os + +LOG_CENTER_URL = os.getenv("LOG_CENTER_URL", "http://localhost:8002") + +def register_project(): + """首次接入时调用,注册项目到 Log Center。""" + project_id = "your_project_id" + + # 1. 上报初始化日志触发项目创建 + requests.post(f"{LOG_CENTER_URL}/api/v1/logs/report", json={ + "project_id": project_id, + "environment": os.getenv("ENVIRONMENT", "production"), + "level": "WARNING", + "error": { + "type": "ProjectInit", + "message": "Project registered to Log Center", + "stack_trace": ["Project initialization"], + }, + "repo_url": "https://gitea.airlabs.art/team/your_project.git", + }, timeout=5) + + # 2. 补全项目元信息 + requests.put(f"{LOG_CENTER_URL}/api/v1/projects/{project_id}", json={ + "name": "项目显示名称", + "repo_url": "https://gitea.airlabs.art/team/your_project.git", + "local_path": "/absolute/path/to/project", + "description": "项目描述", + }, timeout=5) +``` + +#### JavaScript / TypeScript + +```typescript +const LOG_CENTER_URL = import.meta.env.VITE_LOG_CENTER_URL || 'http://localhost:8002'; + +async function registerProject() { + const projectId = 'your_project_id'; + + // 1. 上报初始化日志触发项目创建 + await fetch(`${LOG_CENTER_URL}/api/v1/logs/report`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + project_id: projectId, + environment: import.meta.env.MODE, + level: 'WARNING', + error: { + type: 'ProjectInit', + message: 'Project registered to Log Center', + stack_trace: ['Project initialization'], + }, + repo_url: 'https://gitea.airlabs.art/team/your_project.git', + }), + }); + + // 2. 补全项目元信息 + await fetch(`${LOG_CENTER_URL}/api/v1/projects/${projectId}`, { + method: 'PUT', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + name: '项目显示名称', + repo_url: 'https://gitea.airlabs.art/team/your_project.git', + local_path: '/absolute/path/to/project', + description: '项目描述', + }), + }); +} +``` + +### 项目元信息字段 + +| 字段 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `project_id` | string | ✅ | 项目唯一标识,如 `rtc_backend`, `rtc_web` | +| `name` | string | ✅ | 项目显示名称 | +| `repo_url` | string | ✅ | Git 仓库地址(Repair Agent 克隆/推送代码用) | +| `local_path` | string | ✅ | 本地项目绝对路径(Repair Agent 在此目录执行修复) | +| `description` | string | ❌ | 项目描述 | + +--- + +## 步骤 2:接入日常运行错误上报 + +> `source: "runtime"`(默认值,可不传) + +在应用代码中集成全局异常捕获,运行时发生未处理异常时自动上报到 Log Center。 + +### 上报格式 ```json { "project_id": "rtc_backend", "environment": "production", "level": "ERROR", - "timestamp": "2026-01-30T10:30:00Z", - "version": "1.2.3", - "commit_hash": "abc1234", - "repo_url": "https://gitea.example.com/team/rtc_backend.git", "error": { "type": "ValueError", "message": "invalid literal for int() with base 10: 'abc'", @@ -48,117 +175,34 @@ Log Center 是一个集中式错误日志收集与 AI 自动修复平台,提 "stack_trace": [ "Traceback (most recent call last):", " File \"apps/users/views.py\", line 42, in get_user", - " user_id = int(request.GET['id'])", "ValueError: invalid literal for int() with base 10: 'abc'" ] }, "context": { "url": "/api/users/123", "method": "GET", - "user_id": "u_12345", - "request_id": "req_abc123" + "user_id": "u_12345" } } ``` -#### 字段说明 +### Runtime 字段说明 | 字段 | 类型 | 必填 | 说明 | |------|------|------|------| -| `project_id` | string | ✅ | 项目标识,如 `rtc_backend`, `rtc_web`, `airhub_app` | +| `project_id` | string | ✅ | 项目标识 | | `environment` | string | ✅ | 环境:`development`, `staging`, `production` | | `level` | string | ✅ | 日志级别:`ERROR`, `WARNING`, `CRITICAL` | -| `source` | string | ❌ | 来源:`runtime`(默认), `cicd`, `deployment` | -| `timestamp` | string | ❌ | ISO 8601 格式,不传则使用服务器时间 | +| `source` | string | ❌ | 默认 `runtime`,无需传 | +| `timestamp` | string | ❌ | ISO 8601 格式,不传则用服务器时间 | | `version` | string | ❌ | 应用版本号 | | `commit_hash` | string | ❌ | Git commit hash | -| `repo_url` | string | ❌ | 项目仓库地址,首次上报时传入可自动关联到项目 | | `error.type` | string | ✅ | 异常类型,如 `ValueError`, `TypeError` | | `error.message` | string | ✅ | 错误消息 | -| `error.file_path` | string | ❌ | 出错文件路径(runtime 必填,cicd/deployment 可选) | -| `error.line_number` | int | ❌ | 出错行号(runtime 必填,cicd/deployment 可选) | +| `error.file_path` | string | ✅ | 出错文件路径 | +| `error.line_number` | int | ✅ | 出错行号 | | `error.stack_trace` | array | ✅ | 堆栈信息(数组或字符串) | -| `context` | object | ❌ | 额外上下文信息(URL、用户ID等) | - -> **项目自动注册**: 首次上报日志时,系统会根据 `project_id` 自动创建项目记录。如果同时传入 `repo_url`,会自动关联仓库地址,供 Repair Agent 使用。 - -#### 响应 - -**成功 (200)** -```json -{ - "message": "Log reported", - "id": 123 -} -``` - -**已存在 (200)** - 重复错误自动去重 -```json -{ - "message": "Log deduplicated", - "id": 123, - "status": "NEW" -} -``` - ---- - -### 项目管理 API - -项目在首次日志上报时自动创建,之后可通过 API 或 Web 管理端编辑配置。 - -#### 获取项目列表 - -**GET** `/api/v1/projects` - -```json -{ - "projects": [ - { - "id": 1, - "project_id": "rtc_backend", - "name": "RTC 后端", - "repo_url": "https://gitea.example.com/team/rtc_backend.git", - "local_path": "/home/dev/projects/rtc_backend", - "description": "Django 后端服务", - "created_at": "2026-01-15T08:00:00", - "updated_at": "2026-02-20T10:30:00" - } - ] -} -``` - -#### 获取项目详情 - -**GET** `/api/v1/projects/{project_id}` - -返回单个项目的完整信息。 - -#### 编辑项目配置 - -**PUT** `/api/v1/projects/{project_id}` - -```json -{ - "name": "RTC 后端", - "repo_url": "https://gitea.example.com/team/rtc_backend.git", - "local_path": "/home/dev/projects/rtc_backend", - "description": "Django 后端服务" -} -``` - -| 字段 | 类型 | 说明 | -|------|------|------| -| `name` | string | 项目显示名称 | -| `repo_url` | string | Git 仓库地址(Repair Agent 克隆/推送代码用) | -| `local_path` | string | 本地项目路径(Repair Agent 在此目录执行修复) | -| `description` | string | 项目描述 | - -> **注意**: `repo_url` 和 `local_path` 是 Repair Agent 正常工作的关键配置。未配置时 Agent 将无法执行 Git 操作或定位项目代码。可在 Web 管理端的「项目管理」页面中配置。 - ---- - -## 接入示例 +| `context` | object | ❌ | 额外上下文信息 | ### Python (Django / FastAPI) @@ -170,7 +214,7 @@ import os LOG_CENTER_URL = os.getenv("LOG_CENTER_URL", "http://localhost:8002") def report_error(exc, context=None): - """上报错误到 Log Center""" + """上报运行时错误到 Log Center""" tb = traceback.extract_tb(exc.__traceback__) last_frame = tb[-1] if tb else None @@ -178,7 +222,6 @@ def report_error(exc, context=None): "project_id": "rtc_backend", "environment": os.getenv("ENVIRONMENT", "development"), "level": "ERROR", - "repo_url": os.getenv("REPO_URL", ""), # 可选:关联仓库地址 "error": { "type": type(exc).__name__, "message": str(exc), @@ -193,55 +236,49 @@ def report_error(exc, context=None): requests.post( f"{LOG_CENTER_URL}/api/v1/logs/report", json=payload, - timeout=3 # 快速失败,不影响主业务 + timeout=3 ) except Exception: pass # 静默失败,不影响主业务 ``` -#### Django 集成位置 - -修改 `utils/exceptions.py` 的 `custom_exception_handler`: +**Django 集成位置** — 修改 `utils/exceptions.py` 的 `custom_exception_handler`: ```python def custom_exception_handler(exc, context): - # 上报到 Log Center (异步,不阻塞响应) + # 上报到 Log Center report_error(exc, { "view": str(context.get("view")), "request_path": context.get("request").path if context.get("request") else None, }) - # ... 原有逻辑不变 ... ``` ---- +**FastAPI 集成位置** — 添加全局异常处理器: + +```python +from fastapi import Request +from fastapi.responses import JSONResponse + +@app.exception_handler(Exception) +async def global_exception_handler(request: Request, exc: Exception): + await report_error(exc, context={ + "url": str(request.url), + "method": request.method, + }) + return JSONResponse(status_code=500, content={"detail": "Internal Server Error"}) +``` ### JavaScript / TypeScript (React / Vue) ```typescript const LOG_CENTER_URL = import.meta.env.VITE_LOG_CENTER_URL || 'http://localhost:8002'; -interface ErrorPayload { - project_id: string; - environment: string; - level: string; - repo_url?: string; - error: { - type: string; - message: string; - file_path: string; - line_number: number; - stack_trace: string[]; - }; - context?: Record; -} - export function reportError(error: Error, context?: Record) { - // 解析堆栈信息 const stackLines = error.stack?.split('\n') || []; const match = stackLines[1]?.match(/at\s+.*\s+\((.+):(\d+):\d+\)/); - const payload: ErrorPayload = { + const payload = { project_id: 'rtc_web', environment: import.meta.env.MODE, level: 'ERROR', @@ -259,12 +296,9 @@ export function reportError(error: Error, context?: Record) { }, }; - // 使用 sendBeacon 确保页面关闭时也能发送 + const blob = new Blob([JSON.stringify(payload)], { type: 'application/json' }); if (navigator.sendBeacon) { - navigator.sendBeacon( - `${LOG_CENTER_URL}/api/v1/logs/report`, - JSON.stringify(payload) - ); + navigator.sendBeacon(`${LOG_CENTER_URL}/api/v1/logs/report`, blob); } else { fetch(`${LOG_CENTER_URL}/api/v1/logs/report`, { method: 'POST', @@ -276,27 +310,40 @@ export function reportError(error: Error, context?: Record) { } ``` -#### Axios 拦截器集成 - -修改 `src/api/request.ts`: +**全局错误捕获** — 在 `main.tsx` / `main.ts` 入口文件中: ```typescript -request.interceptors.response.use( - (response) => { /* ... */ }, - (error: AxiosError) => { - // 上报到 Log Center - reportError(error, { - url: error.config?.url, - method: error.config?.method, - status: error.response?.status, - }); +// JS 运行时异常 +window.onerror = (_message, source, lineno, colno, error) => { + if (error) reportError(error, { source, lineno, colno }); +}; - // ... 原有逻辑不变 ... - } -); +// 未处理的 Promise rejection +window.onunhandledrejection = (event: PromiseRejectionEvent) => { + const error = event.reason instanceof Error + ? event.reason + : new Error(String(event.reason)); + reportError(error, { type: 'unhandledrejection' }); +}; ``` ---- +**Axios 拦截器** — 在 `api.ts` / `request.ts` 中(仅上报 5xx 服务端错误): + +```typescript +api.interceptors.response.use( + (response) => response, + (error: AxiosError) => { + if (error.response && error.response.status >= 500) { + reportError(error, { + api_url: error.config?.url, + method: error.config?.method, + status: error.response.status, + }); + } + return Promise.reject(error); + }, +); +``` ### Flutter (Dart) @@ -311,14 +358,12 @@ const logCenterUrl = String.fromEnvironment( Future reportError(dynamic error, StackTrace stackTrace, {Map? context}) async { final stackLines = stackTrace.toString().split('\n'); - // 解析第一行获取文件和行号 final match = RegExp(r'#0\s+.*\((.+):(\d+):\d+\)').firstMatch(stackLines.first); final payload = { 'project_id': 'airhub_app', 'environment': const String.fromEnvironment('ENVIRONMENT', defaultValue: 'development'), 'level': 'ERROR', - 'repo_url': 'https://gitea.example.com/team/airhub_app.git', 'error': { 'type': error.runtimeType.toString(), 'message': error.toString(), @@ -341,7 +386,7 @@ Future reportError(dynamic error, StackTrace stackTrace, {Map `source: "cicd"` + +在 Gitea Actions 流水线中,为每个关键步骤添加失败上报,构建/测试/Lint 失败时自动上报到 Log Center。 + +### 上报格式 + +```json +{ + "project_id": "rtc_backend", + "environment": "cicd", + "level": "ERROR", + "source": "cicd", + "commit_hash": "abc1234", + "error": { + "type": "DockerBuildError", + "message": "Docker build failed", + "file_path": null, + "line_number": null, + "stack_trace": ["Build step failed. Check CI logs for details."] + }, + "context": { + "workflow_name": "Build and Deploy", + "job_name": "build", + "step_name": "Build Docker Image", + "run_id": "123", + "branch": "main", + "repository": "team/rtc_backend", + "run_url": "https://gitea.airlabs.art/team/rtc_backend/actions/runs/123" + } +} +``` + +### CI/CD 特有字段 + +| 字段 | 说明 | +|------|------| +| `source` | **必须**设为 `"cicd"` | +| `environment` | 设为 `"cicd"` | +| `error.type` | 推荐值:`DockerBuildError`, `NpmBuildError`, `TestFailure`, `LintError`, `CIBuildError` | +| `error.file_path` | 可为 `null` | +| `error.line_number` | 可为 `null` | +| `context.workflow_name` | 工作流名称 | +| `context.job_name` | Job 名称 | +| `context.step_name` | 失败的步骤名称 | +| `context.run_id` | 运行 ID | +| `context.run_url` | CI 运行详情链接 | +| `context.branch` | 分支名 | + +### Gitea Actions 集成方式 + +为每个关键步骤添加 `id`,然后在末尾添加条件上报步骤: + +```yaml +name: Build and Deploy + +on: + push: + branches: [main] + +env: + LOG_CENTER_URL: https://qiyuan-log-center-api.airlabs.art + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + # 关键步骤:添加 id + - name: Build Docker Image + id: build + run: docker build -t myapp:latest . + + - name: Run Tests + id: test + run: docker run myapp:latest python -m pytest + + - name: Deploy + id: deploy + run: kubectl apply -f k8s/ + + # ===== 失败上报步骤(放在所有关键步骤之后) ===== + + - name: Report Build Failure + if: failure() && steps.build.outcome == 'failure' + run: | + curl -s -X POST "${LOG_CENTER_URL}/api/v1/logs/report" \ + -H "Content-Type: application/json" \ + -d '{ + "project_id": "'"${GITHUB_REPOSITORY##*/}"'", + "environment": "cicd", + "level": "ERROR", + "source": "cicd", + "commit_hash": "'"$GITHUB_SHA"'", + "error": { + "type": "DockerBuildError", + "message": "Docker build failed", + "file_path": null, + "line_number": null, + "stack_trace": ["Docker build step failed. Check CI logs."] + }, + "context": { + "workflow_name": "'"$GITHUB_WORKFLOW"'", + "job_name": "'"$GITHUB_JOB"'", + "step_name": "Build Docker Image", + "run_id": "'"$GITHUB_RUN_ID"'", + "branch": "'"$GITHUB_REF_NAME"'", + "repository": "'"$GITHUB_REPOSITORY"'", + "run_url": "'"$GITHUB_SERVER_URL"'/'"$GITHUB_REPOSITORY"'/actions/runs/'"$GITHUB_RUN_ID"'" + } + }' --connect-timeout 5 --max-time 10 || true + + - name: Report Test Failure + if: failure() && steps.test.outcome == 'failure' + run: | + curl -s -X POST "${LOG_CENTER_URL}/api/v1/logs/report" \ + -H "Content-Type: application/json" \ + -d '{ + "project_id": "'"${GITHUB_REPOSITORY##*/}"'", + "environment": "cicd", + "level": "ERROR", + "source": "cicd", + "commit_hash": "'"$GITHUB_SHA"'", + "error": { + "type": "TestFailure", + "message": "Tests failed in CI pipeline", + "file_path": null, + "line_number": null, + "stack_trace": ["Test step failed. Check CI logs."] + }, + "context": { + "workflow_name": "'"$GITHUB_WORKFLOW"'", + "job_name": "'"$GITHUB_JOB"'", + "step_name": "Run Tests", + "run_id": "'"$GITHUB_RUN_ID"'", + "branch": "'"$GITHUB_REF_NAME"'", + "repository": "'"$GITHUB_REPOSITORY"'", + "run_url": "'"$GITHUB_SERVER_URL"'/'"$GITHUB_REPOSITORY"'/actions/runs/'"$GITHUB_RUN_ID"'" + } + }' --connect-timeout 5 --max-time 10 || true +``` + +### 使用 report-cicd-error.sh 脚本 + +项目提供了通用上报脚本 `scripts/report-cicd-error.sh`(需要 `jq`),可在 CI 步骤中使用: + +```bash +# 用法: ./scripts/report-cicd-error.sh +./scripts/report-cicd-error.sh rtc_backend "Build Docker Image" "Docker build failed: exit code 1" +./scripts/report-cicd-error.sh rtc_backend "Run Tests" /tmp/test-output.log +``` + +脚本会自动: +- 根据步骤名推断 `error_type`(DockerBuildError / NpmBuildError / TestFailure / LintError) +- 读取 Gitea Actions 环境变量填充 context +- 如果传入文件路径,读取最后 100 行作为 stack_trace + +--- + +## 步骤 4:接入 K8s 部署错误上报 + +> `source: "deployment"` + +通过 K8s Pod 健康监控 CronJob,定时扫描集群中异常 Pod 并上报到 Log Center。 + +### 上报格式 + +```json +{ + "project_id": "rtc_backend", + "environment": "production", + "level": "CRITICAL", + "source": "deployment", + "error": { + "type": "CrashLoopBackOff", + "message": "CrashLoopBackOff: back-off restarting failed container (pod: rtc-backend-xxx, container: api)", + "file_path": null, + "line_number": null, + "stack_trace": ["...容器崩溃前的日志(最后 50 行)..."] + }, + "context": { + "namespace": "default", + "pod_name": "rtc-backend-xxx-yyy", + "container_name": "api", + "deployment_name": "rtc-backend", + "restart_count": 5, + "node_name": "node-1" + } +} +``` + +### Deployment 特有字段 + +| 字段 | 说明 | +|------|------| +| `source` | **必须**设为 `"deployment"` | +| `level` | 建议 `"CRITICAL"`,Pod 异常通常较严重 | +| `error.type` | 取自 K8s 状态:`CrashLoopBackOff`, `OOMKilled`, `ImagePullBackOff`, `ErrImagePull` 等 | +| `error.file_path` | 可为 `null` | +| `error.line_number` | 可为 `null` | +| `error.stack_trace` | 容器崩溃前的日志输出 | +| `context.namespace` | K8s 命名空间 | +| `context.pod_name` | Pod 名称 | +| `context.deployment_name` | Deployment 名称(用于指纹去重) | +| `context.restart_count` | 重启次数 | +| `context.node_name` | 节点名 | + +### 监控的异常状态 + +| 状态 | 说明 | +|------|------| +| `CrashLoopBackOff` | 容器反复崩溃重启 | +| `OOMKilled` | 内存溢出被杀 | +| `ImagePullBackOff` / `ErrImagePull` | 拉取镜像失败 | +| `CreateContainerConfigError` | 容器配置错误 | +| `RunContainerError` | 容器启动失败 | + +### 接入方式:添加 Pod label 映射 + +K8s Monitor CronJob 已在集群中运行,每 5 分钟扫描一次。新项目接入只需在 `k8s-monitor/monitor.py` 的 `APP_TO_PROJECT` 字典中添加映射: + +```python +# k8s-monitor/monitor.py +APP_TO_PROJECT = { + "rtc-backend": "rtc_backend", # Pod 的 app label -> project_id + "rtc-backend-dev": "rtc_backend", + "rtc-web": "rtc_web", + "rtc-web-dev": "rtc_web", + "log-center-api": "log_center_api", + "log-center-web": "log_center_web", + # 新项目在此添加映射 + "your-app": "your_project_id", +} +``` + +确保你的 K8s Deployment 有 `app` label: + +```yaml +metadata: + labels: + app: your-app # 与 APP_TO_PROJECT 中的 key 一致 +``` + +### CronJob 部署配置 + +如果集群中尚未部署 Monitor,使用以下配置: + +```yaml +# k8s/monitor-cronjob.yaml +apiVersion: batch/v1 +kind: CronJob +metadata: + name: pod-health-monitor +spec: + schedule: "*/5 * * * *" + jobTemplate: + spec: + template: + spec: + serviceAccountName: pod-monitor + containers: + - name: monitor + image: your-registry/k8s-pod-monitor:latest + env: + - name: LOG_CENTER_URL + value: "https://qiyuan-log-center-api.airlabs.art" + - name: MONITOR_NAMESPACE + value: "default" + restartPolicy: OnFailure +``` + +--- + ## 错误去重机制 -Log Center 使用 **指纹(fingerprint)** 对错误进行去重,按来源使用不同的指纹策略: +Log Center 使用 **指纹(fingerprint)** 对错误进行去重,三类来源使用不同的指纹策略: | 来源 | 指纹组成 | |------|----------| @@ -369,7 +689,7 @@ Log Center 使用 **指纹(fingerprint)** 对错误进行去重,按来源使 | `cicd` | `MD5(project_id \| cicd \| error_type \| job_name \| step_name)` | | `deployment` | `MD5(project_id \| deployment \| error_type \| namespace \| deployment_name)` | -相同指纹的错误只会记录一次。如果已修复的错误再次出现,系统会自动重新打开(回归检测)。 +相同指纹的错误只记录一次。已修复的错误再次出现会自动重新打开(回归检测)。 --- @@ -391,39 +711,47 @@ NEW → VERIFYING → PENDING_FIX → FIXING → FIXED → VERIFIED → DEPLOYED | `FIXED` | 已修复,待验证 | | `VERIFIED` | 已验证修复 | | `DEPLOYED` | 已部署上线 | -| `FIX_FAILED` | 修复失败(失败原因会记录到数据库并在 Web 端展示) | +| `FIX_FAILED` | 修复失败 | --- -## Web 管理端 +## API 参考 -### 项目管理 +### 上报错误日志 -访问 Web 管理端的「项目管理」页面,可以: +**POST** `/api/v1/logs/report` -- 查看所有已注册项目及其配置状态 -- 编辑项目的**仓库地址**(`repo_url`)和**本地路径**(`local_path`) -- 未配置的字段会标红提示 +**响应:** -> Repair Agent 依赖这两个配置来定位项目代码和执行 Git 操作。请确保在接入后及时配置。 +```json +// 新错误 +{"message": "Log reported", "id": 123} -### 缺陷追踪 +// 重复错误(去重) +{"message": "Log deduplicated", "id": 123, "status": "NEW"} -- **缺陷列表**: 按项目、来源、状态筛选,修复失败的缺陷会直接显示失败原因 -- **缺陷详情**: 查看完整错误信息、堆栈、上下文,以及修复历史记录 -- **修复报告**: 查看每轮 AI 修复的详细过程(分析、代码变更、测试结果、失败原因) +// 回归(已修复的错误再次出现) +{"message": "Regression detected, reopened", "id": 123} +``` + +### 项目管理 API + +| 方法 | 路径 | 说明 | +|------|------|------| +| GET | `/api/v1/projects` | 获取项目列表 | +| GET | `/api/v1/projects/{project_id}` | 获取项目详情 | +| PUT | `/api/v1/projects/{project_id}` | 编辑项目配置 | --- ## 最佳实践 -1. **首次接入时传入 `repo_url`**: 在日志上报中包含仓库地址,省去手动配置步骤 -2. **设置超时**: 上报请求设置 3 秒超时,避免影响主业务 -3. **静默失败**: 上报失败不应影响用户体验 -4. **异步上报**: 使用异步方式上报,不阻塞主流程 -5. **添加上下文**: 尽量添加有用的上下文信息(用户ID、请求URL等) -6. **环境区分**: 正确设置 `environment` 字段区分开发/生产 -7. **配置本地路径**: 接入后在 Web 端配置 `local_path`,使 Repair Agent 能正确定位代码 +1. **设置超时**: 上报请求设置 3 秒超时,避免影响主业务 +2. **静默失败**: 上报失败不应影响用户体验,所有 catch 块静默处理 +3. **异步上报**: 使用异步方式上报,不阻塞主流程 +4. **添加上下文**: 尽量添加有用的上下文信息(用户ID、请求URL等) +5. **环境区分**: 正确设置 `environment` 字段区分开发/生产 +6. **CI/CD 用 `|| true`**: 上报步骤失败不应阻断流水线 --- @@ -434,7 +762,6 @@ NEW → VERIFYING → PENDING_FIX → FIXING → FIXED → VERIFIED → DEPLOYED # .env LOG_CENTER_URL=http://localhost:8002 ENVIRONMENT=development -REPO_URL=https://gitea.example.com/team/rtc_backend.git # 可选 ``` ### JavaScript 项目 @@ -450,8 +777,14 @@ flutter run --dart-define=LOG_CENTER_URL=http://localhost:8002 flutter run --dart-define=ENVIRONMENT=development ``` +### Gitea Actions +```yaml +env: + LOG_CENTER_URL: https://qiyuan-log-center-api.airlabs.art +``` + --- -## API 文档 +## 完整 API 文档 -完整 API 文档请访问: [http://localhost:8002/docs](http://localhost:8002/docs) +访问: [http://localhost:8002/docs](http://localhost:8002/docs) diff --git a/web/src/api.ts b/web/src/api.ts index 7be77b0..3ccfdd4 100644 --- a/web/src/api.ts +++ b/web/src/api.ts @@ -1,4 +1,4 @@ -import axios from 'axios'; +import axios, { AxiosError } from 'axios'; const API_BASE = import.meta.env.VITE_API_BASE_URL || 'https://qiyuan-log-center-api.airlabs.art'; @@ -7,6 +7,58 @@ const api = axios.create({ timeout: 10000, }); +// ==================== 自身错误上报 ==================== + +export function reportError(error: Error, context?: Record) { + const stackLines = error.stack?.split('\n') || []; + const match = stackLines[1]?.match(/at\s+.*\s+\((.+):(\d+):\d+\)/); + + const payload = { + project_id: 'log_center_web', + environment: import.meta.env.MODE, + level: 'ERROR', + error: { + type: error.name, + message: error.message, + file_path: match?.[1] || 'unknown', + line_number: parseInt(match?.[2] || '0'), + stack_trace: stackLines, + }, + context: { + url: window.location.href, + userAgent: navigator.userAgent, + ...context, + }, + }; + + const blob = new Blob([JSON.stringify(payload)], { type: 'application/json' }); + if (navigator.sendBeacon) { + navigator.sendBeacon(`${API_BASE}/api/v1/logs/report`, blob); + } else { + fetch(`${API_BASE}/api/v1/logs/report`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(payload), + keepalive: true, + }).catch(() => {}); + } +} + +// Axios 拦截器:上报 5xx 服务端错误 +api.interceptors.response.use( + (response) => response, + (error: AxiosError) => { + if (error.response && error.response.status >= 500) { + reportError(error, { + api_url: error.config?.url, + method: error.config?.method, + status: error.response.status, + }); + } + return Promise.reject(error); + }, +); + // Types export interface ErrorLog { id: number; diff --git a/web/src/main.tsx b/web/src/main.tsx index bef5202..18725ce 100644 --- a/web/src/main.tsx +++ b/web/src/main.tsx @@ -2,6 +2,22 @@ import { StrictMode } from 'react' import { createRoot } from 'react-dom/client' import './index.css' import App from './App.tsx' +import { reportError } from './api' + +// 全局错误捕获:JS 运行时异常 +window.onerror = (_message, source, lineno, colno, error) => { + if (error) { + reportError(error, { source, lineno, colno }) + } +} + +// 全局错误捕获:未处理的 Promise rejection +window.onunhandledrejection = (event: PromiseRejectionEvent) => { + const error = event.reason instanceof Error + ? event.reason + : new Error(String(event.reason)) + reportError(error, { type: 'unhandledrejection' }) +} createRoot(document.getElementById('root')!).render(