All checks were successful
Build and Deploy Log Center / build-and-deploy (push) Successful in 2m24s
新增两种日志来源(cicd / deployment),使日志中台覆盖"构建→部署→运行"全链路:

后端变更:
- models.py: 新增 LogSource 枚举和 source 字段,file_path/line_number 改为可选
- main.py: 按来源生成不同指纹策略,所有查询端点支持 source 筛选,仪表盘新增来源分布统计
- database.py: 新增 4 条迁移 SQL(source 字段、索引、字段可空)
- task_manager.py: 修复 Agent 仅拉取 runtime 来源的缺陷

新增组件:
- k8s-monitor/: K8s Pod 健康监控脚本(Python),每 5 分钟检测异常 Pod 并上报
- k8s/monitor-cronjob.yaml: CronJob + RBAC 部署清单
- scripts/report-cicd-error.sh: CI/CD 错误上报 Bash 脚本
- scripts/gitea-actions-example.yaml: Gitea Actions 集成示例

前端变更:
- api.ts: 类型定义更新,支持 source 字段
- BugList.tsx: 新增来源筛选标签页和来源列
- BugDetail.tsx: 按来源条件渲染(CI/CD 信息、部署信息),非 runtime 禁用修复按钮
- Dashboard.tsx: 新增来源分布表格
- index.css: 来源标签样式(source-badge)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
313 lines
11 KiB
Python
313 lines
11 KiB
Python
from fastapi import FastAPI, Depends, HTTPException, Query
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
from sqlmodel.ext.asyncio.session import AsyncSession
|
|
from sqlmodel import select, func
|
|
from .database import init_db, get_session
|
|
from .models import ErrorLog, ErrorLogCreate, LogStatus, TaskStatusUpdate, RepairTask, RepairTaskCreate
|
|
from datetime import datetime, timedelta
|
|
from typing import Optional, List
|
|
import hashlib
|
|
import json
|
|
|
|
# ASGI application object; every route below is registered on it.
app = FastAPI(title="Log Center & AIOps Control Plane")

# CORS for frontend.
# NOTE(review): a wildcard origin together with allow_credentials=True is
# advised against by the FastAPI CORS documentation. Restrict allow_origins
# to the real frontend domain(s) before exposing this service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, restrict to your domain
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
|
|
@app.on_event("startup")
async def on_startup():
    """Call ``init_db()`` when the application emits its startup event."""
    await init_db()
|
|
|
|
def generate_fingerprint(log: ErrorLogCreate) -> str:
    """Return the MD5 hex digest used to deduplicate a reported log.

    The fields that make up the fingerprint depend on ``log.source``:

    * ``"cicd"``: project id, the literal ``cicd``, error type, and the
      ``job_name`` / ``step_name`` context entries (each defaulting to
      ``"unknown"``).
    * ``"deployment"``: project id, the literal ``deployment``, error type,
      and the ``namespace`` (default ``"default"``) / ``deployment_name``
      (default ``"unknown"``) context entries.
    * anything else: project id, error type, ``file_path`` and
      ``line_number`` taken from ``log.error``.

    The fields are joined with ``|`` before hashing; missing ``error`` keys
    are rendered as the text ``None``.
    """
    origin = log.source

    if origin == "cicd":
        extra = log.context or {}
        parts = (
            log.project_id,
            "cicd",
            log.error.get('type'),
            extra.get('job_name', 'unknown'),
            extra.get('step_name', 'unknown'),
        )
    elif origin == "deployment":
        extra = log.context or {}
        parts = (
            log.project_id,
            "deployment",
            log.error.get('type'),
            extra.get('namespace', 'default'),
            extra.get('deployment_name', 'unknown'),
        )
    else:
        parts = (
            log.project_id,
            log.error.get('type'),
            log.error.get('file_path'),
            log.error.get('line_number'),
        )

    # f-string formatting keeps the exact text the fingerprint has always used.
    raw = "|".join(f"{part}" for part in parts)
    return hashlib.md5(raw.encode()).hexdigest()
|
|
|
|
# ==================== Log Reporting ====================
|
|
@app.post("/api/v1/logs/report", tags=["Logs"])
async def report_log(log_data: ErrorLogCreate, session: AsyncSession = Depends(get_session)):
    """Ingest one error report, deduplicating by fingerprint.

    Three outcomes are possible:

    * A log with the same fingerprint exists and is not yet resolved: nothing
      is written and the existing id/status is returned.
    * A log with the same fingerprint exists and is DEPLOYED, FIXED or
      VERIFIED: it is treated as a regression, reset to NEW with a fresh
      timestamp and a zeroed retry counter.
    * No log with that fingerprint exists: a new ``ErrorLog`` row is created.
    """
    fingerprint = generate_fingerprint(log_data)

    # Check deduplication
    lookup = select(ErrorLog).where(ErrorLog.fingerprint == fingerprint)
    known = (await session.exec(lookup)).first()

    if known:
        resolved_states = [LogStatus.DEPLOYED, LogStatus.FIXED, LogStatus.VERIFIED]
        if known.status in resolved_states:
            # It was resolved but happened again -> regression, reopen it.
            known.status = LogStatus.NEW
            known.timestamp = log_data.timestamp or datetime.utcnow()
            known.retry_count = 0  # Reset retries for new occurrence
            session.add(known)
            await session.commit()
            await session.refresh(known)
            return {"message": "Regression detected, reopened", "id": known.id}

        # Still open: report the existing record instead of storing a duplicate.
        return {"message": "Log deduplicated", "id": known.id, "status": known.status}

    # Create new
    error = log_data.error
    created = ErrorLog(
        project_id=log_data.project_id,
        environment=log_data.environment,
        level=log_data.level,
        source=log_data.source,
        error_type=error.get("type"),
        error_message=error.get("message"),
        file_path=error.get("file_path"),
        line_number=error.get("line_number"),
        stack_trace=error.get("stack_trace"),
        context=log_data.context,
        version=log_data.version,
        commit_hash=log_data.commit_hash,
        fingerprint=fingerprint,
        timestamp=log_data.timestamp or datetime.utcnow(),
    )

    session.add(created)
    await session.commit()
    await session.refresh(created)

    return {"message": "Log reported", "id": created.id}
|
|
|
|
# ==================== Agent Tasks ====================
|
|
@app.get("/api/v1/tasks/pending", tags=["Tasks"])
async def get_pending_tasks(
    project_id: Optional[str] = None,
    source: Optional[str] = None,
    session: AsyncSession = Depends(get_session),
):
    """Return every error log whose status is NEW.

    Args:
        project_id: When given (non-empty), only logs of this project are
            returned. Annotated as ``Optional[str]`` to match the other
            endpoints; the default is still ``None``.
        source: When given (non-empty), only logs from this source are
            returned.
        session: Database session injected by FastAPI.

    Returns:
        The list of matching ``ErrorLog`` rows.
    """
    query = select(ErrorLog).where(ErrorLog.status == LogStatus.NEW)
    if project_id:
        query = query.where(ErrorLog.project_id == project_id)
    if source:
        query = query.where(ErrorLog.source == source)

    results = await session.exec(query)
    return results.all()
|
|
|
|
@app.put("/api/v1/tasks/{task_id}/status", tags=["Tasks"])
async def update_task_status(
    task_id: int,
    status_update: TaskStatusUpdate,
    session: AsyncSession = Depends(get_session)
):
    """Set the status of the error log identified by ``task_id``.

    Raises:
        HTTPException: 404 when no error log has that id.
    """
    lookup = select(ErrorLog).where(ErrorLog.id == task_id)
    task = (await session.exec(lookup)).first()

    if not task:
        raise HTTPException(status_code=404, detail="Task not found")

    # Only the status is persisted; any message on the update is not stored.
    # We could log the message to a history table if needed
    task.status = status_update.status

    session.add(task)
    await session.commit()
    await session.refresh(task)

    return {"message": "Status updated", "id": task.id, "status": task.status}
|
|
|
|
|
|
# ==================== Repair Reports ====================
|
|
@app.post("/api/v1/repair/reports", tags=["Repair"])
async def create_repair_report(report: RepairTaskCreate, session: AsyncSession = Depends(get_session)):
    """Store a repair report and mirror its outcome onto the related error log.

    The report is always saved as a ``RepairTask``. When its status is FIXED
    or FIX_FAILED and the referenced error log exists, that log's status is
    updated to match, in the same commit.
    """
    # 1. Create repair task record
    repair_task = RepairTask.from_orm(report)
    session.add(repair_task)

    # 2. Update error log status (optional, but good for consistency)
    terminal_states = [LogStatus.FIXED, LogStatus.FIX_FAILED]
    if report.status in terminal_states:
        lookup = select(ErrorLog).where(ErrorLog.id == report.error_log_id)
        error_log = (await session.exec(lookup)).first()
        if error_log:
            error_log.status = report.status
            session.add(error_log)

    await session.commit()
    await session.refresh(repair_task)
    return {"message": "Report uploaded", "id": repair_task.id}
|
|
|
|
@app.get("/api/v1/repair/reports", tags=["Repair"])
async def get_repair_reports(
    page: int = Query(1, ge=1),
    page_size: int = Query(20, ge=1, le=100),
    project_id: Optional[str] = None,
    error_log_id: Optional[int] = None,
    session: AsyncSession = Depends(get_session)
):
    """Return one page of repair reports, newest first.

    ``project_id`` and ``error_log_id`` narrow the result when they are
    truthy. The response carries the page items plus the total number of
    matching rows and the derived page count.
    """
    # Collect the filters once so the page query and the count query agree.
    conditions = []
    if project_id:
        conditions.append(RepairTask.project_id == project_id)
    if error_log_id:
        conditions.append(RepairTask.error_log_id == error_log_id)

    page_query = select(RepairTask).order_by(RepairTask.created_at.desc())
    count_query = select(func.count(RepairTask.id))
    for condition in conditions:
        page_query = page_query.where(condition)
        count_query = count_query.where(condition)

    skip = (page - 1) * page_size
    page_query = page_query.offset(skip).limit(page_size)

    tasks = (await session.exec(page_query)).all()

    # Get total
    total = (await session.exec(count_query)).one()

    return {
        "items": tasks,
        "total": total,
        "page": page,
        "page_size": page_size,
        "total_pages": (total + page_size - 1) // page_size,  # ceiling division
    }
|
|
|
|
@app.get("/api/v1/repair/reports/{report_id}", tags=["Repair"])
async def get_repair_report_detail(report_id: int, session: AsyncSession = Depends(get_session)):
    """Return the repair report with the given id.

    Raises:
        HTTPException: 404 when no repair report has that id.
    """
    lookup = select(RepairTask).where(RepairTask.id == report_id)
    report = (await session.exec(lookup)).first()

    if not report:
        raise HTTPException(status_code=404, detail="Report not found")

    return report
|
|
|
|
|
|
|
|
# ==================== Dashboard APIs ====================
|
|
@app.get("/api/v1/dashboard/stats", tags=["Dashboard"])
async def get_dashboard_stats(source: Optional[str] = None, session: AsyncSession = Depends(get_session)):
    """Return the aggregate numbers shown on the dashboard.

    When ``source`` is given, the total, today's count, the per-status counts
    and the fix rate are restricted to that source. The per-source
    distribution is always computed over all logs.
    """
    # Start of the current day, based on datetime.utcnow().
    today = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)

    def _scoped(stmt):
        # Restrict a statement to the requested source, if any.
        if source:
            return stmt.where(ErrorLog.source == source)
        return stmt

    async def _count(stmt):
        # Execute a COUNT statement and return its single value.
        return (await session.exec(stmt)).one()

    # Total bugs
    total_bugs = await _count(_scoped(select(func.count(ErrorLog.id))))

    # Today's new bugs
    today_bugs = await _count(
        _scoped(select(func.count(ErrorLog.id)).where(ErrorLog.timestamp >= today))
    )

    # Count by status
    status_counts = {
        state.value: await _count(
            _scoped(select(func.count(ErrorLog.id)).where(ErrorLog.status == state))
        )
        for state in LogStatus
    }

    # Fixed rate = (FIXED + VERIFIED + DEPLOYED) / Total
    fixed_count = sum(status_counts.get(name, 0) for name in ("FIXED", "VERIFIED", "DEPLOYED"))
    if total_bugs > 0:
        fix_rate = round((fixed_count / total_bugs * 100), 2)
    else:
        fix_rate = 0

    # Source distribution (deliberately not narrowed by the ``source`` filter)
    from .models import LogSource
    source_counts = {
        origin.value: await _count(
            select(func.count(ErrorLog.id)).where(ErrorLog.source == origin.value)
        )
        for origin in LogSource
    }

    return {
        "total_bugs": total_bugs,
        "today_bugs": today_bugs,
        "fix_rate": fix_rate,
        "status_distribution": status_counts,
        "source_distribution": source_counts,
    }
|
|
|
|
@app.get("/api/v1/bugs", tags=["Dashboard"])
async def get_bugs_list(
    page: int = Query(1, ge=1),
    page_size: int = Query(20, ge=1, le=100),
    status: Optional[LogStatus] = None,
    project_id: Optional[str] = None,
    source: Optional[str] = None,
    session: AsyncSession = Depends(get_session)
):
    """Return one page of error logs, newest timestamp first.

    ``status``, ``project_id`` and ``source`` each narrow the result when
    truthy. The response carries the page items plus the total number of
    matching rows and the derived page count.
    """
    # Collect the filters once so the page query and the count query agree.
    conditions = []
    if status:
        conditions.append(ErrorLog.status == status)
    if project_id:
        conditions.append(ErrorLog.project_id == project_id)
    if source:
        conditions.append(ErrorLog.source == source)

    page_query = select(ErrorLog).order_by(ErrorLog.timestamp.desc())
    count_query = select(func.count(ErrorLog.id))
    for condition in conditions:
        page_query = page_query.where(condition)
        count_query = count_query.where(condition)

    # Pagination
    skip = (page - 1) * page_size
    page_query = page_query.offset(skip).limit(page_size)

    bugs = (await session.exec(page_query)).all()

    # Get total count for pagination info
    total = (await session.exec(count_query)).one()

    return {
        "items": bugs,
        "total": total,
        "page": page,
        "page_size": page_size,
        "total_pages": (total + page_size - 1) // page_size,  # ceiling division
    }
|
|
|
|
@app.get("/api/v1/bugs/{bug_id}", tags=["Dashboard"])
async def get_bug_detail(bug_id: int, session: AsyncSession = Depends(get_session)):
    """Return the error log with the given id.

    Raises:
        HTTPException: 404 when no error log has that id.
    """
    lookup = select(ErrorLog).where(ErrorLog.id == bug_id)
    bug = (await session.exec(lookup)).first()

    if not bug:
        raise HTTPException(status_code=404, detail="Bug not found")

    return bug
|
|
|
|
@app.get("/api/v1/projects", tags=["Dashboard"])
async def get_projects(session: AsyncSession = Depends(get_session)):
    """Return the distinct project ids found in the error log table."""
    distinct_ids = select(ErrorLog.project_id).distinct()
    projects = (await session.exec(distinct_ids)).all()
    return {"projects": projects}
|
|
|
|
@app.get("/", tags=["Health"])
async def health_check():
    """Return a fixed ``{"status": "ok"}`` payload; no database access is made."""
    payload = {"status": "ok"}
    return payload
|
|
|