feat(repair-agent): add triage and retry flow for FIX_FAILED bugs

- Add fetch_failed_bugs() to task_manager
- Add triage_bug() to claude_service for AI-based bug classification
- Add retry_failed_project() to core with triage→fix pipeline
- Add retry CLI command to __main__.py

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-24 13:01:27 +08:00 · 2026-02-24 13:01:27 +08:00 · d58ca4b131
commit d58ca4b131
parent 3cae8b083c
4 changed files with 254 additions and 0 deletions
--- a/repair_agent/__main__.py
+++ b/repair_agent/__main__.py
@ -109,6 +109,75 @@ def fix_one(
    engine.close()


+@app.command()
+def retry(
+    project: str = typer.Option(None, "--project", "-p", help="筛选项目ID"),
+    test: bool = typer.Option(True, "--test/--no-test", help="是否运行测试"),
+    commit: bool = typer.Option(False, "--commit", "-c", help="是否自动提交"),
+):
+    """
+    重新处理所有 FIX_FAILED 的 Bug。
+
+    流程：先分诊（triage）判断 Bug 是否为代码缺陷，
+    不可修复的标记为 CANNOT_REPRODUCE，可修复的重新尝试修复。
+
+    示例:
+      python -m repair_agent retry                    # 处理所有 FIX_FAILED
+      python -m repair_agent retry -p rtc_backend     # 只处理 rtc_backend
+    """
+    # NOTE: the docstring above is presumably rendered by typer as this
+    # command's --help text, so it is user-facing output and is deliberately
+    # left in its original language.
+    #
+    # Summary: list the FIX_FAILED bugs (optionally filtered by --project),
+    # show them in a table, then delegate triage + re-fix to
+    # RepairEngine.retry_failed_project and print the outcome.
+    console.print("[bold blue]开始处理 FIX_FAILED Bug[/bold blue]")
+
+    # Preview the bugs that are about to be processed. The manager is closed
+    # in `finally` so it is released even when the fetch raises.
+    task_manager = TaskManager()
+    try:
+        failed_bugs = task_manager.fetch_failed_bugs(project)
+    finally:
+        task_manager.close()
+
+    if not failed_bugs:
+        console.print("[yellow]没有 FIX_FAILED 的 Bug[/yellow]")
+        return
+
+    table = Table(title="FIX_FAILED Bug 列表")
+    table.add_column("ID", style="cyan")
+    table.add_column("项目", style="green")
+    table.add_column("错误类型", style="red")
+    table.add_column("消息", style="white", max_width=50)
+    table.add_column("文件", style="blue")
+
+    for bug in failed_bugs:
+        message = bug.error.message
+        # Keep the message column short: truncate to 50 chars plus an ellipsis.
+        if len(message) > 50:
+            message = message[:50] + "..."
+        table.add_row(
+            str(bug.id),
+            bug.project_id,
+            bug.error.type,
+            message,
+            bug.error.file_path or "-",
+        )
+
+    console.print(table)
+    console.print(f"\n共 {len(failed_bugs)} 个 Bug 待处理，开始分诊...\n")
+
+    engine = RepairEngine()
+    try:
+        # The engine fetches the FIX_FAILED list again on its own; the table
+        # above is only a preview.
+        result = engine.retry_failed_project(
+            project_id=project,
+            run_tests=test,
+            auto_commit=commit,
+        )
+
+        console.print("\n[bold]处理结果:[/bold]")
+        console.print(f"  总计: {result.total}")
+        console.print(f"  [green]成功: {result.success_count}[/green]")
+        console.print(f"  [red]失败: {result.failed_count}[/red]")
+
+        if result.results:
+            console.print("\n[bold]详细结果:[/bold]")
+            for r in result.results:
+                status_icon = "[green]✓[/green]" if r.success else "[red]✗[/red]"
+                console.print(f"  {status_icon} Bug #{r.bug_id}: {r.message}")
+                if r.modified_files:
+                    console.print(f"      修改文件: {', '.join(r.modified_files)}")
+    finally:
+        # Always release the engine, including when triage/fix raises midway.
+        engine.close()
+
+
@app.command()
 def status():
    """查看配置状态"""
--- a/repair_agent/agent/claude_service.py
+++ b/repair_agent/agent/claude_service.py
@ -185,6 +185,39 @@ class ClaudeService:
        logger.info(f"开始第 {round_num} 轮修复 {len(bugs)} 个 Bug...")
        return self.execute_prompt(prompt, project_path)

+    def triage_bug(self, bug: Bug, project_path: str) -> tuple[bool, str]:
+        """
+        Triage a bug: ask the model whether it is a fixable code defect.
+
+        The prompt instructs the model to finish its output with either
+        VERDICT:FIX or VERDICT:CANNOT_REPRODUCE.
+
+        Returns whatever ``execute_prompt`` returns for the triage prompt,
+        run against ``project_path`` with only the Read, Grep and Glob tools
+        allowed.
+        """
+        # Render the bug once; it is embedded verbatim in the prompt below.
+        bug_details = bug.format_for_prompt()
+        triage_prompt = f"""你是一个 Bug 分诊专家。请分析以下 Bug，判断它是否是一个需要修复的**代码缺陷**。
+
+{bug_details}
+
+## 判断规则
+
+属于 **无法复现 / 不需要修复** 的情况（CANNOT_REPRODUCE）：
+1. JWT Token 过期、认证失败 — 正常认证流程，不是代码 Bug
+2. HTTP 405 Method Not Allowed — 客户端请求了错误的方法
+3. 第三方库内部错误且 file_path 指向 site-packages / sdk — 非项目代码
+4. 瞬态网络错误、加载中断（如 PlatformException: Loading interrupted）
+5. 客户端传参错误导致的验证失败
+6. 错误堆栈中没有项目代码帧（全在框架/三方库中）
+
+属于 **需要修复** 的情况（FIX）：
+1. 堆栈中有项目代码（apps/ 或 lib/ 开头）且错误原因明确
+2. 数据库约束错误（IntegrityError）由项目代码逻辑引起
+3. TypeError / AttributeError 出现在项目视图或模型中
+
+请先用 Grep/Read 查看相关源文件确认当前代码状态，然后给出判断。
+
+**最后一行必须输出以下格式之一（只输出一个）：**
+VERDICT:FIX
+VERDICT:CANNOT_REPRODUCE
+"""
+        return self.execute_prompt(
+            triage_prompt,
+            project_path,
+            allowed_tools="Read,Grep,Glob",
+        )
+
    def analyze_bug(self, bug: Bug, project_path: str) -> tuple[bool, str]:
        """
        分析单个 Bug（不修复）
--- a/repair_agent/agent/core.py
+++ b/repair_agent/agent/core.py
@ -224,6 +224,114 @@ class RepairEngine:
            results=results,
        )
    
+    def retry_failed_project(
+        self,
+        project_id: Optional[str] = None,
+        run_tests: bool = True,
+        auto_commit: bool = False,
+    ) -> BatchFixResult:
+        """
+        Re-process bugs in FIX_FAILED state: triage first, then retry the fix.
+
+        Steps:
+        1. Fetch the FIX_FAILED bugs (optionally restricted to ``project_id``).
+        2. Triage each one through ``claude_service.triage_bug``.
+        3. Bugs judged CANNOT_REPRODUCE get that status and a round-0 report.
+        4. Bugs judged FIX, or whose triage output carries no verdict marker,
+           are reset to PENDING_FIX; ``fix_project`` is then run once per
+           affected project.
+
+        Args:
+            project_id: Only handle bugs of this project; ``None`` means all.
+            run_tests: Forwarded to ``fix_project``.
+            auto_commit: Forwarded to ``fix_project``.
+
+        Returns:
+            A BatchFixResult combining the triage outcomes with the results
+            returned by ``fix_project``. ``project_id`` is "all" when no
+            filter was given.
+        """
+        scope = f" (项目: {project_id})" if project_id else ""
+        logger.info(f"开始处理 FIX_FAILED Bug{scope}")
+
+        failed_bugs = self.task_manager.fetch_failed_bugs(project_id)
+        if not failed_bugs:
+            logger.info("没有 FIX_FAILED 的 Bug")
+            return BatchFixResult(
+                project_id=project_id or "all",
+                total=0, success_count=0, failed_count=0, results=[],
+            )
+
+        results: list[FixResult] = []
+        bugs_to_fix: dict[str, list[Bug]] = {}  # project_id -> bugs queued for a fix
+
+        # Step 1: triage every bug individually.
+        for bug in failed_bugs:
+            logger.info(f"分诊 Bug #{bug.id} ({bug.error.type}: {bug.error.message[:60]})")
+
+            # Prefer the path registered for the project; fall back to settings.
+            project_info = self.task_manager.get_project_info(bug.project_id)
+            project_path = (
+                (project_info and project_info.get("local_path"))
+                or settings.get_project_path(bug.project_id)
+            )
+
+            if not project_path:
+                logger.warning(f"Bug #{bug.id}: 未找到项目路径 {bug.project_id}，跳过")
+                results.append(FixResult(
+                    bug_id=bug.id, success=False,
+                    message=f"未找到项目路径: {bug.project_id}",
+                ))
+                continue
+
+            # Mark the bug as being verified while the model triages it.
+            self.task_manager.update_status(bug.id, BugStatus.VERIFYING)
+            success, output = self.claude_service.triage_bug(bug, project_path)
+
+            if not success:
+                # Triage itself failed: put the bug back into FIX_FAILED.
+                logger.warning(f"Bug #{bug.id}: 分诊执行失败，保留 FIX_FAILED")
+                self.task_manager.update_status(
+                    bug.id, BugStatus.FIX_FAILED, f"分诊失败: {output[:200]}"
+                )
+                results.append(FixResult(
+                    bug_id=bug.id, success=False, message="分诊执行失败",
+                ))
+                continue
+
+            verdict = self._parse_triage_verdict(output)
+
+            if verdict == "CANNOT_REPRODUCE":
+                logger.info(f"Bug #{bug.id}: 判定为无法复现")
+                self.task_manager.update_status(
+                    bug.id, BugStatus.CANNOT_REPRODUCE,
+                    "AI 分诊判定：非代码缺陷或无法复现",
+                )
+                # Round 0 denotes the triage pass (no fix round was run).
+                self._upload_round_report(
+                    bug=bug, project_id=bug.project_id, round_num=0,
+                    ai_analysis=output, diff="", modified_files=[],
+                    test_output="", test_passed=False,
+                    failure_reason="AI 分诊：无法复现",
+                    status=BugStatus.CANNOT_REPRODUCE,
+                )
+                results.append(FixResult(
+                    bug_id=bug.id, success=True,
+                    message="标记为 CANNOT_REPRODUCE",
+                ))
+                continue
+
+            # Either an explicit FIX verdict or no verdict at all; a missing
+            # verdict defaults to attempting the fix again.
+            if verdict == "FIX":
+                logger.info(f"Bug #{bug.id}: 判定为可修复，加入修复队列")
+            else:
+                logger.warning(f"Bug #{bug.id}: 分诊输出无 VERDICT 标记，默认加入修复队列")
+            self.task_manager.update_status(bug.id, BugStatus.PENDING_FIX)
+            bugs_to_fix.setdefault(bug.project_id, []).append(bug)
+
+        # Step 2: run the regular fix pipeline once per affected project.
+        # NOTE(review): fix_project is only given the project id, so it
+        # presumably picks up every pending bug of that project, not just the
+        # ones queued here - confirm against fix_project.
+        for pid, bugs in bugs_to_fix.items():
+            logger.info(f"开始修复项目 {pid} 的 {len(bugs)} 个 Bug")
+            fix_result = self.fix_project(
+                project_id=pid,
+                run_tests=run_tests,
+                auto_commit=auto_commit,
+            )
+            results.extend(fix_result.results)
+
+        success_count = sum(1 for r in results if r.success)
+        # fix_project may report more results than there were FIX_FAILED bugs;
+        # never let the total drop below the number of results, otherwise
+        # failed_count could become negative.
+        total = max(len(failed_bugs), len(results))
+
+        return BatchFixResult(
+            project_id=project_id or "all",
+            total=total,
+            success_count=success_count,
+            failed_count=total - success_count,
+            results=results,
+        )
+
+    @staticmethod
+    def _parse_triage_verdict(output: str) -> Optional[str]:
+        """
+        Extract the triage verdict from the model output.
+
+        The triage prompt asks for the verdict on the last line, so the output
+        is scanned bottom-up and the last line carrying a marker wins. This
+        keeps a marker that is merely quoted earlier in the analysis from
+        overriding the final verdict.
+
+        Returns:
+            "CANNOT_REPRODUCE", "FIX", or ``None`` when no marker is present.
+        """
+        for line in reversed(output.splitlines()):
+            if "VERDICT:CANNOT_REPRODUCE" in line:
+                return "CANNOT_REPRODUCE"
+            if "VERDICT:FIX" in line:
+                return "FIX"
+        return None
+
    def _upload_round_report(
        self,
        bug: Bug,
--- a/repair_agent/agent/task_manager.py
+++ b/repair_agent/agent/task_manager.py
@ -73,6 +73,50 @@ class TaskManager:
        logger.info(f"获取到 {len(bugs)} 个待修复 Bug（NEW + PENDING_FIX）")
        return bugs
    
+    def fetch_failed_bugs(self, project_id: Optional[str] = None) -> list[Bug]:
+        """
+        Fetch the bugs whose fix failed (status FIX_FAILED, source "runtime").
+
+        Args:
+            project_id: When given, only bugs of this project are requested.
+
+        Returns:
+            The bugs built from the ``items`` of the API response. The list is
+            empty when the request fails or the response body is not valid
+            JSON; both cases are logged rather than raised.
+        """
+        bugs: list[Bug] = []
+
+        params: dict[str, str] = {"status": "FIX_FAILED", "source": "runtime"}
+        if project_id:
+            params["project_id"] = project_id
+
+        # Only the network call and JSON decoding are guarded; building the
+        # Bug objects happens afterwards so unrelated errors are not masked.
+        data: dict = {}
+        try:
+            response = self.client.get(
+                f"{self.base_url}/api/v1/bugs",
+                params=params,
+            )
+            response.raise_for_status()
+            data = response.json()
+        except httpx.HTTPError as e:
+            logger.error(f"获取 FIX_FAILED Bug 列表失败: {e}")
+        except ValueError as e:
+            # A non-JSON body makes response.json() raise a ValueError
+            # subclass; treat it like a failed request instead of crashing.
+            logger.error(f"解析 FIX_FAILED Bug 列表响应失败: {e}")
+
+        for item in data.get("items", []):
+            # The API may deliver the stack trace as one newline-joined
+            # string; it is split into a list of lines before building the Bug.
+            stack_trace = item.get("stack_trace")
+            if isinstance(stack_trace, str):
+                stack_trace = stack_trace.split("\n")
+
+            bugs.append(Bug(
+                id=item["id"],
+                project_id=item["project_id"],
+                environment=item.get("environment", "production"),
+                level=item.get("level", "ERROR"),
+                error={
+                    "type": item.get("error_type", "Unknown"),
+                    "message": item.get("error_message", ""),
+                    "file_path": item.get("file_path"),
+                    "line_number": item.get("line_number"),
+                    "stack_trace": stack_trace,
+                },
+                context=item.get("context"),
+                status=BugStatus.FIX_FAILED,
+                retry_count=item.get("retry_count", 0),
+            ))
+
+        logger.info(f"获取到 {len(bugs)} 个 FIX_FAILED Bug")
+        return bugs
+
    def update_status(self, bug_id: int, status: BugStatus, message: str = "") -> bool:
        """
        更新 Bug 状态