diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ac2571d --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +__pycache__/ +*.pyc +*.egg-info/ +dist/ +build/ +.ruff_cache/ +.pytest_cache/ diff --git a/src/diff_fox/review/resolver.py b/src/diff_fox/review/resolver.py index 09a27c6..c83dc2e 100644 --- a/src/diff_fox/review/resolver.py +++ b/src/diff_fox/review/resolver.py @@ -1,30 +1,117 @@ """Comment resolution for previously flagged DiffFox findings. -After a re-review, replies to old DiffFox comments that are no longer -flagged, acknowledging user replies if present. +After a re-review, uses LLM verification to check whether each old finding +was actually fixed in the current code before marking it as resolved. """ +import asyncio import logging +from typing import Literal -from diff_fox.models import Finding -from diff_fox.scm.base import SCMProvider +import anthropic +from pydantic import BaseModel + +from diff_fox.llm import get_structured_output +from diff_fox.scm.base import DiffFoxComment, SCMProvider logger = logging.getLogger(__name__) +MAX_CONCURRENT_CHECKS = 5 +MAX_COMMENTS_TO_CHECK = 20 +CODE_WINDOW = 30 # lines above and below the comment's line + +# Bot reply markers — match the exact prefix to avoid false matches with user text +_BOT_RESOLVED_MARKERS = ("\u2705 **Addressed**", "\u2705 **Acknowledged**") + + +class ResolutionVerdict(BaseModel): + """LLM verdict on whether a previously flagged issue was fixed.""" + + verdict: Literal["fixed", "not_fixed", "uncertain"] = "uncertain" + reasoning: str = "" + + +RESOLUTION_SYSTEM_PROMPT = """\ +You are a code review resolution agent. Your ONLY job is to determine whether +a previously flagged issue has been FIXED in the current code. + +You will receive: +1. The original finding (the review comment body) +2. The current code around the location where the finding was flagged + +Rules: +- Mark as "fixed" ONLY if you have clear evidence the issue described in the + finding is no longer present in the current code +- Mark as "not_fixed" if the issue described in the finding is still present +- Mark as "uncertain" if you cannot determine from the available code +- Be conservative: if in doubt, use "not_fixed" or "uncertain" +- A formatting change (whitespace, line wrapping) does NOT fix a logic issue +- Focus on the SUBSTANCE of the finding, not superficial code changes +""" + + +async def _check_resolution( + comment: DiffFoxComment, + repo: str, + head_sha: str, + scm: SCMProvider, + client: anthropic.AsyncAnthropic, + model: str, +) -> ResolutionVerdict: + """Use LLM to verify whether a single finding was fixed.""" + try: + file_content = await scm.get_file_content(repo, comment["path"], head_sha) + except Exception: + # File no longer exists → the code was removed, finding is addressed + return ResolutionVerdict(verdict="fixed", reasoning="File no longer exists") + + # Extract code window around the comment's line + lines = file_content.content.splitlines() + center = max(0, comment["line"] - 1) # 0-indexed + start = max(0, center - CODE_WINDOW) + end = min(len(lines), center + CODE_WINDOW + 1) + numbered_lines = [f"{i + 1}: {lines[i]}" for i in range(start, end)] + code_snippet = "\n".join(numbered_lines) + + user_message = ( + f"\n" + f"File: {comment['path']}:{comment['line']}\n" + f"{comment['body']}\n" + f"\n\n" + f'\n' + f"{code_snippet}\n" + f"" + ) + + try: + result, _ = await get_structured_output( + client, + model, + RESOLUTION_SYSTEM_PROMPT, + user_message, + ResolutionVerdict, + timeout=30.0, + max_tokens=512, + ) + return result + except Exception: + logger.debug("LLM resolution check failed for comment %d", comment["id"]) + return ResolutionVerdict(verdict="uncertain", reasoning="LLM check failed") + async def resolve_addressed_comments( - new_findings: list[Finding], repo: str, pr_number: int, + head_sha: str, scm: SCMProvider, + client: anthropic.AsyncAnthropic, + model: str, ) -> int: - """Reply to old DiffFox comments that are no longer flagged. + """Verify and resolve old DiffFox comments using LLM verification. - Finds all comments from previous DiffFox reviews, checks if each - is still present in the new findings, and replies appropriately. - - If a user has replied in the thread, their reply is acknowledged - as context rather than blindly marking as addressed. + For each old DiffFox comment, checks whether the issue was actually + fixed in the current code before posting an "Addressed" reply. Returns the number of comments resolved. """ @@ -37,36 +124,64 @@ async def resolve_addressed_comments( if not old_comments: return 0 - # Build lookup sets: (path, line) for location match + (path, title_prefix) for content match - new_locations: set[tuple[str, int]] = set() - new_titles: set[tuple[str, str]] = set() - for f in new_findings: - for line in range(f.line_start, f.line_end + 1): - new_locations.add((f.file_path, line)) - new_titles.add((f.file_path, f.title.lower().strip()[:40])) - - resolved_count = 0 + # Filter to comments that need checking + candidates: list[DiffFoxComment] = [] for comment in old_comments: - # Skip comments with no valid line (can't match reliably) + # Skip comments with no valid line if not comment.get("line"): continue - # Skip if location still matches a new finding - if (comment["path"], comment["line"]) in new_locations: + # Skip if already resolved (match exact bot reply prefix, not bare keywords) + all_replies = comment.get("all_replies", []) + already_resolved = any( + reply.startswith(marker) for reply in all_replies for marker in _BOT_RESOLVED_MARKERS + ) + if already_resolved: continue - # Also check title-based match (handles line shifts after rebase) - body_first_line = comment["body"].split("\n")[0].lower() - still_flagged = any( - title in body_first_line for _, title in new_titles if _ == comment["path"] + candidates.append(comment) + + if not candidates: + return 0 + + if len(candidates) > MAX_COMMENTS_TO_CHECK: + logger.warning( + "Capping resolution checks at %d (found %d candidates)", + MAX_COMMENTS_TO_CHECK, + len(candidates), ) - if still_flagged: + candidates = candidates[:MAX_COMMENTS_TO_CHECK] + + logger.info("Checking %d old comments for resolution", len(candidates)) + + # Run LLM verification concurrently (capped) + semaphore = asyncio.Semaphore(MAX_CONCURRENT_CHECKS) + + async def check_with_limit(c: DiffFoxComment) -> tuple[DiffFoxComment, ResolutionVerdict]: + async with semaphore: + verdict = await _check_resolution(c, repo, head_sha, scm, client, model) + return c, verdict + + results = await asyncio.gather( + *(check_with_limit(c) for c in candidates), + return_exceptions=True, + ) + + # Post replies for confirmed fixes + resolved_count = 0 + for result in results: + if isinstance(result, BaseException): + logger.debug("Resolution check failed: %s", result) continue - # Skip if already resolved — check ALL replies (including bot's own previous replies) - all_replies = comment.get("all_replies", []) - all_reply_text = " ".join(all_replies) - if "Addressed" in all_reply_text or "Acknowledged" in all_reply_text: + comment, verdict = result + if verdict.verdict != "fixed": + logger.debug( + "Comment %d not resolved (%s): %s", + comment["id"], + verdict.verdict, + verdict.reasoning, + ) continue user_replies = comment.get("user_replies", []) @@ -77,7 +192,7 @@ async def resolve_addressed_comments( repo, pr_number, comment["id"], - f"\u2705 **Acknowledged** — this issue is no longer detected. " + f"\u2705 **Acknowledged** \u2014 this issue has been fixed. " f"Noted your feedback: _{reply_summary}_", ) else: @@ -85,7 +200,7 @@ async def resolve_addressed_comments( repo, pr_number, comment["id"], - "\u2705 **Addressed** — this issue is no longer detected in the latest review.", + "\u2705 **Addressed** \u2014 this issue has been fixed in the current code.", ) resolved_count += 1 except Exception: diff --git a/src/diff_fox/run_review.py b/src/diff_fox/run_review.py index b69114f..bbf8126 100644 --- a/src/diff_fox/run_review.py +++ b/src/diff_fox/run_review.py @@ -248,7 +248,9 @@ async def run_review( # 17. Resolve old DiffFox comments that are no longer flagged if post_comments: - addressed = await resolve_addressed_comments(ranked, repo, pr_number, scm) + addressed = await resolve_addressed_comments( + repo, pr_number, pr.head_sha, scm, client, model + ) if addressed: logger.info("Resolved %d previously flagged comments", addressed) diff --git a/src/diff_fox/scm/github.py b/src/diff_fox/scm/github.py index 61eb8ca..7dc4908 100644 --- a/src/diff_fox/scm/github.py +++ b/src/diff_fox/scm/github.py @@ -131,8 +131,13 @@ async def get_diff(self, repo: str, pr_number: int) -> list[DiffFile]: return parse_diff_files(files_data) async def get_pr_commits(self, repo: str, pr_number: int) -> list[CommitInfo]: - """Fetch all commits for a pull request.""" - data = await self._get_paginated(f"/repos/{repo}/pulls/{pr_number}/commits") + """Fetch commits for a pull request (single page, max 50).""" + data = await self._get( + f"/repos/{repo}/pulls/{pr_number}/commits", + params={"per_page": 50}, + ) + if not isinstance(data, list): + return [] return [CommitInfo(sha=c["sha"], message=c["commit"]["message"]) for c in data] async def get_file_content(self, repo: str, path: str, ref: str) -> FileContent: