From 22bc43743b739eca75b7757e3ba4c2eab32d0fdb Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 5 Mar 2026 14:02:55 +0000 Subject: [PATCH 1/3] Migrate PR review plugin to extensions repository - Delete agent_script.py and prompt.py (now in extensions repo) - Update action.yml to load scripts from OpenHands/extensions - Change sdk-repo/sdk-version inputs to extensions-repo/extensions-version - Update README and workflow.yml to reflect new structure - Install openhands-sdk and openhands-tools from PyPI Related to #2282 Co-authored-by: openhands --- .github/actions/pr-review/action.yml | 25 +- .../02_pr_review/README.md | 26 +- .../02_pr_review/agent_script.py | 903 ------------------ .../02_pr_review/prompt.py | 112 --- .../02_pr_review/workflow.yml | 4 +- 5 files changed, 28 insertions(+), 1042 deletions(-) delete mode 100644 examples/03_github_workflows/02_pr_review/agent_script.py delete mode 100644 examples/03_github_workflows/02_pr_review/prompt.py diff --git a/.github/actions/pr-review/action.yml b/.github/actions/pr-review/action.yml index 15ebfebd6c..8b584a6b80 100644 --- a/.github/actions/pr-review/action.yml +++ b/.github/actions/pr-review/action.yml @@ -24,12 +24,12 @@ inputs: feedback focusing on data structures, simplicity, and pragmatism)" required: false default: roasted - sdk-repo: - description: GitHub repository for the SDK (owner/repo) + extensions-repo: + description: GitHub repository for extensions (owner/repo) required: false - default: OpenHands/software-agent-sdk - sdk-version: - description: Git ref to use for the SDK (tag, branch, or commit SHA, e.g., v1.0.0, main, or abc1234) + default: OpenHands/extensions + extensions-version: + description: Git ref to use for extensions (tag, branch, or commit SHA, e.g., v1.0.0, main, or abc1234) required: false default: main llm-api-key: @@ -46,12 +46,12 @@ inputs: runs: using: composite steps: - - name: Checkout software-agent-sdk repository + - name: Checkout extensions repository uses: actions/checkout@v4 with: - repository: ${{ inputs.sdk-repo }} - ref: ${{ inputs.sdk-version }} - path: software-agent-sdk + repository: ${{ inputs.extensions-repo }} + ref: ${{ inputs.extensions-version }} + path: extensions - name: Checkout PR repository uses: actions/checkout@v4 @@ -85,7 +85,8 @@ runs: - name: Install OpenHands dependencies shell: bash run: | - uv pip install --system ./software-agent-sdk/openhands-sdk ./software-agent-sdk/openhands-tools lmnr + # Install openhands SDK and tools from PyPI + uv pip install --system openhands-sdk openhands-tools lmnr - name: Check required configuration and select model id: select-model @@ -111,7 +112,7 @@ runs: echo "PR Number: ${{ github.event.pull_request.number }}" echo "PR Title: ${{ github.event.pull_request.title }}" echo "Repository: ${{ github.repository }}" - echo "SDK Version: ${{ inputs.sdk-version }}" + echo "Extensions Version: ${{ inputs.extensions-version }}" echo "Available models: $MODELS_LIST" echo "Selected LLM model: $SELECTED_MODEL" if [ -n "${{ inputs.llm-base-url }}" ]; then @@ -135,7 +136,7 @@ runs: REPO_NAME: ${{ github.repository }} run: | cd pr-repo - uv run python ../software-agent-sdk/examples/03_github_workflows/02_pr_review/agent_script.py + uv run python ../extensions/plugins/pr-review/scripts/agent_script.py - name: Upload logs as artifact uses: actions/upload-artifact@v4 diff --git a/examples/03_github_workflows/02_pr_review/README.md b/examples/03_github_workflows/02_pr_review/README.md index ff4f864927..77edf2b6e9 100644 --- a/examples/03_github_workflows/02_pr_review/README.md +++ b/examples/03_github_workflows/02_pr_review/README.md @@ -2,12 +2,12 @@ This example demonstrates how to set up a GitHub Actions workflow for automated pull request reviews using the OpenHands agent SDK. When a PR is labeled with `review-this` or when openhands-agent is added as a reviewer, OpenHands will analyze the changes and provide detailed, constructive feedback. +**Note**: The actual review scripts now live in the [OpenHands/extensions](https://github.com/OpenHands/extensions/tree/main/plugins/pr-review) repository. This directory contains the GitHub Action that references those scripts. + ## Files -- **`action.yml`**: Symlink to the composite GitHub Action (`.github/actions/pr-review/action.yml`) +- **`action.yml`**: Composite GitHub Action that loads scripts from the extensions repository - **`workflow.yml`**: Example GitHub Actions workflow file that uses the composite action -- **`agent_script.py`**: Python script that runs the OpenHands agent for PR review -- **`prompt.py`**: The prompt asking the agent to write the PR review - **`evaluate_review.py`**: Script to evaluate review effectiveness when PR is closed - **`README.md`**: This documentation file @@ -40,7 +40,7 @@ This example demonstrates how to set up a GitHub Actions workflow for automated - Potential issues and security concerns - Specific improvement suggestions - **GitHub API Integration**: Uses the GitHub API to post inline review comments directly on specific lines of code -- **Version Control**: Use `sdk-version` to pin to a specific version tag or branch +- **Version Control**: Use `extensions-version` to pin to a specific version tag or branch of the extensions repository ## Setup @@ -75,10 +75,10 @@ Edit `.github/workflows/pr-review-by-openhands.yml` to customize the inputs: llm-base-url: '' # Review style: roasted (other option: standard) review-style: roasted - # SDK git ref to use (tag, branch, or commit SHA, e.g., 'v1.0.0', 'main', or 'abc1234') - sdk-version: main - # Optional: override the SDK repo (owner/repo) if you forked it - sdk-repo: OpenHands/software-agent-sdk + # Extensions git ref to use (tag, branch, or commit SHA, e.g., 'v1.0.0', 'main', or 'abc1234') + extensions-version: main + # Optional: override the extensions repo (owner/repo) if you forked it + extensions-repo: OpenHands/extensions # Secrets llm-api-key: ${{ secrets.LLM_API_KEY }} github-token: ${{ secrets.GITHUB_TOKEN }} @@ -174,11 +174,11 @@ See the [software-agent-sdk's own code-review skill](https://github.com/OpenHand ## Composite Action -This workflow uses a reusable composite action located at `.github/actions/pr-review/action.yml` in the software-agent-sdk repository. The composite action handles: +This workflow uses a reusable composite action located in this directory (`action.yml`). The composite action handles: -- Checking out the SDK at the specified version +- Checking out the extensions repository at the specified version - Setting up Python and dependencies -- Running the PR review agent +- Running the PR review agent (from extensions repo) - Uploading logs as artifacts ### Action Inputs @@ -188,8 +188,8 @@ This workflow uses a reusable composite action located at `.github/actions/pr-re | `llm-model` | LLM model(s) - can be comma-separated for A/B testing | No | `anthropic/claude-sonnet-4-5-20250929` | | `llm-base-url` | LLM base URL (optional) | No | `''` | | `review-style` | Review style: 'standard' or 'roasted' | No | `roasted` | -| `sdk-version` | Git ref for SDK (tag, branch, or commit SHA) | No | `main` | -| `sdk-repo` | SDK repository (owner/repo) | No | `OpenHands/software-agent-sdk` | +| `extensions-version` | Git ref for extensions (tag, branch, or commit SHA) | No | `main` | +| `extensions-repo` | Extensions repository (owner/repo) | No | `OpenHands/extensions` | | `llm-api-key` | LLM API key | Yes | - | | `github-token` | GitHub token for API access | Yes | - | | `lmnr-api-key` | Laminar API key for observability (optional) | No | - | diff --git a/examples/03_github_workflows/02_pr_review/agent_script.py b/examples/03_github_workflows/02_pr_review/agent_script.py deleted file mode 100644 index 7ee67dc46b..0000000000 --- a/examples/03_github_workflows/02_pr_review/agent_script.py +++ /dev/null @@ -1,903 +0,0 @@ -#!/usr/bin/env python3 -""" -Example: PR Review Agent - -This script runs OpenHands agent to review a pull request and provide -fine-grained review comments. The agent has full repository access and uses -bash commands to analyze changes in context and post detailed review feedback -directly via `gh` or the GitHub API. - -This example demonstrates how to use skills for code review: -- `/codereview` - Standard code review skill -- `/codereview-roasted` - Linus Torvalds style brutally honest review - -The agent posts inline review comments on specific lines of code using the -GitHub API, rather than posting one giant comment under the PR. - -The agent also considers previous review context including: -- Existing review comments and their resolution status -- Previous review decisions (APPROVED, CHANGES_REQUESTED, etc.) -- Review threads (resolved and unresolved) - -Designed for use with GitHub Actions workflows triggered by PR labels. - -Environment Variables: - LLM_API_KEY: API key for the LLM (required) - LLM_MODEL: Language model to use (default: anthropic/claude-sonnet-4-5-20250929) - LLM_BASE_URL: Optional base URL for LLM API - GITHUB_TOKEN: GitHub token for API access (required) - PR_NUMBER: Pull request number (required) - PR_TITLE: Pull request title (required) - PR_BODY: Pull request body (optional) - PR_BASE_BRANCH: Base branch name (required) - PR_HEAD_BRANCH: Head branch name (required) - REPO_NAME: Repository name in format owner/repo (required) - REVIEW_STYLE: Review style ('standard' or 'roasted', default: 'standard') - -For setup instructions, usage examples, and GitHub Actions integration, -see README.md in this directory. -""" - -from __future__ import annotations - -import json -import os -import sys -import time -import urllib.error -import urllib.request -from collections.abc import Callable -from pathlib import Path -from typing import Any - -from lmnr import Laminar - -from openhands.sdk import LLM, Agent, AgentContext, Conversation, get_logger -from openhands.sdk.context.skills import load_project_skills -from openhands.sdk.conversation import get_agent_final_response -from openhands.sdk.git.utils import run_git_command -from openhands.tools.preset.default import get_default_condenser, get_default_tools - - -# Add the script directory to Python path so we can import prompt.py -script_dir = Path(__file__).parent -sys.path.insert(0, str(script_dir)) - -from prompt import format_prompt # noqa: E402 - - -logger = get_logger(__name__) - -# Maximum total diff size -MAX_TOTAL_DIFF = 100000 -# Maximum size for review context to avoid overwhelming the prompt -# Keeps context under ~7500 tokens (assuming ~4 chars/token average) -MAX_REVIEW_CONTEXT = 30000 -# Maximum time (seconds) for GraphQL pagination to prevent hanging on slow APIs -MAX_PAGINATION_TIME = 120 - - -def _get_required_env(name: str) -> str: - value = os.getenv(name) - if not value: - raise ValueError(f"{name} environment variable is required") - return value - - -def _call_github_api( - url: str, - method: str = "GET", - data: dict[str, Any] | None = None, - accept: str = "application/vnd.github+json", -) -> Any: - """Make a GitHub API request (REST or GraphQL). - - This function handles both REST API calls and GraphQL queries (via the /graphql - endpoint). The function name reflects this dual purpose. - - Args: - url: Full API URL or path (will be prefixed with api.github.com if needed) - method: HTTP method (GET, POST, etc.) - data: JSON data to send (for POST/PUT requests, including GraphQL queries) - accept: Accept header value - - Returns: - Parsed JSON response or raw text for diff requests - """ - token = _get_required_env("GITHUB_TOKEN") - - if not url.startswith("http"): - url = f"https://api.github.com{url}" - - request = urllib.request.Request(url, method=method) - request.add_header("Accept", accept) - request.add_header("Authorization", f"Bearer {token}") - request.add_header("X-GitHub-Api-Version", "2022-11-28") - - if data: - request.add_header("Content-Type", "application/json") - request.data = json.dumps(data).encode("utf-8") - - try: - with urllib.request.urlopen(request, timeout=60) as response: - raw_data = response.read() - if "diff" in accept: - return raw_data.decode("utf-8", errors="replace") - return json.loads(raw_data.decode("utf-8")) - except urllib.error.HTTPError as e: - details = (e.read() or b"").decode("utf-8", errors="replace").strip() - raise RuntimeError( - f"GitHub API request failed: HTTP {e.code} {e.reason}. {details}" - ) from e - except urllib.error.URLError as e: - raise RuntimeError(f"GitHub API request failed: {e.reason}") from e - except json.JSONDecodeError as e: - raise RuntimeError(f"GitHub API returned invalid JSON: {e}") from e - - -def get_pr_reviews(pr_number: str, max_reviews: int = 100) -> list[dict[str, Any]]: - """Fetch the latest reviews for a PR using GraphQL. - - Uses GraphQL with `last` to fetch the most recent reviews directly, - avoiding the need to paginate through all reviews from oldest to newest. - - Args: - pr_number: The PR number - max_reviews: Maximum number of reviews to return (default: 100) - - Returns a list of review objects containing: - - id: Review ID - - user: Author information - - body: Review body text - - state: APPROVED, CHANGES_REQUESTED, COMMENTED, DISMISSED, PENDING - - submitted_at: When the review was submitted - """ - repo = _get_required_env("REPO_NAME") - owner, repo_name = repo.split("/") - - # Use GraphQL to fetch the latest reviews directly - # `last: N` fetches the N most recent items - query = """ - query( - $owner: String! - $repo: String! - $pr_number: Int! - $count: Int! - $cursor: String - ) { - repository(owner: $owner, name: $repo) { - pullRequest(number: $pr_number) { - reviews(last: $count, before: $cursor) { - pageInfo { - hasPreviousPage - startCursor - } - nodes { - id - author { - login - } - body - state - submittedAt - } - } - } - } - } - """ - - all_reviews: list[dict[str, Any]] = [] - cursor = None - start_time = time.time() - page_count = 0 - - while len(all_reviews) < max_reviews: - # Check for pagination timeout - elapsed = time.time() - start_time - if elapsed > MAX_PAGINATION_TIME: - logger.warning( - f"Reviews pagination timeout after {elapsed:.1f}s, " - f"fetched {len(all_reviews)} reviews across {page_count} pages" - ) - break - - # Fetch up to remaining needed reviews - remaining = max_reviews - len(all_reviews) - fetch_count = min(remaining, 100) # GraphQL max is 100 per request - - variables = { - "owner": owner, - "repo": repo_name, - "pr_number": int(pr_number), - "count": fetch_count, - "cursor": cursor, - } - - result = _call_github_api( - "https://api.github.com/graphql", - method="POST", - data={"query": query, "variables": variables}, - ) - - if "errors" in result: - logger.warning(f"GraphQL errors fetching reviews: {result['errors']}") - break - - pr_data = result.get("data", {}).get("repository", {}).get("pullRequest") - if not pr_data: - break - - reviews_data = pr_data.get("reviews", {}) - nodes = reviews_data.get("nodes", []) - page_count += 1 - - if not nodes: - break - - # Convert GraphQL format to REST-like format for compatibility - for node in nodes: - author = node.get("author") or {} - all_reviews.append( - { - "id": node.get("id"), - "user": {"login": author.get("login", "unknown")}, - "body": node.get("body", ""), - "state": node.get("state", "UNKNOWN"), - "submitted_at": node.get("submittedAt"), - } - ) - - logger.debug( - f"Fetched page {page_count} with {len(nodes)} reviews " - f"(total: {len(all_reviews)})" - ) - - page_info = reviews_data.get("pageInfo", {}) - if not page_info.get("hasPreviousPage"): - break - cursor = page_info.get("startCursor") - - # Reviews are fetched newest-first with `last`, reverse to get chronological order - # (oldest first) for consistent display - return list(reversed(all_reviews)) - - -def get_review_threads_graphql(pr_number: str) -> list[dict[str, Any]]: - """Fetch the latest review threads with resolution status using GraphQL API. - - The REST API doesn't expose thread resolution status, so we use GraphQL. - Uses `last` to fetch the most recent threads first, ensuring we get the - latest discussions rather than the oldest ones. - - Note: This query fetches up to 100 review threads per page, each with up to - 50 comments. For PRs exceeding these limits, older threads/comments may be - omitted. We paginate through threads but not through comments within threads. - - Returns a list of thread objects containing: - - id: Thread ID - - isResolved: Whether the thread is resolved - - isOutdated: Whether the thread is outdated (code changed) - - path: File path - - line: Line number - - comments: List of comments in the thread (up to 50 per thread) - """ - repo = _get_required_env("REPO_NAME") - owner, repo_name = repo.split("/") - - # Use `last` to fetch the most recent threads first - # `before: $cursor` paginates backwards through older threads - query = """ - query($owner: String!, $repo: String!, $pr_number: Int!, $cursor: String) { - repository(owner: $owner, name: $repo) { - pullRequest(number: $pr_number) { - reviewThreads(last: 100, before: $cursor) { - pageInfo { - hasPreviousPage - startCursor - } - nodes { - id - isResolved - isOutdated - path - line - comments(first: 50) { - nodes { - id - author { - login - } - body - createdAt - } - } - } - } - } - } - } - """ - - threads: list[dict[str, Any]] = [] - cursor = None - start_time = time.time() - page_count = 0 - has_more_pages = False - - while True: - # Check for overall pagination timeout - elapsed = time.time() - start_time - if elapsed > MAX_PAGINATION_TIME: - logger.warning( - f"GraphQL pagination timeout after {elapsed:.1f}s, " - f"fetched {len(threads)} threads across {page_count} pages" - ) - break - - variables = { - "owner": owner, - "repo": repo_name, - "pr_number": int(pr_number), - "cursor": cursor, - } - - result = _call_github_api( - "https://api.github.com/graphql", - method="POST", - data={"query": query, "variables": variables}, - ) - - if "errors" in result: - logger.warning(f"GraphQL errors: {result['errors']}") - break - - pr_data = result.get("data", {}).get("repository", {}).get("pullRequest") - if not pr_data: - break - - review_threads = pr_data.get("reviewThreads", {}) - nodes = review_threads.get("nodes", []) - threads.extend(nodes) - page_count += 1 - - logger.debug( - f"Fetched page {page_count} with {len(nodes)} threads " - f"(total: {len(threads)})" - ) - - page_info = review_threads.get("pageInfo", {}) - has_more_pages = page_info.get("hasPreviousPage", False) - if not has_more_pages: - break - cursor = page_info.get("startCursor") - - # Warn only if there are actually more pages we didn't fetch - if has_more_pages: - logger.warning( - f"Review threads limited to {len(threads)} threads. " - "Some threads may be omitted for PRs with extensive review history." - ) - - # Threads are fetched newest-first with `last`, reverse to get chronological order - return list(reversed(threads)) - - -def format_review_context( - reviews: list[dict[str, Any]], - threads: list[dict[str, Any]], - max_size: int = MAX_REVIEW_CONTEXT, -) -> str: - """Format review history into a context string for the agent. - - Args: - reviews: List of review objects from get_pr_reviews() - threads: List of thread objects from get_review_threads_graphql() - max_size: Maximum size of the formatted context - - Returns: - Formatted markdown string with review history - """ - if not reviews and not threads: - return "" - - sections: list[str] = [] - current_size = 0 - - def _add_section(section: str) -> bool: - """Add a section if it fits within max_size. Returns True if added.""" - nonlocal current_size - section_size = len(section) + 1 # +1 for newline separator - if current_size + section_size > max_size: - return False - sections.append(section) - current_size += section_size - return True - - # Format reviews (high-level review decisions) - if reviews: - review_lines: list[str] = ["### Previous Reviews\n"] - for review in reviews: - user_data = review.get("user") or {} - user = user_data.get("login", "unknown") - state = review.get("state") or "UNKNOWN" - body = (review.get("body") or "").strip() - - # Map state to emoji for visual clarity - state_emoji = { - "APPROVED": "✅", - "CHANGES_REQUESTED": "🔴", - "COMMENTED": "💬", - "DISMISSED": "❌", - "PENDING": "⏳", - }.get(state, "❓") - - review_lines.append(f"- {state_emoji} **{user}** ({state})") - if body: - # Indent the body and truncate if too long - body_preview = body[:500] + "..." if len(body) > 500 else body - indented = "\n".join(f" > {line}" for line in body_preview.split("\n")) - review_lines.append(indented) - review_lines.append("") - - review_section = "\n".join(review_lines) - if not _add_section(review_section): - # Even reviews section doesn't fit, return truncation message - return ( - f"... [review context truncated, " - f"content exceeds {max_size:,} chars] ..." - ) - - # Format review threads with resolution status - if threads: - resolved_threads = [t for t in threads if t.get("isResolved")] - unresolved_threads = [t for t in threads if not t.get("isResolved")] - - # Unresolved threads (higher priority) - if unresolved_threads: - header = ( - "### Unresolved Review Threads\n\n" - "*These threads have not been resolved and may need attention:*\n" - ) - if not _add_section(header): - count = len(unresolved_threads) - sections.append( - f"\n... [truncated, {count} unresolved threads omitted] ..." - ) - else: - threads_added = 0 - for thread in unresolved_threads: - thread_lines = _format_thread(thread) - thread_section = "\n".join(thread_lines) - if not _add_section(thread_section): - remaining = len(unresolved_threads) - threads_added - sections.append( - f"\n... [truncated, {remaining} unresolved " - "threads omitted] ..." - ) - break - threads_added += 1 - - # Resolved threads (lower priority, add if space remains) - if resolved_threads and current_size < max_size: - header = ( - "### Resolved Review Threads\n\n" - "*These threads have been resolved but provide context:*\n" - ) - if _add_section(header): - threads_added = 0 - for thread in resolved_threads: - thread_lines = _format_thread(thread) - thread_section = "\n".join(thread_lines) - if not _add_section(thread_section): - remaining = len(resolved_threads) - threads_added - sections.append( - f"\n... [truncated, {remaining} resolved " - "threads omitted] ..." - ) - break - threads_added += 1 - - return "\n".join(sections) - - -def _format_thread(thread: dict[str, Any]) -> list[str]: - """Format a single review thread. - - Args: - thread: Thread object from GraphQL - - Returns: - List of formatted lines - """ - lines: list[str] = [] - path = thread.get("path", "unknown") - line_num = thread.get("line") - is_outdated = thread.get("isOutdated", False) - is_resolved = thread.get("isResolved", False) - - # Thread header - status = "✅ RESOLVED" if is_resolved else "⚠️ UNRESOLVED" - outdated = " (outdated)" if is_outdated else "" - location = f"{path}" - if line_num: - location += f":{line_num}" - - lines.append(f"**{location}**{outdated} - {status}") - - # Thread comments - comments_data = thread.get("comments") or {} - comments = comments_data.get("nodes") or [] - for comment in comments: - author_data = comment.get("author") or {} - author = author_data.get("login", "unknown") - body = (comment.get("body") or "").strip() - if body: - # Truncate individual comments if too long - body_preview = body[:300] + "..." if len(body) > 300 else body - indented = "\n".join(f" > {line}" for line in body_preview.split("\n")) - lines.append(f" - **{author}**:") - lines.append(indented) - - lines.append("") - return lines - - -def _fetch_with_fallback( - name: str, fetch_fn: Callable[[], list[dict[str, Any]]] -) -> list[dict[str, Any]]: - """Fetch data with error handling and logging. - - Args: - name: Name of the data being fetched (for logging) - fetch_fn: Function to call to fetch the data - - Returns: - Fetched data or empty list on error - """ - try: - data = fetch_fn() - logger.info(f"Fetched {len(data)} {name}") - return data - except Exception as e: - logger.warning(f"Failed to fetch {name}: {e}") - return [] - - -def get_pr_review_context(pr_number: str) -> str: - """Get all review context for a PR. - - Fetches reviews and review threads, then formats them into a context string. - - Args: - pr_number: The PR number - - Returns: - Formatted review context string, or empty string if no context - """ - reviews = _fetch_with_fallback("reviews", lambda: get_pr_reviews(pr_number)) - threads = _fetch_with_fallback( - "review threads", lambda: get_review_threads_graphql(pr_number) - ) - - return format_review_context(reviews, threads) - - -def get_pr_diff_via_github_api(pr_number: str) -> str: - """Fetch the PR diff exactly as GitHub renders it. - - Uses the GitHub REST API "Get a pull request" endpoint with an `Accept` - header requesting diff output. - - This avoids depending on local git refs (often stale/missing in - `pull_request_target` checkouts). - """ - - repo = _get_required_env("REPO_NAME") - token = _get_required_env("GITHUB_TOKEN") - - url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}" - request = urllib.request.Request(url) - request.add_header("Accept", "application/vnd.github.v3.diff") - request.add_header("Authorization", f"Bearer {token}") - request.add_header("X-GitHub-Api-Version", "2022-11-28") - - try: - with urllib.request.urlopen(request, timeout=60) as response: - data = response.read() - except urllib.error.HTTPError as e: - details = (e.read() or b"").decode("utf-8", errors="replace").strip() - raise RuntimeError( - f"GitHub diff API request failed: HTTP {e.code} {e.reason}. {details}" - ) from e - except urllib.error.URLError as e: - raise RuntimeError(f"GitHub diff API request failed: {e.reason}") from e - - return data.decode("utf-8", errors="replace") - - -def truncate_text(diff_text: str, max_total: int = MAX_TOTAL_DIFF) -> str: - if len(diff_text) <= max_total: - return diff_text - - total_chars = len(diff_text) - return ( - diff_text[:max_total] - + f"\n\n... [total diff truncated, {total_chars:,} chars total, " - + f"showing first {max_total:,}] ..." - ) - - -def get_truncated_pr_diff() -> str: - """Get the PR diff with truncation. - - This uses GitHub as the source of truth so the review matches the PR's - "Files changed" view. - """ - - pr_number = _get_required_env("PR_NUMBER") - diff_text = get_pr_diff_via_github_api(pr_number) - return truncate_text(diff_text) - - -def get_head_commit_sha(repo_dir: Path | None = None) -> str: - """ - Get the SHA of the HEAD commit. - - Args: - repo_dir: Path to the repository (defaults to cwd) - - Returns: - The commit SHA - """ - if repo_dir is None: - repo_dir = Path.cwd() - - return run_git_command(["git", "rev-parse", "HEAD"], repo_dir).strip() - - -def main(): - """Run the PR review agent.""" - logger.info("Starting PR review process...") - - # Validate required environment variables - required_vars = [ - "LLM_API_KEY", - "GITHUB_TOKEN", - "PR_NUMBER", - "PR_TITLE", - "PR_BASE_BRANCH", - "PR_HEAD_BRANCH", - "REPO_NAME", - ] - - missing_vars = [var for var in required_vars if not os.getenv(var)] - if missing_vars: - logger.error(f"Missing required environment variables: {missing_vars}") - sys.exit(1) - - github_token = os.getenv("GITHUB_TOKEN") - - # Get PR information - pr_info = { - "number": os.getenv("PR_NUMBER"), - "title": os.getenv("PR_TITLE"), - "body": os.getenv("PR_BODY", ""), - "repo_name": os.getenv("REPO_NAME"), - "base_branch": os.getenv("PR_BASE_BRANCH"), - "head_branch": os.getenv("PR_HEAD_BRANCH"), - } - - # Get review style - default to standard - review_style = os.getenv("REVIEW_STYLE", "standard").lower() - if review_style not in ("standard", "roasted"): - logger.warning(f"Unknown REVIEW_STYLE '{review_style}', using 'standard'") - review_style = "standard" - - logger.info(f"Reviewing PR #{pr_info['number']}: {pr_info['title']}") - logger.info(f"Review style: {review_style}") - - try: - pr_diff = get_truncated_pr_diff() - logger.info(f"Got PR diff with {len(pr_diff)} characters") - - # Get the HEAD commit SHA for inline comments - commit_id = get_head_commit_sha() - logger.info(f"HEAD commit SHA: {commit_id}") - - # Fetch previous review context (comments, threads, resolution status) - pr_number = pr_info.get("number", "") - review_context = get_pr_review_context(pr_number) - if review_context: - logger.info(f"Got review context with {len(review_context)} characters") - else: - logger.info("No previous review context found") - - # Create the review prompt using the template - # Include the skill trigger keyword to activate the appropriate skill - skill_trigger = ( - "/codereview" if review_style == "standard" else "/codereview-roasted" - ) - prompt = format_prompt( - skill_trigger=skill_trigger, - title=pr_info.get("title", "N/A"), - body=pr_info.get("body") or "No description provided", - repo_name=pr_info.get("repo_name", "N/A"), - base_branch=pr_info.get("base_branch", "main"), - head_branch=pr_info.get("head_branch", "N/A"), - pr_number=pr_number, - commit_id=commit_id, - diff=pr_diff, - review_context=review_context, - ) - - # Configure LLM - api_key = os.getenv("LLM_API_KEY") - model = os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929") - base_url = os.getenv("LLM_BASE_URL") - - llm_config = { - "model": model, - "api_key": api_key, - "usage_id": "pr_review_agent", - "drop_params": True, - } - - if base_url: - llm_config["base_url"] = base_url - - llm = LLM(**llm_config) - - # Get the current working directory as workspace - cwd = os.getcwd() - - # Load project-specific skills from the repository being reviewed - # This includes AGENTS.md, .cursorrules, and skills from .agents/skills/ - project_skills = load_project_skills(cwd) - logger.info( - f"Loaded {len(project_skills)} project skills: " - f"{[s.name for s in project_skills]}" - ) - - # Create AgentContext with public skills enabled and project skills - # Public skills from https://github.com/OpenHands/extensions include: - # - /codereview: Standard code review skill - # - /codereview-roasted: Linus Torvalds style brutally honest review - # Project skills include repo-specific guidance (AGENTS.md, etc.) - agent_context = AgentContext( - load_public_skills=True, - skills=project_skills, - ) - - # Create agent with default tools and agent context - # Note: agent_context must be passed at initialization since Agent is frozen - agent = Agent( - llm=llm, - tools=get_default_tools(enable_browser=False), # CLI mode - no browser - agent_context=agent_context, - system_prompt_kwargs={"cli_mode": True}, - condenser=get_default_condenser( - llm=llm.model_copy(update={"usage_id": "condenser"}) - ), - ) - - # Create conversation with secrets for masking - # These secrets will be masked in agent output to prevent accidental exposure - secrets = {} - if api_key: - secrets["LLM_API_KEY"] = api_key - if github_token: - secrets["GITHUB_TOKEN"] = github_token - - conversation = Conversation( - agent=agent, - workspace=cwd, - secrets=secrets, - ) - - logger.info("Starting PR review analysis...") - logger.info("Agent received the PR diff in the initial message") - logger.info(f"Using skill trigger: {skill_trigger}") - logger.info("Agent will post inline review comments directly via GitHub API") - - # Send the prompt and run the agent - # The agent will analyze the code and post inline review comments - # directly to the PR using the GitHub API - conversation.send_message(prompt) - conversation.run() - - # The agent should have posted review comments via GitHub API - # Log the final response for debugging purposes - review_content = get_agent_final_response(conversation.state.events) - if review_content: - logger.info(f"Agent final response: {len(review_content)} characters") - - # Print cost information for CI output - metrics = conversation.conversation_stats.get_combined_metrics() - print("\n=== PR Review Cost Summary ===") - print(f"Total Cost: ${metrics.accumulated_cost:.6f}") - if metrics.accumulated_token_usage: - token_usage = metrics.accumulated_token_usage - print(f"Prompt Tokens: {token_usage.prompt_tokens}") - print(f"Completion Tokens: {token_usage.completion_tokens}") - if token_usage.cache_read_tokens > 0: - print(f"Cache Read Tokens: {token_usage.cache_read_tokens}") - if token_usage.cache_write_tokens > 0: - print(f"Cache Write Tokens: {token_usage.cache_write_tokens}") - - # Capture and store trace context for delayed evaluation - # When the PR is merged/closed, we can use this context to add the - # evaluation span to the same trace, enabling signals to analyze both - # the original review and evaluation together. - # Note: Laminar methods gracefully handle the uninitialized case by - # returning None or early-returning, so no try/except needed. - trace_id = Laminar.get_trace_id() - # Use model_dump(mode='json') to ensure UUIDs are serialized as strings - # for JSON compatibility. get_laminar_span_context_dict() returns UUID - # objects which are not JSON serializable. - laminar_span_context = Laminar.get_laminar_span_context() - span_context = ( - laminar_span_context.model_dump(mode="json") - if laminar_span_context - else None - ) - - if trace_id and laminar_span_context: - # Set trace metadata within an active span context - # Using start_as_current_span with parent_span_context to continue the trace - with Laminar.start_as_current_span( - name="pr-review-metadata", - parent_span_context=laminar_span_context, - ) as _: - # Set trace metadata within this active span context - # Include model for A/B testing analysis - pr_url = f"https://github.com/{pr_info['repo_name']}/pull/{pr_info['number']}" - Laminar.set_trace_metadata( - { - "pr_number": pr_info["number"], - "repo_name": pr_info["repo_name"], - "pr_url": pr_url, - "workflow_phase": "review", - "review_style": review_style, - "model": model, - } - ) - - # Store trace context in file for GitHub artifact upload - # This allows the evaluation workflow to add its span to this trace - # The span_context includes trace_id, span_id, and span_path needed - # to continue the trace across separate workflow runs. - trace_data = { - "trace_id": str(trace_id), - "span_context": span_context, - "pr_number": pr_info["number"], - "repo_name": pr_info["repo_name"], - "commit_id": commit_id, - "review_style": review_style, - "model": model, - } - with open("laminar_trace_info.json", "w") as f: - json.dump(trace_data, f, indent=2) - logger.info(f"Laminar trace ID: {trace_id}") - logger.info(f"Model used: {model}") - if span_context: - logger.info("Laminar span context captured for trace continuation") - print("\n=== Laminar Trace ===") - print(f"Trace ID: {trace_id}") - - # Ensure trace is flushed to Laminar before workflow ends - Laminar.flush() - else: - logger.warning( - "No Laminar trace ID found - observability may not be enabled" - ) - - logger.info("PR review completed successfully") - - except Exception as e: - logger.error(f"PR review failed: {e}") - sys.exit(1) - - -if __name__ == "__main__": - main() diff --git a/examples/03_github_workflows/02_pr_review/prompt.py b/examples/03_github_workflows/02_pr_review/prompt.py deleted file mode 100644 index 31cbd2dd1c..0000000000 --- a/examples/03_github_workflows/02_pr_review/prompt.py +++ /dev/null @@ -1,112 +0,0 @@ -""" -PR Review Prompt Template - -This module contains the prompt template used by the OpenHands agent -for conducting pull request reviews. - -The template uses skill triggers: -- {skill_trigger} will be replaced with '/codereview' or '/codereview-roasted' -- /github-pr-review provides instructions for posting review comments via GitHub API - -The template includes: -- {diff} - The complete git diff for the PR (may be truncated for large files) -- {pr_number} - The PR number -- {commit_id} - The HEAD commit SHA -- {review_context} - Previous review comments and thread resolution status -""" - -# Template for when there is review context available -_REVIEW_CONTEXT_SECTION = """ -## Previous Review History - -The following shows previous reviews and review threads on this PR. Pay attention to: -- **Unresolved threads**: These issues may still need to be addressed -- **Resolved threads**: These provide context on what was already discussed -- **Previous review decisions**: See what other reviewers have said - -{review_context} - -When reviewing, consider: -1. Don't repeat comments that have already been made and are still relevant -2. If an issue is still unresolved in the code, you may reference it -3. If resolved, don't bring it up unless the fix introduced new problems -4. Focus on NEW issues in the current diff that haven't been discussed yet -""" - -PROMPT = """{skill_trigger} -/github-pr-review - -When posting a review, keep the review body brief unless your active review -instructions require a longer structured format. - -Review the PR changes below and identify issues that need to be addressed. - -## Pull Request Information -- **Title**: {title} -- **Description**: {body} -- **Repository**: {repo_name} -- **Base Branch**: {base_branch} -- **Head Branch**: {head_branch} -- **PR Number**: {pr_number} -- **Commit ID**: {commit_id} -{review_context_section} -## Git Diff - -```diff -{diff} -``` - -Analyze the changes and post your review using the GitHub API. -""" - - -def format_prompt( - skill_trigger: str, - title: str, - body: str, - repo_name: str, - base_branch: str, - head_branch: str, - pr_number: str, - commit_id: str, - diff: str, - review_context: str = "", -) -> str: - """Format the PR review prompt with all parameters. - - Args: - skill_trigger: The skill trigger (e.g., '/codereview' or '/codereview-roasted') - title: PR title - body: PR description - repo_name: Repository name (owner/repo) - base_branch: Base branch name - head_branch: Head branch name - pr_number: PR number - commit_id: HEAD commit SHA - diff: Git diff content - review_context: Formatted previous review context. If empty or whitespace-only, - the review context section is omitted from the prompt. - - Returns: - Formatted prompt string - """ - # Only include the review context section if there is actual context - if review_context and review_context.strip(): - review_context_section = _REVIEW_CONTEXT_SECTION.format( - review_context=review_context - ) - else: - review_context_section = "" - - return PROMPT.format( - skill_trigger=skill_trigger, - title=title, - body=body, - repo_name=repo_name, - base_branch=base_branch, - head_branch=head_branch, - pr_number=pr_number, - commit_id=commit_id, - review_context_section=review_context_section, - diff=diff, - ) diff --git a/examples/03_github_workflows/02_pr_review/workflow.yml b/examples/03_github_workflows/02_pr_review/workflow.yml index aab54c8c53..944bb04e9f 100644 --- a/examples/03_github_workflows/02_pr_review/workflow.yml +++ b/examples/03_github_workflows/02_pr_review/workflow.yml @@ -49,8 +49,8 @@ jobs: llm-base-url: '' # Review style: roasted (other option: standard) review-style: roasted - # SDK version to use (version tag or branch name) - sdk-version: main + # Extensions version to use (version tag or branch name) + extensions-version: main # Secrets llm-api-key: ${{ secrets.LLM_API_KEY }} github-token: ${{ secrets.GITHUB_TOKEN }} From 8a8e28b00850dc3e8522594a977dc8f24d957156 Mon Sep 17 00:00:00 2001 From: openhands Date: Mon, 9 Mar 2026 23:21:27 +0000 Subject: [PATCH 2/3] Remove pr-review action and inline logic into workflows --- .github/actions/pr-review/action.yml | 158 ------------------ .github/workflows/pr-review-by-openhands.yml | 91 ++++++++-- .../02_pr_review/README.md | 80 ++++----- .../02_pr_review/action.yml | 1 - .../02_pr_review/workflow.yml | 82 +++++++-- 5 files changed, 178 insertions(+), 234 deletions(-) delete mode 100644 .github/actions/pr-review/action.yml delete mode 120000 examples/03_github_workflows/02_pr_review/action.yml diff --git a/.github/actions/pr-review/action.yml b/.github/actions/pr-review/action.yml deleted file mode 100644 index 8b584a6b80..0000000000 --- a/.github/actions/pr-review/action.yml +++ /dev/null @@ -1,158 +0,0 @@ ---- -name: OpenHands PR Review -description: Automated PR review using OpenHands agent -author: OpenHands - -branding: - icon: code - color: blue - -inputs: - llm-model: - description: > - LLM model to use for the review. Can be a comma-separated list - for A/B testing - one model will be randomly selected per review. - Example: 'model-a' or 'model-a,model-b,model-c' - required: false - default: anthropic/claude-sonnet-4-5-20250929 - llm-base-url: - description: LLM base URL (optional, for custom LLM endpoints) - required: false - default: '' - review-style: - description: "Review style: 'standard' (balanced review covering style, readability, and security) or 'roasted' (Linus Torvalds-style brutally honest - feedback focusing on data structures, simplicity, and pragmatism)" - required: false - default: roasted - extensions-repo: - description: GitHub repository for extensions (owner/repo) - required: false - default: OpenHands/extensions - extensions-version: - description: Git ref to use for extensions (tag, branch, or commit SHA, e.g., v1.0.0, main, or abc1234) - required: false - default: main - llm-api-key: - description: LLM API key (required) - required: true - github-token: - description: GitHub token for API access (required) - required: true - lmnr-api-key: - description: Laminar API key for observability (optional) - required: false - default: '' - -runs: - using: composite - steps: - - name: Checkout extensions repository - uses: actions/checkout@v4 - with: - repository: ${{ inputs.extensions-repo }} - ref: ${{ inputs.extensions-version }} - path: extensions - - - name: Checkout PR repository - uses: actions/checkout@v4 - with: - repository: ${{ github.event.pull_request.head.repo.full_name }} - ref: ${{ github.event.pull_request.head.ref }} - fetch-depth: 0 - persist-credentials: false - path: pr-repo - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.12' - - # Security: this workflow executes untrusted PR content (diff/title/body) via an - # LLM-powered reviewer agent that can run Bash. GitHub Actions caches are shared - # across workflows within a repository and can enable cache-poisoning pivots into - # more-privileged workflows. Keep caching disabled here. - - name: Install uv - uses: astral-sh/setup-uv@v6 - with: - enable-cache: false - - - name: Install GitHub CLI - shell: bash - run: | - sudo apt-get update - sudo apt-get install -y gh - - - name: Install OpenHands dependencies - shell: bash - run: | - # Install openhands SDK and tools from PyPI - uv pip install --system openhands-sdk openhands-tools lmnr - - - name: Check required configuration and select model - id: select-model - shell: bash - env: - LLM_API_KEY: ${{ inputs.llm-api-key }} - GITHUB_TOKEN: ${{ inputs.github-token }} - run: | - if [ -z "$LLM_API_KEY" ]; then - echo "Error: llm-api-key is required." - exit 1 - fi - if [ -z "$GITHUB_TOKEN" ]; then - echo "Error: github-token is required." - exit 1 - fi - - # Select one model randomly from the comma-separated list - MODELS_LIST="${{ inputs.llm-model }}" - SELECTED_MODEL=$(echo "$MODELS_LIST" | tr ',' '\n' | shuf -n 1 | xargs) - echo "selected_model=$SELECTED_MODEL" >> $GITHUB_OUTPUT - - echo "PR Number: ${{ github.event.pull_request.number }}" - echo "PR Title: ${{ github.event.pull_request.title }}" - echo "Repository: ${{ github.repository }}" - echo "Extensions Version: ${{ inputs.extensions-version }}" - echo "Available models: $MODELS_LIST" - echo "Selected LLM model: $SELECTED_MODEL" - if [ -n "${{ inputs.llm-base-url }}" ]; then - echo "LLM base URL: ${{ inputs.llm-base-url }}" - fi - - - name: Run PR review - shell: bash - env: - LLM_MODEL: ${{ steps.select-model.outputs.selected_model }} - LLM_BASE_URL: ${{ inputs.llm-base-url }} - REVIEW_STYLE: ${{ inputs.review-style }} - LLM_API_KEY: ${{ inputs.llm-api-key }} - GITHUB_TOKEN: ${{ inputs.github-token }} - LMNR_PROJECT_API_KEY: ${{ inputs.lmnr-api-key }} - PR_NUMBER: ${{ github.event.pull_request.number }} - PR_TITLE: ${{ github.event.pull_request.title }} - PR_BODY: ${{ github.event.pull_request.body }} - PR_BASE_BRANCH: ${{ github.event.pull_request.base.ref }} - PR_HEAD_BRANCH: ${{ github.event.pull_request.head.ref }} - REPO_NAME: ${{ github.repository }} - run: | - cd pr-repo - uv run python ../extensions/plugins/pr-review/scripts/agent_script.py - - - name: Upload logs as artifact - uses: actions/upload-artifact@v4 - if: always() - with: - name: openhands-pr-review-logs - path: | - *.log - output/ - retention-days: 7 - - - name: Upload Laminar trace info for evaluation - uses: actions/upload-artifact@v4 - if: success() - with: - name: pr-review-trace-${{ github.event.pull_request.number }} - path: pr-repo/laminar_trace_info.json - retention-days: 30 - if-no-files-found: ignore diff --git a/.github/workflows/pr-review-by-openhands.yml b/.github/workflows/pr-review-by-openhands.yml index ec8dc3786c..668dc695b1 100644 --- a/.github/workflows/pr-review-by-openhands.yml +++ b/.github/workflows/pr-review-by-openhands.yml @@ -37,17 +37,82 @@ jobs: cancel-in-progress: true runs-on: ubuntu-24.04 steps: - - name: Run PR Review - uses: OpenHands/software-agent-sdk/.github/actions/pr-review@main + - name: Checkout extensions repository + uses: actions/checkout@v4 with: - # LLM model(s) to use. Can be comma-separated for A/B testing - # - one model will be randomly selected per review - llm-model: litellm_proxy/claude-sonnet-4-5-20250929 - llm-base-url: https://llm-proxy.app.all-hands.dev - # Review style: roasted (other option: standard) - review-style: roasted - # Use the PR's head commit SHA to test SDK changes on the SDK repo itself - sdk-version: ${{ github.event.pull_request.head.sha }} - llm-api-key: ${{ secrets.LLM_API_KEY }} - github-token: ${{ secrets.ALLHANDS_BOT_GITHUB_PAT }} - lmnr-api-key: ${{ secrets.LMNR_SKILLS_API_KEY }} + repository: OpenHands/extensions + ref: main + path: extensions + + - name: Checkout PR repository + uses: actions/checkout@v4 + with: + repository: ${{ github.event.pull_request.head.repo.full_name }} + ref: ${{ github.event.pull_request.head.ref }} + fetch-depth: 0 + persist-credentials: false + path: pr-repo + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + # Security: this workflow executes untrusted PR content (diff/title/body) via an + # LLM-powered reviewer agent that can run Bash. GitHub Actions caches are shared + # across workflows within a repository and can enable cache-poisoning pivots into + # more-privileged workflows. Keep caching disabled here. + - name: Install uv + uses: astral-sh/setup-uv@v6 + with: + enable-cache: false + + - name: Install GitHub CLI + shell: bash + run: | + sudo apt-get update + sudo apt-get install -y gh + + - name: Install OpenHands dependencies + shell: bash + run: | + # Install openhands SDK and tools from PyPI + uv pip install --system openhands-sdk openhands-tools lmnr + + - name: Run PR review + shell: bash + env: + LLM_MODEL: litellm_proxy/claude-sonnet-4-5-20250929 + LLM_BASE_URL: https://llm-proxy.app.all-hands.dev + REVIEW_STYLE: roasted + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + GITHUB_TOKEN: ${{ secrets.ALLHANDS_BOT_GITHUB_PAT }} + LMNR_PROJECT_API_KEY: ${{ secrets.LMNR_SKILLS_API_KEY }} + PR_NUMBER: ${{ github.event.pull_request.number }} + PR_TITLE: ${{ github.event.pull_request.title }} + PR_BODY: ${{ github.event.pull_request.body }} + PR_BASE_BRANCH: ${{ github.event.pull_request.base.ref }} + PR_HEAD_BRANCH: ${{ github.event.pull_request.head.ref }} + REPO_NAME: ${{ github.repository }} + run: | + cd pr-repo + uv run python ../extensions/plugins/pr-review/scripts/agent_script.py + + - name: Upload logs as artifact + uses: actions/upload-artifact@v4 + if: always() + with: + name: openhands-pr-review-logs + path: | + *.log + output/ + retention-days: 7 + + - name: Upload Laminar trace info for evaluation + uses: actions/upload-artifact@v4 + if: success() + with: + name: pr-review-trace-${{ github.event.pull_request.number }} + path: pr-repo/laminar_trace_info.json + retention-days: 30 + if-no-files-found: ignore diff --git a/examples/03_github_workflows/02_pr_review/README.md b/examples/03_github_workflows/02_pr_review/README.md index 77edf2b6e9..883ec2a4c9 100644 --- a/examples/03_github_workflows/02_pr_review/README.md +++ b/examples/03_github_workflows/02_pr_review/README.md @@ -2,12 +2,11 @@ This example demonstrates how to set up a GitHub Actions workflow for automated pull request reviews using the OpenHands agent SDK. When a PR is labeled with `review-this` or when openhands-agent is added as a reviewer, OpenHands will analyze the changes and provide detailed, constructive feedback. -**Note**: The actual review scripts now live in the [OpenHands/extensions](https://github.com/OpenHands/extensions/tree/main/plugins/pr-review) repository. This directory contains the GitHub Action that references those scripts. +**Note**: The actual review scripts now live in the [OpenHands/extensions](https://github.com/OpenHands/extensions/tree/main/plugins/pr-review) repository. This directory contains an example workflow that references those scripts. ## Files -- **`action.yml`**: Composite GitHub Action that loads scripts from the extensions repository -- **`workflow.yml`**: Example GitHub Actions workflow file that uses the composite action +- **`workflow.yml`**: Example GitHub Actions workflow file that runs the PR review agent - **`evaluate_review.py`**: Script to evaluate review effectiveness when PR is closed - **`README.md`**: This documentation file @@ -63,25 +62,20 @@ Set the following secrets in your GitHub repository settings: ### 3. Customize the workflow (optional) -Edit `.github/workflows/pr-review-by-openhands.yml` to customize the inputs: +Edit `.github/workflows/pr-review-by-openhands.yml` to customize the environment variables: ```yaml -- name: Run PR Review - uses: ./.github/actions/pr-review - with: - # LLM model(s) to use. Can be comma-separated for A/B testing - # - one model will be randomly selected per review - llm-model: anthropic/claude-sonnet-4-5-20250929 - llm-base-url: '' - # Review style: roasted (other option: standard) - review-style: roasted - # Extensions git ref to use (tag, branch, or commit SHA, e.g., 'v1.0.0', 'main', or 'abc1234') - extensions-version: main - # Optional: override the extensions repo (owner/repo) if you forked it - extensions-repo: OpenHands/extensions - # Secrets - llm-api-key: ${{ secrets.LLM_API_KEY }} - github-token: ${{ secrets.GITHUB_TOKEN }} + - name: Run PR review + shell: bash + env: + # Customize these variables as needed + LLM_MODEL: anthropic/claude-3-5-sonnet-20240620 + LLM_BASE_URL: '' + REVIEW_STYLE: roasted + # Secrets + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + LMNR_PROJECT_API_KEY: ${{ secrets.LMNR_PROJECT_API_KEY }} ``` ### 4. Create the review label @@ -172,27 +166,22 @@ You are a code reviewer for this project. Follow these guidelines: See the [software-agent-sdk's own code-review skill](https://github.com/OpenHands/software-agent-sdk/blob/main/.agents/skills/code-review.md) for a complete example of a custom code review skill. -## Composite Action +## Workflow Configuration -This workflow uses a reusable composite action located in this directory (`action.yml`). The composite action handles: +The workflow is configured using environment variables in the `Run PR review` step. -- Checking out the extensions repository at the specified version -- Setting up Python and dependencies -- Running the PR review agent (from extensions repo) -- Uploading logs as artifacts +### Environment Variables -### Action Inputs +| Variable | Description | Default Example | +|----------|-------------|---------| +| `LLM_MODEL` | LLM model(s) - can be comma-separated for A/B testing | `anthropic/claude-3-5-sonnet-20240620` | +| `LLM_BASE_URL` | LLM base URL (optional) | `''` | +| `REVIEW_STYLE` | Review style: 'standard' or 'roasted' | `roasted` | +| `LLM_API_KEY` | LLM API key | `${{ secrets.LLM_API_KEY }}` | +| `GITHUB_TOKEN` | GitHub token for API access | `${{ secrets.GITHUB_TOKEN }}` | +| `LMNR_PROJECT_API_KEY` | Laminar API key for observability (optional) | `${{ secrets.LMNR_PROJECT_API_KEY }}` | -| Input | Description | Required | Default | -|-------|-------------|----------|---------| -| `llm-model` | LLM model(s) - can be comma-separated for A/B testing | No | `anthropic/claude-sonnet-4-5-20250929` | -| `llm-base-url` | LLM base URL (optional) | No | `''` | -| `review-style` | Review style: 'standard' or 'roasted' | No | `roasted` | -| `extensions-version` | Git ref for extensions (tag, branch, or commit SHA) | No | `main` | -| `extensions-repo` | Extensions repository (owner/repo) | No | `OpenHands/extensions` | -| `llm-api-key` | LLM API key | Yes | - | -| `github-token` | GitHub token for API access | Yes | - | -| `lmnr-api-key` | Laminar API key for observability (optional) | No | - | +To use a specific version of the extensions repository, modify the `Checkout extensions repository` step in the workflow file. ## A/B Testing with Multiple Models @@ -200,16 +189,17 @@ The PR review workflow supports A/B testing different LLM models. When multiple ### Configuration -Specify multiple models as a comma-separated list in the `llm-model` parameter: +Specify multiple models as a comma-separated list in the `LLM_MODEL` environment variable: ```yaml -- name: Run PR Review - uses: ./.github/actions/pr-review - with: - # Multiple models for A/B testing - one will be randomly selected - llm-model: 'litellm_proxy/claude-sonnet-4-5-20250929,litellm_proxy/gpt-4.1-2025-04-14' - llm-api-key: ${{ secrets.LLM_API_KEY }} - github-token: ${{ secrets.GITHUB_TOKEN }} + - name: Run PR review + shell: bash + env: + # Multiple models for A/B testing - one will be randomly selected + LLM_MODEL: 'anthropic/claude-3-5-sonnet-20240620,gpt-4' + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # ... other variables ``` ### Observability diff --git a/examples/03_github_workflows/02_pr_review/action.yml b/examples/03_github_workflows/02_pr_review/action.yml deleted file mode 120000 index 5be4828323..0000000000 --- a/examples/03_github_workflows/02_pr_review/action.yml +++ /dev/null @@ -1 +0,0 @@ -../../../.github/actions/pr-review/action.yml \ No newline at end of file diff --git a/examples/03_github_workflows/02_pr_review/workflow.yml b/examples/03_github_workflows/02_pr_review/workflow.yml index 944bb04e9f..908d9f4d5c 100644 --- a/examples/03_github_workflows/02_pr_review/workflow.yml +++ b/examples/03_github_workflows/02_pr_review/workflow.yml @@ -32,25 +32,73 @@ jobs: github.event.requested_reviewer.login == 'openhands-agent' runs-on: ubuntu-latest steps: - - name: Checkout for composite action + - name: Checkout extensions repository uses: actions/checkout@v4 with: - repository: OpenHands/software-agent-sdk - # Use a specific version tag or branch (e.g., 'v1.0.0' or 'main') + repository: OpenHands/extensions ref: main - sparse-checkout: .github/actions/pr-review + path: extensions - - name: Run PR Review - uses: ./.github/actions/pr-review + - name: Checkout PR repository + uses: actions/checkout@v4 + with: + repository: ${{ github.event.pull_request.head.repo.full_name }} + ref: ${{ github.event.pull_request.head.ref }} + fetch-depth: 0 + persist-credentials: false + path: pr-repo + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + # Security: this workflow executes untrusted PR content (diff/title/body) via an + # LLM-powered reviewer agent that can run Bash. GitHub Actions caches are shared + # across workflows within a repository and can enable cache-poisoning pivots into + # more-privileged workflows. Keep caching disabled here. + - name: Install uv + uses: astral-sh/setup-uv@v6 + with: + enable-cache: false + + - name: Install GitHub CLI + shell: bash + run: | + sudo apt-get update + sudo apt-get install -y gh + + - name: Install OpenHands dependencies + shell: bash + run: | + # Install openhands SDK and tools from PyPI + uv pip install --system openhands-sdk openhands-tools lmnr + + - name: Run PR review + shell: bash + env: + LLM_MODEL: anthropic/claude-3-5-sonnet-20240620 + LLM_BASE_URL: '' + REVIEW_STYLE: roasted + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + LMNR_PROJECT_API_KEY: ${{ secrets.LMNR_PROJECT_API_KEY }} + PR_NUMBER: ${{ github.event.pull_request.number }} + PR_TITLE: ${{ github.event.pull_request.title }} + PR_BODY: ${{ github.event.pull_request.body }} + PR_BASE_BRANCH: ${{ github.event.pull_request.base.ref }} + PR_HEAD_BRANCH: ${{ github.event.pull_request.head.ref }} + REPO_NAME: ${{ github.repository }} + run: | + cd pr-repo + uv run python ../extensions/plugins/pr-review/scripts/agent_script.py + + - name: Upload logs as artifact + uses: actions/upload-artifact@v4 + if: always() with: - # LLM model(s) to use. Can be comma-separated for A/B testing - # - one model will be randomly selected per review - llm-model: anthropic/claude-sonnet-4-5-20250929 - llm-base-url: '' - # Review style: roasted (other option: standard) - review-style: roasted - # Extensions version to use (version tag or branch name) - extensions-version: main - # Secrets - llm-api-key: ${{ secrets.LLM_API_KEY }} - github-token: ${{ secrets.GITHUB_TOKEN }} + name: openhands-pr-review-logs + path: | + *.log + output/ + retention-days: 7 From d17e67c73aee1e58ba40ccbf500d99f616763807 Mon Sep 17 00:00:00 2001 From: openhands Date: Mon, 9 Mar 2026 23:41:11 +0000 Subject: [PATCH 3/3] Remove evaluate_review.py and update workflows to use extensions repo --- .github/workflows/pr-review-evaluation.yml | 16 +- .../02_pr_review/README.md | 1 - .../02_pr_review/evaluate_review.py | 442 ------------------ 3 files changed, 8 insertions(+), 451 deletions(-) delete mode 100644 examples/03_github_workflows/02_pr_review/evaluate_review.py diff --git a/.github/workflows/pr-review-evaluation.yml b/.github/workflows/pr-review-evaluation.yml index 33f4560971..1b080d54ff 100644 --- a/.github/workflows/pr-review-evaluation.yml +++ b/.github/workflows/pr-review-evaluation.yml @@ -57,22 +57,22 @@ jobs: echo "This PR may not have been reviewed by the agent, skipping evaluation" fi - - name: Checkout software-agent-sdk repository + - name: Checkout extensions repository if: steps.check-trace.outputs.trace_exists == 'true' - uses: actions/checkout@v5 + uses: actions/checkout@v4 with: - repository: OpenHands/software-agent-sdk - path: software-agent-sdk + repository: OpenHands/extensions + path: extensions - name: Set up Python if: steps.check-trace.outputs.trace_exists == 'true' - uses: actions/setup-python@v6 + uses: actions/setup-python@v5 with: - python-version: '3.13' + python-version: '3.12' - name: Install uv if: steps.check-trace.outputs.trace_exists == 'true' - uses: astral-sh/setup-uv@v7 + uses: astral-sh/setup-uv@v6 with: enable-cache: true @@ -92,7 +92,7 @@ jobs: cp trace-info/laminar_trace_info.json . # Run the evaluation script - uv run python software-agent-sdk/examples/03_github_workflows/02_pr_review/evaluate_review.py + uv run python extensions/plugins/pr-review/workflows/evaluate_review.py - name: Upload evaluation logs uses: actions/upload-artifact@v5 diff --git a/examples/03_github_workflows/02_pr_review/README.md b/examples/03_github_workflows/02_pr_review/README.md index 883ec2a4c9..bca2f5cc99 100644 --- a/examples/03_github_workflows/02_pr_review/README.md +++ b/examples/03_github_workflows/02_pr_review/README.md @@ -7,7 +7,6 @@ This example demonstrates how to set up a GitHub Actions workflow for automated ## Files - **`workflow.yml`**: Example GitHub Actions workflow file that runs the PR review agent -- **`evaluate_review.py`**: Script to evaluate review effectiveness when PR is closed - **`README.md`**: This documentation file ## Features diff --git a/examples/03_github_workflows/02_pr_review/evaluate_review.py b/examples/03_github_workflows/02_pr_review/evaluate_review.py deleted file mode 100644 index 9f8cf14362..0000000000 --- a/examples/03_github_workflows/02_pr_review/evaluate_review.py +++ /dev/null @@ -1,442 +0,0 @@ -#!/usr/bin/env python3 -""" -PR Review Evaluation Script - -This script runs when a PR is merged or closed to evaluate how well the -review comments were addressed. It creates an evaluation trace in Laminar -that can be processed by a signal to determine review effectiveness. - -The evaluation flow: -1. Read the original trace ID from the artifact -2. Fetch PR review comments and thread discussion from GitHub -3. Fetch the final patch/diff -4. Create an evaluation span with all context -5. Optionally score the original trace - -Environment Variables: - LMNR_PROJECT_API_KEY: Laminar project API key (required) - GITHUB_TOKEN: GitHub token for API access (required) - PR_NUMBER: Pull request number (required) - REPO_NAME: Repository name in format owner/repo (required) - PR_MERGED: Whether the PR was merged ('true' or 'false') -""" - -import json - -# Configure logging -import logging -import os -import sys -import urllib.error -import urllib.request -from pathlib import Path - -from lmnr import Laminar, LaminarClient - - -logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") -logger = logging.getLogger(__name__) - - -def _get_required_env(name: str) -> str: - """Get a required environment variable or raise an error.""" - value = os.getenv(name) - if not value: - raise ValueError(f"{name} environment variable is required") - return value - - -def _get_github_headers() -> dict[str, str]: - """Get headers for GitHub API requests.""" - token = _get_required_env("GITHUB_TOKEN") - return { - "Accept": "application/vnd.github.v3+json", - "Authorization": f"Bearer {token}", - "X-GitHub-Api-Version": "2022-11-28", - } - - -def _get_agent_usernames() -> set[str]: - """Get the set of agent usernames to identify agent comments. - - Configurable via AGENT_USERNAMES environment variable (comma-separated). - Defaults to 'openhands-agent,all-hands-bot'. - """ - usernames = os.getenv("AGENT_USERNAMES", "openhands-agent,all-hands-bot") - return set(name.strip() for name in usernames.split(",") if name.strip()) - - -def _handle_github_api_error(e: urllib.error.HTTPError, context: str) -> None: - """Handle GitHub API errors with rate limit awareness.""" - if e.code == 429: - retry_after = e.headers.get("Retry-After", "60") - logger.warning(f"Rate limited by GitHub API. Retry after {retry_after}s") - logger.error(f"Failed to {context}: HTTP {e.code}") - - -def fetch_pr_review_comments(repo: str, pr_number: str) -> list[dict]: - """Fetch all review comments on a PR. - - This includes inline code review comments, not regular PR comments. - """ - url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}/comments" - request = urllib.request.Request(url, headers=_get_github_headers()) - - try: - with urllib.request.urlopen(request, timeout=60) as response: - return json.loads(response.read().decode("utf-8")) - except urllib.error.HTTPError as e: - _handle_github_api_error(e, "fetch review comments") - return [] - - -def fetch_pr_issue_comments(repo: str, pr_number: str) -> list[dict]: - """Fetch issue-style comments on a PR (the main thread).""" - url = f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments" - request = urllib.request.Request(url, headers=_get_github_headers()) - - try: - with urllib.request.urlopen(request, timeout=60) as response: - return json.loads(response.read().decode("utf-8")) - except urllib.error.HTTPError as e: - _handle_github_api_error(e, "fetch issue comments") - return [] - - -def fetch_pr_reviews(repo: str, pr_number: str) -> list[dict]: - """Fetch all reviews on a PR (approve, request changes, comment).""" - url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}/reviews" - request = urllib.request.Request(url, headers=_get_github_headers()) - - try: - with urllib.request.urlopen(request, timeout=60) as response: - return json.loads(response.read().decode("utf-8")) - except urllib.error.HTTPError as e: - _handle_github_api_error(e, "fetch reviews") - return [] - - -def fetch_pr_diff(repo: str, pr_number: str) -> str: - """Fetch the final diff of the PR.""" - url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}" - headers = _get_github_headers() - headers["Accept"] = "application/vnd.github.v3.diff" - - request = urllib.request.Request(url, headers=headers) - - try: - with urllib.request.urlopen(request, timeout=60) as response: - return response.read().decode("utf-8", errors="replace") - except urllib.error.HTTPError as e: - _handle_github_api_error(e, "fetch PR diff") - return "" - - -def fetch_pr_info(repo: str, pr_number: str) -> dict: - """Fetch PR metadata.""" - url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}" - request = urllib.request.Request(url, headers=_get_github_headers()) - - try: - with urllib.request.urlopen(request, timeout=60) as response: - return json.loads(response.read().decode("utf-8")) - except urllib.error.HTTPError as e: - _handle_github_api_error(e, "fetch PR info") - return {} - - -def extract_agent_comments( - review_comments: list[dict], issue_comments: list[dict], reviews: list[dict] -) -> list[dict]: - """Extract comments made by the review agent. - - Agent usernames are configurable via AGENT_USERNAMES environment variable. - """ - agent_users = _get_agent_usernames() - agent_comments = [] - - # Review comments (inline code comments) - for comment in review_comments: - if comment.get("user", {}).get("login") in agent_users: - agent_comments.append( - { - "type": "review_comment", - "id": comment.get("id"), - "body": comment.get("body", ""), - "path": comment.get("path"), - "line": comment.get("line") or comment.get("original_line"), - "created_at": comment.get("created_at"), - } - ) - - # Issue comments (main thread) - for comment in issue_comments: - if comment.get("user", {}).get("login") in agent_users: - agent_comments.append( - { - "type": "issue_comment", - "id": comment.get("id"), - "body": comment.get("body", ""), - "created_at": comment.get("created_at"), - } - ) - - # Review bodies - for review in reviews: - if review.get("user", {}).get("login") in agent_users and review.get("body"): - agent_comments.append( - { - "type": "review", - "id": review.get("id"), - "body": review.get("body", ""), - "state": review.get("state"), - "created_at": review.get("submitted_at"), - } - ) - - return agent_comments - - -def extract_human_responses( - review_comments: list[dict], - issue_comments: list[dict], - agent_users: set[str] | None = None, -) -> list[dict]: - """Extract comments/responses from humans (non-agent users). - - Agent usernames are configurable via AGENT_USERNAMES environment variable. - """ - if agent_users is None: - agent_users = _get_agent_usernames() - human_responses = [] - - for comment in review_comments: - if comment.get("user", {}).get("login") not in agent_users: - human_responses.append( - { - "type": "review_comment", - "user": comment.get("user", {}).get("login"), - "body": comment.get("body", ""), - "in_reply_to_id": comment.get("in_reply_to_id"), - "created_at": comment.get("created_at"), - } - ) - - for comment in issue_comments: - if comment.get("user", {}).get("login") not in agent_users: - human_responses.append( - { - "type": "issue_comment", - "user": comment.get("user", {}).get("login"), - "body": comment.get("body", ""), - "created_at": comment.get("created_at"), - } - ) - - return human_responses - - -def truncate_text(text: str, max_chars: int = 50000) -> str: - """Truncate text to stay within reasonable API payload limits. - - Max 50k chars chosen to stay well under typical API payload limits - while preserving enough context for evaluation. This keeps the - evaluation trace size manageable for Laminar processing. - """ - if len(text) <= max_chars: - return text - return text[:max_chars] + f"\n\n... [truncated, {len(text)} total chars]" - - -def main(): - """Run the PR review evaluation.""" - logger.info("Starting PR review evaluation...") - - # Get required environment variables - pr_number = _get_required_env("PR_NUMBER") - repo_name = _get_required_env("REPO_NAME") - pr_merged = os.getenv("PR_MERGED", "false").lower() == "true" - - logger.info(f"Evaluating PR #{pr_number} in {repo_name}") - logger.info(f"PR was merged: {pr_merged}") - - # Read original trace info from artifact - trace_info_path = Path("laminar_trace_info.json") - original_trace_id = None - original_span_context = None - original_trace_data = {} - - if trace_info_path.exists(): - with open(trace_info_path) as f: - original_trace_data = json.load(f) - original_trace_id = original_trace_data.get("trace_id") - original_span_context = original_trace_data.get("span_context") - logger.info(f"Original trace ID: {original_trace_id}") - if original_span_context: - logger.info( - "Found span context - will add evaluation to original trace" - ) - else: - logger.info("No span context - evaluation will create standalone trace") - else: - logger.warning( - "No trace info file found - evaluation will create standalone trace" - ) - - # Fetch PR data from GitHub - logger.info("Fetching PR data from GitHub...") - review_comments = fetch_pr_review_comments(repo_name, pr_number) - issue_comments = fetch_pr_issue_comments(repo_name, pr_number) - reviews = fetch_pr_reviews(repo_name, pr_number) - final_diff = fetch_pr_diff(repo_name, pr_number) - pr_info = fetch_pr_info(repo_name, pr_number) - - logger.info(f"Found {len(review_comments)} review comments") - logger.info(f"Found {len(issue_comments)} issue comments") - logger.info(f"Found {len(reviews)} reviews") - - # Extract agent comments and human responses - agent_comments = extract_agent_comments(review_comments, issue_comments, reviews) - human_responses = extract_human_responses(review_comments, issue_comments) - - logger.info(f"Agent made {len(agent_comments)} comments") - logger.info(f"Humans made {len(human_responses)} responses") - - # Initialize Laminar for tracing - Laminar.initialize() - - # Create evaluation context - evaluation_context = { - "pr_number": pr_number, - "repo_name": repo_name, - "pr_merged": pr_merged, - "pr_title": pr_info.get("title", ""), - "pr_state": pr_info.get("state", ""), - "original_trace_id": original_trace_id, - "agent_comments": agent_comments, - "human_responses": human_responses, - "final_diff": truncate_text(final_diff), - "total_review_comments": len(review_comments), - "total_issue_comments": len(issue_comments), - } - - # Create an evaluation span that can be processed by a Laminar signal - # The signal will analyze the agent comments vs final diff to determine - # which suggestions were addressed. - # - # IMPORTANT: If we have the original span context, we use parent_span_context - # to add this span as a child of the original trace. This allows Laminar - # signals to operate on the complete trace (review + evaluation) together. - with Laminar.start_as_current_span( - name="pr_review_evaluation", - input=evaluation_context, - tags=["pr-review-evaluation"], - parent_span_context=original_span_context, - ): - # Set trace metadata for filtering and linking - Laminar.set_trace_metadata( - { - "original_trace_id": original_trace_id or "none", - "evaluation_type": "pr_review_effectiveness", - "pr_number": pr_number, - "repo_name": repo_name, - "pr_merged": str(pr_merged), - } - ) - - # Log summary for visibility - summary = { - "pr": f"{repo_name}#{pr_number}", - "merged": pr_merged, - "agent_comments_count": len(agent_comments), - "human_responses_count": len(human_responses), - "diff_length": len(final_diff), - } - logger.info(f"Evaluation summary: {json.dumps(summary)}") - - # Set output with key metrics - Laminar.set_span_output( - { - "summary": summary, - "ready_for_signal": True, - } - ) - - # Capture trace ID while inside the span context - # (get_trace_id() returns None outside a span context) - eval_trace_id = Laminar.get_trace_id() - - # Flush to ensure span is sent - Laminar.flush() - - # If we have the original trace ID, we can also score it directly - # This provides immediate feedback without waiting for signal processing - if original_trace_id: - try: - client = LaminarClient() - - # PLACEHOLDER SCORE: This is a simple engagement metric, NOT a measure - # of review effectiveness. The actual effectiveness score will come from - # the Laminar signal which analyzes whether suggestions were implemented. - # - # This score only indicates: - # - Whether humans responded to agent comments (engagement) - # - Whether the PR was merged (completion) - # - # It does NOT measure: - # - Whether agent suggestions were actually helpful - # - Whether suggestions were implemented in the final code - # - Quality of the review feedback - preliminary_score = 0.0 - if agent_comments: - engagement_ratio = min(len(human_responses) / len(agent_comments), 1.0) - preliminary_score = engagement_ratio * 0.5 # Scale to 0-0.5 - - if pr_merged: - preliminary_score += 0.3 - - client.evaluators.score( - name="review_engagement", - trace_id=original_trace_id, - score=preliminary_score, - metadata={ - "agent_comments": len(agent_comments), - "human_responses": len(human_responses), - "pr_merged": pr_merged, - "note": "Placeholder - signal provides effectiveness analysis", - "score_type": "engagement_only", - }, - ) - logger.info( - f"Added preliminary score {preliminary_score:.2f} " - f"to original trace {original_trace_id}" - ) - - # Tag the original trace to indicate evaluation was done - client.tags.tag(original_trace_id, ["evaluated", f"pr-{pr_number}"]) - logger.info(f"Tagged original trace {original_trace_id}") - - except Exception as e: - logger.warning(f"Failed to score original trace: {e}") - # Don't fail the workflow if scoring fails - - # Print evaluation summary - print("\n=== PR Review Evaluation ===") - print(f"PR: {repo_name}#{pr_number}") - print(f"Merged: {pr_merged}") - print(f"Agent Comments: {len(agent_comments)}") - print(f"Human Responses: {len(human_responses)}") - if original_trace_id: - print(f"Original Review Trace: {original_trace_id}") - if eval_trace_id: - print(f"Evaluation Trace: {eval_trace_id}") - - logger.info("PR review evaluation completed successfully") - - -if __name__ == "__main__": - try: - main() - except Exception as e: - logger.error(f"Evaluation failed: {e}") - sys.exit(1)