From 22bc43743b739eca75b7757e3ba4c2eab32d0fdb Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Thu, 5 Mar 2026 14:02:55 +0000
Subject: [PATCH 1/3] Migrate PR review plugin to extensions repository

- Delete agent_script.py and prompt.py (now in extensions repo)
- Update action.yml to load scripts from OpenHands/extensions
- Change sdk-repo/sdk-version inputs to extensions-repo/extensions-version
- Update README and workflow.yml to reflect new structure
- Install openhands-sdk and openhands-tools from PyPI instead of from a local SDK checkout

Related to #2282

Co-authored-by: openhands <openhands@all-hands.dev>
---
 .github/actions/pr-review/action.yml          |  25 +-
 .../02_pr_review/README.md                    |  26 +-
 .../02_pr_review/agent_script.py              | 903 ------------------
 .../02_pr_review/prompt.py                    | 112 ---
 .../02_pr_review/workflow.yml                 |   4 +-
 5 files changed, 28 insertions(+), 1042 deletions(-)
 delete mode 100644 examples/03_github_workflows/02_pr_review/agent_script.py
 delete mode 100644 examples/03_github_workflows/02_pr_review/prompt.py

diff --git a/.github/actions/pr-review/action.yml b/.github/actions/pr-review/action.yml
index 15ebfebd6c..8b584a6b80 100644
--- a/.github/actions/pr-review/action.yml
+++ b/.github/actions/pr-review/action.yml
@@ -24,12 +24,12 @@ inputs:
             feedback focusing on data structures, simplicity, and pragmatism)"
         required: false
         default: roasted
-    sdk-repo:
-        description: GitHub repository for the SDK (owner/repo)
+    extensions-repo:
+        description: GitHub repository for extensions (owner/repo)
         required: false
-        default: OpenHands/software-agent-sdk
-    sdk-version:
-        description: Git ref to use for the SDK (tag, branch, or commit SHA, e.g., v1.0.0, main, or abc1234)
+        default: OpenHands/extensions
+    extensions-version:
+        description: Git ref to use for extensions (tag, branch, or commit SHA, e.g., v1.0.0, main, or abc1234)
         required: false
         default: main
     llm-api-key:
@@ -46,12 +46,12 @@ inputs:
 runs:
     using: composite
     steps:
-        - name: Checkout software-agent-sdk repository
+        - name: Checkout extensions repository
           uses: actions/checkout@v4
           with:
-              repository: ${{ inputs.sdk-repo }}
-              ref: ${{ inputs.sdk-version }}
-              path: software-agent-sdk
+              repository: ${{ inputs.extensions-repo }}
+              ref: ${{ inputs.extensions-version }}
+              path: extensions
 
         - name: Checkout PR repository
           uses: actions/checkout@v4
@@ -85,7 +85,8 @@ runs:
         - name: Install OpenHands dependencies
           shell: bash
           run: |
-              uv pip install --system ./software-agent-sdk/openhands-sdk ./software-agent-sdk/openhands-tools lmnr
+              # Install openhands SDK and tools from PyPI
+              uv pip install --system openhands-sdk openhands-tools lmnr
 
         - name: Check required configuration and select model
           id: select-model
@@ -111,7 +112,7 @@ runs:
               echo "PR Number: ${{ github.event.pull_request.number }}"
               echo "PR Title: ${{ github.event.pull_request.title }}"
               echo "Repository: ${{ github.repository }}"
-              echo "SDK Version: ${{ inputs.sdk-version }}"
+              echo "Extensions Version: ${{ inputs.extensions-version }}"
               echo "Available models: $MODELS_LIST"
               echo "Selected LLM model: $SELECTED_MODEL"
               if [ -n "${{ inputs.llm-base-url }}" ]; then
@@ -135,7 +136,7 @@ runs:
               REPO_NAME: ${{ github.repository }}
           run: |
               cd pr-repo
-              uv run python ../software-agent-sdk/examples/03_github_workflows/02_pr_review/agent_script.py
+              uv run python ../extensions/plugins/pr-review/scripts/agent_script.py
 
         - name: Upload logs as artifact
           uses: actions/upload-artifact@v4
diff --git a/examples/03_github_workflows/02_pr_review/README.md b/examples/03_github_workflows/02_pr_review/README.md
index ff4f864927..77edf2b6e9 100644
--- a/examples/03_github_workflows/02_pr_review/README.md
+++ b/examples/03_github_workflows/02_pr_review/README.md
@@ -2,12 +2,12 @@
 
 This example demonstrates how to set up a GitHub Actions workflow for automated pull request reviews using the OpenHands agent SDK. When a PR is labeled with `review-this` or when openhands-agent is added as a reviewer, OpenHands will analyze the changes and provide detailed, constructive feedback.
 
+**Note**: The actual review scripts now live in the [OpenHands/extensions](https://github.com/OpenHands/extensions/tree/main/plugins/pr-review) repository. This directory contains the GitHub Action that references those scripts.
+
 ## Files
 
-- **`action.yml`**: Symlink to the composite GitHub Action (`.github/actions/pr-review/action.yml`)
+- **`action.yml`**: Symlink to the composite GitHub Action (`.github/actions/pr-review/action.yml`), which loads the review scripts from the extensions repository
 - **`workflow.yml`**: Example GitHub Actions workflow file that uses the composite action
-- **`agent_script.py`**: Python script that runs the OpenHands agent for PR review
-- **`prompt.py`**: The prompt asking the agent to write the PR review
 - **`evaluate_review.py`**: Script to evaluate review effectiveness when PR is closed
 - **`README.md`**: This documentation file
 
@@ -40,7 +40,7 @@ This example demonstrates how to set up a GitHub Actions workflow for automated
   - Potential issues and security concerns
   - Specific improvement suggestions
 - **GitHub API Integration**: Uses the GitHub API to post inline review comments directly on specific lines of code
-- **Version Control**: Use `sdk-version` to pin to a specific version tag or branch
+- **Version Control**: Use `extensions-version` to pin to a specific version tag or branch of the extensions repository
 
 ## Setup
 
@@ -75,10 +75,10 @@ Edit `.github/workflows/pr-review-by-openhands.yml` to customize the inputs:
       llm-base-url: ''
       # Review style: roasted (other option: standard)
       review-style: roasted
-      # SDK git ref to use (tag, branch, or commit SHA, e.g., 'v1.0.0', 'main', or 'abc1234')
-      sdk-version: main
-      # Optional: override the SDK repo (owner/repo) if you forked it
-      sdk-repo: OpenHands/software-agent-sdk
+      # Extensions git ref to use (tag, branch, or commit SHA, e.g., 'v1.0.0', 'main', or 'abc1234')
+      extensions-version: main
+      # Optional: override the extensions repo (owner/repo) if you forked it
+      extensions-repo: OpenHands/extensions
       # Secrets
       llm-api-key: ${{ secrets.LLM_API_KEY }}
       github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -174,11 +174,11 @@ See the [software-agent-sdk's own code-review skill](https://github.com/OpenHand
 
 ## Composite Action
 
-This workflow uses a reusable composite action located at `.github/actions/pr-review/action.yml` in the software-agent-sdk repository. The composite action handles:
+This workflow uses a reusable composite action defined at `.github/actions/pr-review/action.yml` in the software-agent-sdk repository (symlinked into this directory as `action.yml`). The composite action handles:
 
-- Checking out the SDK at the specified version
+- Checking out the extensions repository at the specified version
 - Setting up Python and dependencies
-- Running the PR review agent
+- Running the PR review agent (from extensions repo)
 - Uploading logs as artifacts
 
 ### Action Inputs
@@ -188,8 +188,8 @@ This workflow uses a reusable composite action located at `.github/actions/pr-re
 | `llm-model` | LLM model(s) - can be comma-separated for A/B testing | No | `anthropic/claude-sonnet-4-5-20250929` |
 | `llm-base-url` | LLM base URL (optional) | No | `''` |
 | `review-style` | Review style: 'standard' or 'roasted' | No | `roasted` |
-| `sdk-version` | Git ref for SDK (tag, branch, or commit SHA) | No | `main` |
-| `sdk-repo` | SDK repository (owner/repo) | No | `OpenHands/software-agent-sdk` |
+| `extensions-version` | Git ref for extensions (tag, branch, or commit SHA) | No | `main` |
+| `extensions-repo` | Extensions repository (owner/repo) | No | `OpenHands/extensions` |
 | `llm-api-key` | LLM API key | Yes | - |
 | `github-token` | GitHub token for API access | Yes | - |
 | `lmnr-api-key` | Laminar API key for observability (optional) | No | - |
diff --git a/examples/03_github_workflows/02_pr_review/agent_script.py b/examples/03_github_workflows/02_pr_review/agent_script.py
deleted file mode 100644
index 7ee67dc46b..0000000000
--- a/examples/03_github_workflows/02_pr_review/agent_script.py
+++ /dev/null
@@ -1,903 +0,0 @@
-#!/usr/bin/env python3
-"""
-Example: PR Review Agent
-
-This script runs OpenHands agent to review a pull request and provide
-fine-grained review comments. The agent has full repository access and uses
-bash commands to analyze changes in context and post detailed review feedback
-directly via `gh` or the GitHub API.
-
-This example demonstrates how to use skills for code review:
-- `/codereview` - Standard code review skill
-- `/codereview-roasted` - Linus Torvalds style brutally honest review
-
-The agent posts inline review comments on specific lines of code using the
-GitHub API, rather than posting one giant comment under the PR.
-
-The agent also considers previous review context including:
-- Existing review comments and their resolution status
-- Previous review decisions (APPROVED, CHANGES_REQUESTED, etc.)
-- Review threads (resolved and unresolved)
-
-Designed for use with GitHub Actions workflows triggered by PR labels.
-
-Environment Variables:
-    LLM_API_KEY: API key for the LLM (required)
-    LLM_MODEL: Language model to use (default: anthropic/claude-sonnet-4-5-20250929)
-    LLM_BASE_URL: Optional base URL for LLM API
-    GITHUB_TOKEN: GitHub token for API access (required)
-    PR_NUMBER: Pull request number (required)
-    PR_TITLE: Pull request title (required)
-    PR_BODY: Pull request body (optional)
-    PR_BASE_BRANCH: Base branch name (required)
-    PR_HEAD_BRANCH: Head branch name (required)
-    REPO_NAME: Repository name in format owner/repo (required)
-    REVIEW_STYLE: Review style ('standard' or 'roasted', default: 'standard')
-
-For setup instructions, usage examples, and GitHub Actions integration,
-see README.md in this directory.
-"""
-
-from __future__ import annotations
-
-import json
-import os
-import sys
-import time
-import urllib.error
-import urllib.request
-from collections.abc import Callable
-from pathlib import Path
-from typing import Any
-
-from lmnr import Laminar
-
-from openhands.sdk import LLM, Agent, AgentContext, Conversation, get_logger
-from openhands.sdk.context.skills import load_project_skills
-from openhands.sdk.conversation import get_agent_final_response
-from openhands.sdk.git.utils import run_git_command
-from openhands.tools.preset.default import get_default_condenser, get_default_tools
-
-
-# Add the script directory to Python path so we can import prompt.py
-script_dir = Path(__file__).parent
-sys.path.insert(0, str(script_dir))
-
-from prompt import format_prompt  # noqa: E402
-
-
-logger = get_logger(__name__)
-
-# Maximum total diff size
-MAX_TOTAL_DIFF = 100000
-# Maximum size for review context to avoid overwhelming the prompt
-# Keeps context under ~7500 tokens (assuming ~4 chars/token average)
-MAX_REVIEW_CONTEXT = 30000
-# Maximum time (seconds) for GraphQL pagination to prevent hanging on slow APIs
-MAX_PAGINATION_TIME = 120
-
-
-def _get_required_env(name: str) -> str:
-    value = os.getenv(name)
-    if not value:
-        raise ValueError(f"{name} environment variable is required")
-    return value
-
-
-def _call_github_api(
-    url: str,
-    method: str = "GET",
-    data: dict[str, Any] | None = None,
-    accept: str = "application/vnd.github+json",
-) -> Any:
-    """Make a GitHub API request (REST or GraphQL).
-
-    This function handles both REST API calls and GraphQL queries (via the /graphql
-    endpoint). The function name reflects this dual purpose.
-
-    Args:
-        url: Full API URL or path (will be prefixed with api.github.com if needed)
-        method: HTTP method (GET, POST, etc.)
-        data: JSON data to send (for POST/PUT requests, including GraphQL queries)
-        accept: Accept header value
-
-    Returns:
-        Parsed JSON response or raw text for diff requests
-    """
-    token = _get_required_env("GITHUB_TOKEN")
-
-    if not url.startswith("http"):
-        url = f"https://api.github.com{url}"
-
-    request = urllib.request.Request(url, method=method)
-    request.add_header("Accept", accept)
-    request.add_header("Authorization", f"Bearer {token}")
-    request.add_header("X-GitHub-Api-Version", "2022-11-28")
-
-    if data:
-        request.add_header("Content-Type", "application/json")
-        request.data = json.dumps(data).encode("utf-8")
-
-    try:
-        with urllib.request.urlopen(request, timeout=60) as response:
-            raw_data = response.read()
-            if "diff" in accept:
-                return raw_data.decode("utf-8", errors="replace")
-            return json.loads(raw_data.decode("utf-8"))
-    except urllib.error.HTTPError as e:
-        details = (e.read() or b"").decode("utf-8", errors="replace").strip()
-        raise RuntimeError(
-            f"GitHub API request failed: HTTP {e.code} {e.reason}. {details}"
-        ) from e
-    except urllib.error.URLError as e:
-        raise RuntimeError(f"GitHub API request failed: {e.reason}") from e
-    except json.JSONDecodeError as e:
-        raise RuntimeError(f"GitHub API returned invalid JSON: {e}") from e
-
-
-def get_pr_reviews(pr_number: str, max_reviews: int = 100) -> list[dict[str, Any]]:
-    """Fetch the latest reviews for a PR using GraphQL.
-
-    Uses GraphQL with `last` to fetch the most recent reviews directly,
-    avoiding the need to paginate through all reviews from oldest to newest.
-
-    Args:
-        pr_number: The PR number
-        max_reviews: Maximum number of reviews to return (default: 100)
-
-    Returns a list of review objects containing:
-    - id: Review ID
-    - user: Author information
-    - body: Review body text
-    - state: APPROVED, CHANGES_REQUESTED, COMMENTED, DISMISSED, PENDING
-    - submitted_at: When the review was submitted
-    """
-    repo = _get_required_env("REPO_NAME")
-    owner, repo_name = repo.split("/")
-
-    # Use GraphQL to fetch the latest reviews directly
-    # `last: N` fetches the N most recent items
-    query = """
-    query(
-      $owner: String!
-      $repo: String!
-      $pr_number: Int!
-      $count: Int!
-      $cursor: String
-    ) {
-      repository(owner: $owner, name: $repo) {
-        pullRequest(number: $pr_number) {
-          reviews(last: $count, before: $cursor) {
-            pageInfo {
-              hasPreviousPage
-              startCursor
-            }
-            nodes {
-              id
-              author {
-                login
-              }
-              body
-              state
-              submittedAt
-            }
-          }
-        }
-      }
-    }
-    """
-
-    all_reviews: list[dict[str, Any]] = []
-    cursor = None
-    start_time = time.time()
-    page_count = 0
-
-    while len(all_reviews) < max_reviews:
-        # Check for pagination timeout
-        elapsed = time.time() - start_time
-        if elapsed > MAX_PAGINATION_TIME:
-            logger.warning(
-                f"Reviews pagination timeout after {elapsed:.1f}s, "
-                f"fetched {len(all_reviews)} reviews across {page_count} pages"
-            )
-            break
-
-        # Fetch up to remaining needed reviews
-        remaining = max_reviews - len(all_reviews)
-        fetch_count = min(remaining, 100)  # GraphQL max is 100 per request
-
-        variables = {
-            "owner": owner,
-            "repo": repo_name,
-            "pr_number": int(pr_number),
-            "count": fetch_count,
-            "cursor": cursor,
-        }
-
-        result = _call_github_api(
-            "https://api.github.com/graphql",
-            method="POST",
-            data={"query": query, "variables": variables},
-        )
-
-        if "errors" in result:
-            logger.warning(f"GraphQL errors fetching reviews: {result['errors']}")
-            break
-
-        pr_data = result.get("data", {}).get("repository", {}).get("pullRequest")
-        if not pr_data:
-            break
-
-        reviews_data = pr_data.get("reviews", {})
-        nodes = reviews_data.get("nodes", [])
-        page_count += 1
-
-        if not nodes:
-            break
-
-        # Convert GraphQL format to REST-like format for compatibility
-        for node in nodes:
-            author = node.get("author") or {}
-            all_reviews.append(
-                {
-                    "id": node.get("id"),
-                    "user": {"login": author.get("login", "unknown")},
-                    "body": node.get("body", ""),
-                    "state": node.get("state", "UNKNOWN"),
-                    "submitted_at": node.get("submittedAt"),
-                }
-            )
-
-        logger.debug(
-            f"Fetched page {page_count} with {len(nodes)} reviews "
-            f"(total: {len(all_reviews)})"
-        )
-
-        page_info = reviews_data.get("pageInfo", {})
-        if not page_info.get("hasPreviousPage"):
-            break
-        cursor = page_info.get("startCursor")
-
-    # Reviews are fetched newest-first with `last`, reverse to get chronological order
-    # (oldest first) for consistent display
-    return list(reversed(all_reviews))
-
-
-def get_review_threads_graphql(pr_number: str) -> list[dict[str, Any]]:
-    """Fetch the latest review threads with resolution status using GraphQL API.
-
-    The REST API doesn't expose thread resolution status, so we use GraphQL.
-    Uses `last` to fetch the most recent threads first, ensuring we get the
-    latest discussions rather than the oldest ones.
-
-    Note: This query fetches up to 100 review threads per page, each with up to
-    50 comments. For PRs exceeding these limits, older threads/comments may be
-    omitted. We paginate through threads but not through comments within threads.
-
-    Returns a list of thread objects containing:
-    - id: Thread ID
-    - isResolved: Whether the thread is resolved
-    - isOutdated: Whether the thread is outdated (code changed)
-    - path: File path
-    - line: Line number
-    - comments: List of comments in the thread (up to 50 per thread)
-    """
-    repo = _get_required_env("REPO_NAME")
-    owner, repo_name = repo.split("/")
-
-    # Use `last` to fetch the most recent threads first
-    # `before: $cursor` paginates backwards through older threads
-    query = """
-    query($owner: String!, $repo: String!, $pr_number: Int!, $cursor: String) {
-      repository(owner: $owner, name: $repo) {
-        pullRequest(number: $pr_number) {
-          reviewThreads(last: 100, before: $cursor) {
-            pageInfo {
-              hasPreviousPage
-              startCursor
-            }
-            nodes {
-              id
-              isResolved
-              isOutdated
-              path
-              line
-              comments(first: 50) {
-                nodes {
-                  id
-                  author {
-                    login
-                  }
-                  body
-                  createdAt
-                }
-              }
-            }
-          }
-        }
-      }
-    }
-    """
-
-    threads: list[dict[str, Any]] = []
-    cursor = None
-    start_time = time.time()
-    page_count = 0
-    has_more_pages = False
-
-    while True:
-        # Check for overall pagination timeout
-        elapsed = time.time() - start_time
-        if elapsed > MAX_PAGINATION_TIME:
-            logger.warning(
-                f"GraphQL pagination timeout after {elapsed:.1f}s, "
-                f"fetched {len(threads)} threads across {page_count} pages"
-            )
-            break
-
-        variables = {
-            "owner": owner,
-            "repo": repo_name,
-            "pr_number": int(pr_number),
-            "cursor": cursor,
-        }
-
-        result = _call_github_api(
-            "https://api.github.com/graphql",
-            method="POST",
-            data={"query": query, "variables": variables},
-        )
-
-        if "errors" in result:
-            logger.warning(f"GraphQL errors: {result['errors']}")
-            break
-
-        pr_data = result.get("data", {}).get("repository", {}).get("pullRequest")
-        if not pr_data:
-            break
-
-        review_threads = pr_data.get("reviewThreads", {})
-        nodes = review_threads.get("nodes", [])
-        threads.extend(nodes)
-        page_count += 1
-
-        logger.debug(
-            f"Fetched page {page_count} with {len(nodes)} threads "
-            f"(total: {len(threads)})"
-        )
-
-        page_info = review_threads.get("pageInfo", {})
-        has_more_pages = page_info.get("hasPreviousPage", False)
-        if not has_more_pages:
-            break
-        cursor = page_info.get("startCursor")
-
-    # Warn only if there are actually more pages we didn't fetch
-    if has_more_pages:
-        logger.warning(
-            f"Review threads limited to {len(threads)} threads. "
-            "Some threads may be omitted for PRs with extensive review history."
-        )
-
-    # Threads are fetched newest-first with `last`, reverse to get chronological order
-    return list(reversed(threads))
-
-
-def format_review_context(
-    reviews: list[dict[str, Any]],
-    threads: list[dict[str, Any]],
-    max_size: int = MAX_REVIEW_CONTEXT,
-) -> str:
-    """Format review history into a context string for the agent.
-
-    Args:
-        reviews: List of review objects from get_pr_reviews()
-        threads: List of thread objects from get_review_threads_graphql()
-        max_size: Maximum size of the formatted context
-
-    Returns:
-        Formatted markdown string with review history
-    """
-    if not reviews and not threads:
-        return ""
-
-    sections: list[str] = []
-    current_size = 0
-
-    def _add_section(section: str) -> bool:
-        """Add a section if it fits within max_size. Returns True if added."""
-        nonlocal current_size
-        section_size = len(section) + 1  # +1 for newline separator
-        if current_size + section_size > max_size:
-            return False
-        sections.append(section)
-        current_size += section_size
-        return True
-
-    # Format reviews (high-level review decisions)
-    if reviews:
-        review_lines: list[str] = ["### Previous Reviews\n"]
-        for review in reviews:
-            user_data = review.get("user") or {}
-            user = user_data.get("login", "unknown")
-            state = review.get("state") or "UNKNOWN"
-            body = (review.get("body") or "").strip()
-
-            # Map state to emoji for visual clarity
-            state_emoji = {
-                "APPROVED": "✅",
-                "CHANGES_REQUESTED": "🔴",
-                "COMMENTED": "💬",
-                "DISMISSED": "❌",
-                "PENDING": "⏳",
-            }.get(state, "❓")
-
-            review_lines.append(f"- {state_emoji} **{user}** ({state})")
-            if body:
-                # Indent the body and truncate if too long
-                body_preview = body[:500] + "..." if len(body) > 500 else body
-                indented = "\n".join(f"  > {line}" for line in body_preview.split("\n"))
-                review_lines.append(indented)
-            review_lines.append("")
-
-        review_section = "\n".join(review_lines)
-        if not _add_section(review_section):
-            # Even reviews section doesn't fit, return truncation message
-            return (
-                f"... [review context truncated, "
-                f"content exceeds {max_size:,} chars] ..."
-            )
-
-    # Format review threads with resolution status
-    if threads:
-        resolved_threads = [t for t in threads if t.get("isResolved")]
-        unresolved_threads = [t for t in threads if not t.get("isResolved")]
-
-        # Unresolved threads (higher priority)
-        if unresolved_threads:
-            header = (
-                "### Unresolved Review Threads\n\n"
-                "*These threads have not been resolved and may need attention:*\n"
-            )
-            if not _add_section(header):
-                count = len(unresolved_threads)
-                sections.append(
-                    f"\n... [truncated, {count} unresolved threads omitted] ..."
-                )
-            else:
-                threads_added = 0
-                for thread in unresolved_threads:
-                    thread_lines = _format_thread(thread)
-                    thread_section = "\n".join(thread_lines)
-                    if not _add_section(thread_section):
-                        remaining = len(unresolved_threads) - threads_added
-                        sections.append(
-                            f"\n... [truncated, {remaining} unresolved "
-                            "threads omitted] ..."
-                        )
-                        break
-                    threads_added += 1
-
-        # Resolved threads (lower priority, add if space remains)
-        if resolved_threads and current_size < max_size:
-            header = (
-                "### Resolved Review Threads\n\n"
-                "*These threads have been resolved but provide context:*\n"
-            )
-            if _add_section(header):
-                threads_added = 0
-                for thread in resolved_threads:
-                    thread_lines = _format_thread(thread)
-                    thread_section = "\n".join(thread_lines)
-                    if not _add_section(thread_section):
-                        remaining = len(resolved_threads) - threads_added
-                        sections.append(
-                            f"\n... [truncated, {remaining} resolved "
-                            "threads omitted] ..."
-                        )
-                        break
-                    threads_added += 1
-
-    return "\n".join(sections)
-
-
-def _format_thread(thread: dict[str, Any]) -> list[str]:
-    """Format a single review thread.
-
-    Args:
-        thread: Thread object from GraphQL
-
-    Returns:
-        List of formatted lines
-    """
-    lines: list[str] = []
-    path = thread.get("path", "unknown")
-    line_num = thread.get("line")
-    is_outdated = thread.get("isOutdated", False)
-    is_resolved = thread.get("isResolved", False)
-
-    # Thread header
-    status = "✅ RESOLVED" if is_resolved else "⚠️ UNRESOLVED"
-    outdated = " (outdated)" if is_outdated else ""
-    location = f"{path}"
-    if line_num:
-        location += f":{line_num}"
-
-    lines.append(f"**{location}**{outdated} - {status}")
-
-    # Thread comments
-    comments_data = thread.get("comments") or {}
-    comments = comments_data.get("nodes") or []
-    for comment in comments:
-        author_data = comment.get("author") or {}
-        author = author_data.get("login", "unknown")
-        body = (comment.get("body") or "").strip()
-        if body:
-            # Truncate individual comments if too long
-            body_preview = body[:300] + "..." if len(body) > 300 else body
-            indented = "\n".join(f"  > {line}" for line in body_preview.split("\n"))
-            lines.append(f"  - **{author}**:")
-            lines.append(indented)
-
-    lines.append("")
-    return lines
-
-
-def _fetch_with_fallback(
-    name: str, fetch_fn: Callable[[], list[dict[str, Any]]]
-) -> list[dict[str, Any]]:
-    """Fetch data with error handling and logging.
-
-    Args:
-        name: Name of the data being fetched (for logging)
-        fetch_fn: Function to call to fetch the data
-
-    Returns:
-        Fetched data or empty list on error
-    """
-    try:
-        data = fetch_fn()
-        logger.info(f"Fetched {len(data)} {name}")
-        return data
-    except Exception as e:
-        logger.warning(f"Failed to fetch {name}: {e}")
-        return []
-
-
-def get_pr_review_context(pr_number: str) -> str:
-    """Get all review context for a PR.
-
-    Fetches reviews and review threads, then formats them into a context string.
-
-    Args:
-        pr_number: The PR number
-
-    Returns:
-        Formatted review context string, or empty string if no context
-    """
-    reviews = _fetch_with_fallback("reviews", lambda: get_pr_reviews(pr_number))
-    threads = _fetch_with_fallback(
-        "review threads", lambda: get_review_threads_graphql(pr_number)
-    )
-
-    return format_review_context(reviews, threads)
-
-
-def get_pr_diff_via_github_api(pr_number: str) -> str:
-    """Fetch the PR diff exactly as GitHub renders it.
-
-    Uses the GitHub REST API "Get a pull request" endpoint with an `Accept`
-    header requesting diff output.
-
-    This avoids depending on local git refs (often stale/missing in
-    `pull_request_target` checkouts).
-    """
-
-    repo = _get_required_env("REPO_NAME")
-    token = _get_required_env("GITHUB_TOKEN")
-
-    url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}"
-    request = urllib.request.Request(url)
-    request.add_header("Accept", "application/vnd.github.v3.diff")
-    request.add_header("Authorization", f"Bearer {token}")
-    request.add_header("X-GitHub-Api-Version", "2022-11-28")
-
-    try:
-        with urllib.request.urlopen(request, timeout=60) as response:
-            data = response.read()
-    except urllib.error.HTTPError as e:
-        details = (e.read() or b"").decode("utf-8", errors="replace").strip()
-        raise RuntimeError(
-            f"GitHub diff API request failed: HTTP {e.code} {e.reason}. {details}"
-        ) from e
-    except urllib.error.URLError as e:
-        raise RuntimeError(f"GitHub diff API request failed: {e.reason}") from e
-
-    return data.decode("utf-8", errors="replace")
-
-
-def truncate_text(diff_text: str, max_total: int = MAX_TOTAL_DIFF) -> str:
-    if len(diff_text) <= max_total:
-        return diff_text
-
-    total_chars = len(diff_text)
-    return (
-        diff_text[:max_total]
-        + f"\n\n... [total diff truncated, {total_chars:,} chars total, "
-        + f"showing first {max_total:,}] ..."
-    )
-
-
-def get_truncated_pr_diff() -> str:
-    """Get the PR diff with truncation.
-
-    This uses GitHub as the source of truth so the review matches the PR's
-    "Files changed" view.
-    """
-
-    pr_number = _get_required_env("PR_NUMBER")
-    diff_text = get_pr_diff_via_github_api(pr_number)
-    return truncate_text(diff_text)
-
-
-def get_head_commit_sha(repo_dir: Path | None = None) -> str:
-    """
-    Get the SHA of the HEAD commit.
-
-    Args:
-        repo_dir: Path to the repository (defaults to cwd)
-
-    Returns:
-        The commit SHA
-    """
-    if repo_dir is None:
-        repo_dir = Path.cwd()
-
-    return run_git_command(["git", "rev-parse", "HEAD"], repo_dir).strip()
-
-
-def main():
-    """Run the PR review agent."""
-    logger.info("Starting PR review process...")
-
-    # Validate required environment variables
-    required_vars = [
-        "LLM_API_KEY",
-        "GITHUB_TOKEN",
-        "PR_NUMBER",
-        "PR_TITLE",
-        "PR_BASE_BRANCH",
-        "PR_HEAD_BRANCH",
-        "REPO_NAME",
-    ]
-
-    missing_vars = [var for var in required_vars if not os.getenv(var)]
-    if missing_vars:
-        logger.error(f"Missing required environment variables: {missing_vars}")
-        sys.exit(1)
-
-    github_token = os.getenv("GITHUB_TOKEN")
-
-    # Get PR information
-    pr_info = {
-        "number": os.getenv("PR_NUMBER"),
-        "title": os.getenv("PR_TITLE"),
-        "body": os.getenv("PR_BODY", ""),
-        "repo_name": os.getenv("REPO_NAME"),
-        "base_branch": os.getenv("PR_BASE_BRANCH"),
-        "head_branch": os.getenv("PR_HEAD_BRANCH"),
-    }
-
-    # Get review style - default to standard
-    review_style = os.getenv("REVIEW_STYLE", "standard").lower()
-    if review_style not in ("standard", "roasted"):
-        logger.warning(f"Unknown REVIEW_STYLE '{review_style}', using 'standard'")
-        review_style = "standard"
-
-    logger.info(f"Reviewing PR #{pr_info['number']}: {pr_info['title']}")
-    logger.info(f"Review style: {review_style}")
-
-    try:
-        pr_diff = get_truncated_pr_diff()
-        logger.info(f"Got PR diff with {len(pr_diff)} characters")
-
-        # Get the HEAD commit SHA for inline comments
-        commit_id = get_head_commit_sha()
-        logger.info(f"HEAD commit SHA: {commit_id}")
-
-        # Fetch previous review context (comments, threads, resolution status)
-        pr_number = pr_info.get("number", "")
-        review_context = get_pr_review_context(pr_number)
-        if review_context:
-            logger.info(f"Got review context with {len(review_context)} characters")
-        else:
-            logger.info("No previous review context found")
-
-        # Create the review prompt using the template
-        # Include the skill trigger keyword to activate the appropriate skill
-        skill_trigger = (
-            "/codereview" if review_style == "standard" else "/codereview-roasted"
-        )
-        prompt = format_prompt(
-            skill_trigger=skill_trigger,
-            title=pr_info.get("title", "N/A"),
-            body=pr_info.get("body") or "No description provided",
-            repo_name=pr_info.get("repo_name", "N/A"),
-            base_branch=pr_info.get("base_branch", "main"),
-            head_branch=pr_info.get("head_branch", "N/A"),
-            pr_number=pr_number,
-            commit_id=commit_id,
-            diff=pr_diff,
-            review_context=review_context,
-        )
-
-        # Configure LLM
-        api_key = os.getenv("LLM_API_KEY")
-        model = os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929")
-        base_url = os.getenv("LLM_BASE_URL")
-
-        llm_config = {
-            "model": model,
-            "api_key": api_key,
-            "usage_id": "pr_review_agent",
-            "drop_params": True,
-        }
-
-        if base_url:
-            llm_config["base_url"] = base_url
-
-        llm = LLM(**llm_config)
-
-        # Get the current working directory as workspace
-        cwd = os.getcwd()
-
-        # Load project-specific skills from the repository being reviewed
-        # This includes AGENTS.md, .cursorrules, and skills from .agents/skills/
-        project_skills = load_project_skills(cwd)
-        logger.info(
-            f"Loaded {len(project_skills)} project skills: "
-            f"{[s.name for s in project_skills]}"
-        )
-
-        # Create AgentContext with public skills enabled and project skills
-        # Public skills from https://github.com/OpenHands/extensions include:
-        # - /codereview: Standard code review skill
-        # - /codereview-roasted: Linus Torvalds style brutally honest review
-        # Project skills include repo-specific guidance (AGENTS.md, etc.)
-        agent_context = AgentContext(
-            load_public_skills=True,
-            skills=project_skills,
-        )
-
-        # Create agent with default tools and agent context
-        # Note: agent_context must be passed at initialization since Agent is frozen
-        agent = Agent(
-            llm=llm,
-            tools=get_default_tools(enable_browser=False),  # CLI mode - no browser
-            agent_context=agent_context,
-            system_prompt_kwargs={"cli_mode": True},
-            condenser=get_default_condenser(
-                llm=llm.model_copy(update={"usage_id": "condenser"})
-            ),
-        )
-
-        # Create conversation with secrets for masking
-        # These secrets will be masked in agent output to prevent accidental exposure
-        secrets = {}
-        if api_key:
-            secrets["LLM_API_KEY"] = api_key
-        if github_token:
-            secrets["GITHUB_TOKEN"] = github_token
-
-        conversation = Conversation(
-            agent=agent,
-            workspace=cwd,
-            secrets=secrets,
-        )
-
-        logger.info("Starting PR review analysis...")
-        logger.info("Agent received the PR diff in the initial message")
-        logger.info(f"Using skill trigger: {skill_trigger}")
-        logger.info("Agent will post inline review comments directly via GitHub API")
-
-        # Send the prompt and run the agent
-        # The agent will analyze the code and post inline review comments
-        # directly to the PR using the GitHub API
-        conversation.send_message(prompt)
-        conversation.run()
-
-        # The agent should have posted review comments via GitHub API
-        # Log the final response for debugging purposes
-        review_content = get_agent_final_response(conversation.state.events)
-        if review_content:
-            logger.info(f"Agent final response: {len(review_content)} characters")
-
-        # Print cost information for CI output
-        metrics = conversation.conversation_stats.get_combined_metrics()
-        print("\n=== PR Review Cost Summary ===")
-        print(f"Total Cost: ${metrics.accumulated_cost:.6f}")
-        if metrics.accumulated_token_usage:
-            token_usage = metrics.accumulated_token_usage
-            print(f"Prompt Tokens: {token_usage.prompt_tokens}")
-            print(f"Completion Tokens: {token_usage.completion_tokens}")
-            if token_usage.cache_read_tokens > 0:
-                print(f"Cache Read Tokens: {token_usage.cache_read_tokens}")
-            if token_usage.cache_write_tokens > 0:
-                print(f"Cache Write Tokens: {token_usage.cache_write_tokens}")
-
-        # Capture and store trace context for delayed evaluation
-        # When the PR is merged/closed, we can use this context to add the
-        # evaluation span to the same trace, enabling signals to analyze both
-        # the original review and evaluation together.
-        # Note: Laminar methods gracefully handle the uninitialized case by
-        # returning None or early-returning, so no try/except needed.
-        trace_id = Laminar.get_trace_id()
-        # Use model_dump(mode='json') to ensure UUIDs are serialized as strings
-        # for JSON compatibility. get_laminar_span_context_dict() returns UUID
-        # objects which are not JSON serializable.
-        laminar_span_context = Laminar.get_laminar_span_context()
-        span_context = (
-            laminar_span_context.model_dump(mode="json")
-            if laminar_span_context
-            else None
-        )
-
-        if trace_id and laminar_span_context:
-            # Set trace metadata within an active span context
-            # Using start_as_current_span with parent_span_context to continue the trace
-            with Laminar.start_as_current_span(
-                name="pr-review-metadata",
-                parent_span_context=laminar_span_context,
-            ) as _:
-                # Set trace metadata within this active span context
-                # Include model for A/B testing analysis
-                pr_url = f"https://github.com/{pr_info['repo_name']}/pull/{pr_info['number']}"
-                Laminar.set_trace_metadata(
-                    {
-                        "pr_number": pr_info["number"],
-                        "repo_name": pr_info["repo_name"],
-                        "pr_url": pr_url,
-                        "workflow_phase": "review",
-                        "review_style": review_style,
-                        "model": model,
-                    }
-                )
-
-            # Store trace context in file for GitHub artifact upload
-            # This allows the evaluation workflow to add its span to this trace
-            # The span_context includes trace_id, span_id, and span_path needed
-            # to continue the trace across separate workflow runs.
-            trace_data = {
-                "trace_id": str(trace_id),
-                "span_context": span_context,
-                "pr_number": pr_info["number"],
-                "repo_name": pr_info["repo_name"],
-                "commit_id": commit_id,
-                "review_style": review_style,
-                "model": model,
-            }
-            with open("laminar_trace_info.json", "w") as f:
-                json.dump(trace_data, f, indent=2)
-            logger.info(f"Laminar trace ID: {trace_id}")
-            logger.info(f"Model used: {model}")
-            if span_context:
-                logger.info("Laminar span context captured for trace continuation")
-            print("\n=== Laminar Trace ===")
-            print(f"Trace ID: {trace_id}")
-
-            # Ensure trace is flushed to Laminar before workflow ends
-            Laminar.flush()
-        else:
-            logger.warning(
-                "No Laminar trace ID found - observability may not be enabled"
-            )
-
-        logger.info("PR review completed successfully")
-
-    except Exception as e:
-        logger.error(f"PR review failed: {e}")
-        sys.exit(1)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/examples/03_github_workflows/02_pr_review/prompt.py b/examples/03_github_workflows/02_pr_review/prompt.py
deleted file mode 100644
index 31cbd2dd1c..0000000000
--- a/examples/03_github_workflows/02_pr_review/prompt.py
+++ /dev/null
@@ -1,112 +0,0 @@
-"""
-PR Review Prompt Template
-
-This module contains the prompt template used by the OpenHands agent
-for conducting pull request reviews.
-
-The template uses skill triggers:
-- {skill_trigger} will be replaced with '/codereview' or '/codereview-roasted'
-- /github-pr-review provides instructions for posting review comments via GitHub API
-
-The template includes:
-- {diff} - The complete git diff for the PR (may be truncated for large files)
-- {pr_number} - The PR number
-- {commit_id} - The HEAD commit SHA
-- {review_context} - Previous review comments and thread resolution status
-"""
-
-# Template for when there is review context available
-_REVIEW_CONTEXT_SECTION = """
-## Previous Review History
-
-The following shows previous reviews and review threads on this PR. Pay attention to:
-- **Unresolved threads**: These issues may still need to be addressed
-- **Resolved threads**: These provide context on what was already discussed
-- **Previous review decisions**: See what other reviewers have said
-
-{review_context}
-
-When reviewing, consider:
-1. Don't repeat comments that have already been made and are still relevant
-2. If an issue is still unresolved in the code, you may reference it
-3. If resolved, don't bring it up unless the fix introduced new problems
-4. Focus on NEW issues in the current diff that haven't been discussed yet
-"""
-
-PROMPT = """{skill_trigger}
-/github-pr-review
-
-When posting a review, keep the review body brief unless your active review
-instructions require a longer structured format.
-
-Review the PR changes below and identify issues that need to be addressed.
-
-## Pull Request Information
-- **Title**: {title}
-- **Description**: {body}
-- **Repository**: {repo_name}
-- **Base Branch**: {base_branch}
-- **Head Branch**: {head_branch}
-- **PR Number**: {pr_number}
-- **Commit ID**: {commit_id}
-{review_context_section}
-## Git Diff
-
-```diff
-{diff}
-```
-
-Analyze the changes and post your review using the GitHub API.
-"""
-
-
-def format_prompt(
-    skill_trigger: str,
-    title: str,
-    body: str,
-    repo_name: str,
-    base_branch: str,
-    head_branch: str,
-    pr_number: str,
-    commit_id: str,
-    diff: str,
-    review_context: str = "",
-) -> str:
-    """Format the PR review prompt with all parameters.
-
-    Args:
-        skill_trigger: The skill trigger (e.g., '/codereview' or '/codereview-roasted')
-        title: PR title
-        body: PR description
-        repo_name: Repository name (owner/repo)
-        base_branch: Base branch name
-        head_branch: Head branch name
-        pr_number: PR number
-        commit_id: HEAD commit SHA
-        diff: Git diff content
-        review_context: Formatted previous review context. If empty or whitespace-only,
-            the review context section is omitted from the prompt.
-
-    Returns:
-        Formatted prompt string
-    """
-    # Only include the review context section if there is actual context
-    if review_context and review_context.strip():
-        review_context_section = _REVIEW_CONTEXT_SECTION.format(
-            review_context=review_context
-        )
-    else:
-        review_context_section = ""
-
-    return PROMPT.format(
-        skill_trigger=skill_trigger,
-        title=title,
-        body=body,
-        repo_name=repo_name,
-        base_branch=base_branch,
-        head_branch=head_branch,
-        pr_number=pr_number,
-        commit_id=commit_id,
-        review_context_section=review_context_section,
-        diff=diff,
-    )
diff --git a/examples/03_github_workflows/02_pr_review/workflow.yml b/examples/03_github_workflows/02_pr_review/workflow.yml
index aab54c8c53..944bb04e9f 100644
--- a/examples/03_github_workflows/02_pr_review/workflow.yml
+++ b/examples/03_github_workflows/02_pr_review/workflow.yml
@@ -49,8 +49,8 @@ jobs:
                   llm-base-url: ''
                   # Review style: roasted (other option: standard)
                   review-style: roasted
-                  # SDK version to use (version tag or branch name)
-                  sdk-version: main
+                  # Extensions version to use (version tag or branch name)
+                  extensions-version: main
                   # Secrets
                   llm-api-key: ${{ secrets.LLM_API_KEY }}
                   github-token: ${{ secrets.GITHUB_TOKEN }}

From 8a8e28b00850dc3e8522594a977dc8f24d957156 Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Mon, 9 Mar 2026 23:21:27 +0000
Subject: [PATCH 2/3] Remove pr-review action and inline logic into workflows

---
 .github/actions/pr-review/action.yml          | 158 ------------------
 .github/workflows/pr-review-by-openhands.yml  |  91 ++++++++--
 .../02_pr_review/README.md                    |  80 ++++-----
 .../02_pr_review/action.yml                   |   1 -
 .../02_pr_review/workflow.yml                 |  82 +++++++--
 5 files changed, 178 insertions(+), 234 deletions(-)
 delete mode 100644 .github/actions/pr-review/action.yml
 delete mode 120000 examples/03_github_workflows/02_pr_review/action.yml

diff --git a/.github/actions/pr-review/action.yml b/.github/actions/pr-review/action.yml
deleted file mode 100644
index 8b584a6b80..0000000000
--- a/.github/actions/pr-review/action.yml
+++ /dev/null
@@ -1,158 +0,0 @@
----
-name: OpenHands PR Review
-description: Automated PR review using OpenHands agent
-author: OpenHands
-
-branding:
-    icon: code
-    color: blue
-
-inputs:
-    llm-model:
-        description: >
-            LLM model to use for the review. Can be a comma-separated list
-            for A/B testing - one model will be randomly selected per review.
-            Example: 'model-a' or 'model-a,model-b,model-c'
-        required: false
-        default: anthropic/claude-sonnet-4-5-20250929
-    llm-base-url:
-        description: LLM base URL (optional, for custom LLM endpoints)
-        required: false
-        default: ''
-    review-style:
-        description: "Review style: 'standard' (balanced review covering style, readability, and security) or 'roasted' (Linus Torvalds-style brutally honest
-            feedback focusing on data structures, simplicity, and pragmatism)"
-        required: false
-        default: roasted
-    extensions-repo:
-        description: GitHub repository for extensions (owner/repo)
-        required: false
-        default: OpenHands/extensions
-    extensions-version:
-        description: Git ref to use for extensions (tag, branch, or commit SHA, e.g., v1.0.0, main, or abc1234)
-        required: false
-        default: main
-    llm-api-key:
-        description: LLM API key (required)
-        required: true
-    github-token:
-        description: GitHub token for API access (required)
-        required: true
-    lmnr-api-key:
-        description: Laminar API key for observability (optional)
-        required: false
-        default: ''
-
-runs:
-    using: composite
-    steps:
-        - name: Checkout extensions repository
-          uses: actions/checkout@v4
-          with:
-              repository: ${{ inputs.extensions-repo }}
-              ref: ${{ inputs.extensions-version }}
-              path: extensions
-
-        - name: Checkout PR repository
-          uses: actions/checkout@v4
-          with:
-              repository: ${{ github.event.pull_request.head.repo.full_name }}
-              ref: ${{ github.event.pull_request.head.ref }}
-              fetch-depth: 0
-              persist-credentials: false
-              path: pr-repo
-
-        - name: Set up Python
-          uses: actions/setup-python@v5
-          with:
-              python-version: '3.12'
-
-        # Security: this workflow executes untrusted PR content (diff/title/body) via an
-        # LLM-powered reviewer agent that can run Bash. GitHub Actions caches are shared
-        # across workflows within a repository and can enable cache-poisoning pivots into
-        # more-privileged workflows. Keep caching disabled here.
-        - name: Install uv
-          uses: astral-sh/setup-uv@v6
-          with:
-              enable-cache: false
-
-        - name: Install GitHub CLI
-          shell: bash
-          run: |
-              sudo apt-get update
-              sudo apt-get install -y gh
-
-        - name: Install OpenHands dependencies
-          shell: bash
-          run: |
-              # Install openhands SDK and tools from PyPI
-              uv pip install --system openhands-sdk openhands-tools lmnr
-
-        - name: Check required configuration and select model
-          id: select-model
-          shell: bash
-          env:
-              LLM_API_KEY: ${{ inputs.llm-api-key }}
-              GITHUB_TOKEN: ${{ inputs.github-token }}
-          run: |
-              if [ -z "$LLM_API_KEY" ]; then
-                echo "Error: llm-api-key is required."
-                exit 1
-              fi
-              if [ -z "$GITHUB_TOKEN" ]; then
-                echo "Error: github-token is required."
-                exit 1
-              fi
-
-              # Select one model randomly from the comma-separated list
-              MODELS_LIST="${{ inputs.llm-model }}"
-              SELECTED_MODEL=$(echo "$MODELS_LIST" | tr ',' '\n' | shuf -n 1 | xargs)
-              echo "selected_model=$SELECTED_MODEL" >> $GITHUB_OUTPUT
-
-              echo "PR Number: ${{ github.event.pull_request.number }}"
-              echo "PR Title: ${{ github.event.pull_request.title }}"
-              echo "Repository: ${{ github.repository }}"
-              echo "Extensions Version: ${{ inputs.extensions-version }}"
-              echo "Available models: $MODELS_LIST"
-              echo "Selected LLM model: $SELECTED_MODEL"
-              if [ -n "${{ inputs.llm-base-url }}" ]; then
-                echo "LLM base URL: ${{ inputs.llm-base-url }}"
-              fi
-
-        - name: Run PR review
-          shell: bash
-          env:
-              LLM_MODEL: ${{ steps.select-model.outputs.selected_model }}
-              LLM_BASE_URL: ${{ inputs.llm-base-url }}
-              REVIEW_STYLE: ${{ inputs.review-style }}
-              LLM_API_KEY: ${{ inputs.llm-api-key }}
-              GITHUB_TOKEN: ${{ inputs.github-token }}
-              LMNR_PROJECT_API_KEY: ${{ inputs.lmnr-api-key }}
-              PR_NUMBER: ${{ github.event.pull_request.number }}
-              PR_TITLE: ${{ github.event.pull_request.title }}
-              PR_BODY: ${{ github.event.pull_request.body }}
-              PR_BASE_BRANCH: ${{ github.event.pull_request.base.ref }}
-              PR_HEAD_BRANCH: ${{ github.event.pull_request.head.ref }}
-              REPO_NAME: ${{ github.repository }}
-          run: |
-              cd pr-repo
-              uv run python ../extensions/plugins/pr-review/scripts/agent_script.py
-
-        - name: Upload logs as artifact
-          uses: actions/upload-artifact@v4
-          if: always()
-          with:
-              name: openhands-pr-review-logs
-              path: |
-                  *.log
-                  output/
-              retention-days: 7
-
-        - name: Upload Laminar trace info for evaluation
-          uses: actions/upload-artifact@v4
-          if: success()
-          with:
-              name: pr-review-trace-${{ github.event.pull_request.number }}
-              path: pr-repo/laminar_trace_info.json
-              retention-days: 30
-              if-no-files-found: ignore
diff --git a/.github/workflows/pr-review-by-openhands.yml b/.github/workflows/pr-review-by-openhands.yml
index ec8dc3786c..668dc695b1 100644
--- a/.github/workflows/pr-review-by-openhands.yml
+++ b/.github/workflows/pr-review-by-openhands.yml
@@ -37,17 +37,82 @@ jobs:
             cancel-in-progress: true
         runs-on: ubuntu-24.04
         steps:
-            - name: Run PR Review
-              uses: OpenHands/software-agent-sdk/.github/actions/pr-review@main
+            - name: Checkout extensions repository
+              uses: actions/checkout@v4
               with:
-                  # LLM model(s) to use. Can be comma-separated for A/B testing
-                  # - one model will be randomly selected per review
-                  llm-model: litellm_proxy/claude-sonnet-4-5-20250929
-                  llm-base-url: https://llm-proxy.app.all-hands.dev
-                  # Review style: roasted (other option: standard)
-                  review-style: roasted
-                  # Use the PR's head commit SHA to test SDK changes on the SDK repo itself
-                  sdk-version: ${{ github.event.pull_request.head.sha }}
-                  llm-api-key: ${{ secrets.LLM_API_KEY }}
-                  github-token: ${{ secrets.ALLHANDS_BOT_GITHUB_PAT }}
-                  lmnr-api-key: ${{ secrets.LMNR_SKILLS_API_KEY }}
+                  repository: OpenHands/extensions
+                  ref: main
+                  path: extensions
+
+            - name: Checkout PR repository
+              uses: actions/checkout@v4
+              with:
+                  repository: ${{ github.event.pull_request.head.repo.full_name }}
+                  ref: ${{ github.event.pull_request.head.ref }}
+                  fetch-depth: 0
+                  persist-credentials: false
+                  path: pr-repo
+
+            - name: Set up Python
+              uses: actions/setup-python@v5
+              with:
+                  python-version: '3.12'
+
+            # Security: this workflow executes untrusted PR content (diff/title/body) via an
+            # LLM-powered reviewer agent that can run Bash. GitHub Actions caches are shared
+            # across workflows within a repository and can enable cache-poisoning pivots into
+            # more-privileged workflows. Keep caching disabled here.
+            - name: Install uv
+              uses: astral-sh/setup-uv@v6
+              with:
+                  enable-cache: false
+
+            - name: Install GitHub CLI
+              shell: bash
+              run: |
+                  sudo apt-get update
+                  sudo apt-get install -y gh
+
+            - name: Install OpenHands dependencies
+              shell: bash
+              run: |
+                  # Install openhands SDK and tools from PyPI
+                  uv pip install --system openhands-sdk openhands-tools lmnr
+
+            - name: Run PR review
+              shell: bash
+              env:
+                  LLM_MODEL: litellm_proxy/claude-sonnet-4-5-20250929
+                  LLM_BASE_URL: https://llm-proxy.app.all-hands.dev
+                  REVIEW_STYLE: roasted
+                  LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+                  GITHUB_TOKEN: ${{ secrets.ALLHANDS_BOT_GITHUB_PAT }}
+                  LMNR_PROJECT_API_KEY: ${{ secrets.LMNR_SKILLS_API_KEY }}
+                  PR_NUMBER: ${{ github.event.pull_request.number }}
+                  PR_TITLE: ${{ github.event.pull_request.title }}
+                  PR_BODY: ${{ github.event.pull_request.body }}
+                  PR_BASE_BRANCH: ${{ github.event.pull_request.base.ref }}
+                  PR_HEAD_BRANCH: ${{ github.event.pull_request.head.ref }}
+                  REPO_NAME: ${{ github.repository }}
+              run: |
+                  cd pr-repo
+                  uv run python ../extensions/plugins/pr-review/scripts/agent_script.py
+
+            - name: Upload logs as artifact
+              uses: actions/upload-artifact@v4
+              if: always()
+              with:
+                  name: openhands-pr-review-logs
+                  path: |
+                      *.log
+                      output/
+                  retention-days: 7
+
+            - name: Upload Laminar trace info for evaluation
+              uses: actions/upload-artifact@v4
+              if: success()
+              with:
+                  name: pr-review-trace-${{ github.event.pull_request.number }}
+                  path: pr-repo/laminar_trace_info.json
+                  retention-days: 30
+                  if-no-files-found: ignore
diff --git a/examples/03_github_workflows/02_pr_review/README.md b/examples/03_github_workflows/02_pr_review/README.md
index 77edf2b6e9..883ec2a4c9 100644
--- a/examples/03_github_workflows/02_pr_review/README.md
+++ b/examples/03_github_workflows/02_pr_review/README.md
@@ -2,12 +2,11 @@
 
 This example demonstrates how to set up a GitHub Actions workflow for automated pull request reviews using the OpenHands agent SDK. When a PR is labeled with `review-this` or when openhands-agent is added as a reviewer, OpenHands will analyze the changes and provide detailed, constructive feedback.
 
-**Note**: The actual review scripts now live in the [OpenHands/extensions](https://github.com/OpenHands/extensions/tree/main/plugins/pr-review) repository. This directory contains the GitHub Action that references those scripts.
+**Note**: The actual review scripts now live in the [OpenHands/extensions](https://github.com/OpenHands/extensions/tree/main/plugins/pr-review) repository. This directory contains an example workflow that references those scripts.
 
 ## Files
 
-- **`action.yml`**: Composite GitHub Action that loads scripts from the extensions repository
-- **`workflow.yml`**: Example GitHub Actions workflow file that uses the composite action
+- **`workflow.yml`**: Example GitHub Actions workflow file that runs the PR review agent
 - **`evaluate_review.py`**: Script to evaluate review effectiveness when PR is closed
 - **`README.md`**: This documentation file
 
@@ -63,25 +62,20 @@ Set the following secrets in your GitHub repository settings:
 
 ### 3. Customize the workflow (optional)
 
-Edit `.github/workflows/pr-review-by-openhands.yml` to customize the inputs:
+Edit `.github/workflows/pr-review-by-openhands.yml` to customize the environment variables:
 
 ```yaml
-- name: Run PR Review
-  uses: ./.github/actions/pr-review
-  with:
-      # LLM model(s) to use. Can be comma-separated for A/B testing
-      # - one model will be randomly selected per review
-      llm-model: anthropic/claude-sonnet-4-5-20250929
-      llm-base-url: ''
-      # Review style: roasted (other option: standard)
-      review-style: roasted
-      # Extensions git ref to use (tag, branch, or commit SHA, e.g., 'v1.0.0', 'main', or 'abc1234')
-      extensions-version: main
-      # Optional: override the extensions repo (owner/repo) if you forked it
-      extensions-repo: OpenHands/extensions
-      # Secrets
-      llm-api-key: ${{ secrets.LLM_API_KEY }}
-      github-token: ${{ secrets.GITHUB_TOKEN }}
+            - name: Run PR review
+              shell: bash
+              env:
+                  # Customize these variables as needed
+                  LLM_MODEL: anthropic/claude-sonnet-4-5-20250929
+                  LLM_BASE_URL: ''
+                  REVIEW_STYLE: roasted
+                  # Secrets
+                  LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+                  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+                  LMNR_PROJECT_API_KEY: ${{ secrets.LMNR_PROJECT_API_KEY }}
 ```
 
 ### 4. Create the review label
@@ -172,27 +166,22 @@ You are a code reviewer for this project. Follow these guidelines:
 
 See the [software-agent-sdk's own code-review skill](https://github.com/OpenHands/software-agent-sdk/blob/main/.agents/skills/code-review.md) for a complete example of a custom code review skill.
 
-## Composite Action
+## Workflow Configuration
 
-This workflow uses a reusable composite action located in this directory (`action.yml`). The composite action handles:
+The workflow is configured using environment variables in the `Run PR review` step.
 
-- Checking out the extensions repository at the specified version
-- Setting up Python and dependencies
-- Running the PR review agent (from extensions repo)
-- Uploading logs as artifacts
+### Environment Variables
 
-### Action Inputs
+| Variable | Description | Example value |
+|----------|-------------|---------|
+| `LLM_MODEL` | LLM model(s) - can be comma-separated for A/B testing | `anthropic/claude-sonnet-4-5-20250929` |
+| `LLM_BASE_URL` | LLM base URL (optional) | `''` |
+| `REVIEW_STYLE` | Review style: 'standard' or 'roasted' | `roasted` |
+| `LLM_API_KEY` | LLM API key | `${{ secrets.LLM_API_KEY }}` |
+| `GITHUB_TOKEN` | GitHub token for API access | `${{ secrets.GITHUB_TOKEN }}` |
+| `LMNR_PROJECT_API_KEY` | Laminar API key for observability (optional) | `${{ secrets.LMNR_PROJECT_API_KEY }}` |
 
-| Input | Description | Required | Default |
-|-------|-------------|----------|---------|
-| `llm-model` | LLM model(s) - can be comma-separated for A/B testing | No | `anthropic/claude-sonnet-4-5-20250929` |
-| `llm-base-url` | LLM base URL (optional) | No | `''` |
-| `review-style` | Review style: 'standard' or 'roasted' | No | `roasted` |
-| `extensions-version` | Git ref for extensions (tag, branch, or commit SHA) | No | `main` |
-| `extensions-repo` | Extensions repository (owner/repo) | No | `OpenHands/extensions` |
-| `llm-api-key` | LLM API key | Yes | - |
-| `github-token` | GitHub token for API access | Yes | - |
-| `lmnr-api-key` | Laminar API key for observability (optional) | No | - |
+To use a specific version of the extensions repository, change the `ref` value (a tag, branch, or commit SHA; `main` in this example) in the `Checkout extensions repository` step of the workflow file.
 
 ## A/B Testing with Multiple Models
 
@@ -200,16 +189,17 @@ The PR review workflow supports A/B testing different LLM models. When multiple
 
 ### Configuration
 
-Specify multiple models as a comma-separated list in the `llm-model` parameter:
+Specify multiple models as a comma-separated list in the `LLM_MODEL` environment variable:
 
 ```yaml
-- name: Run PR Review
-  uses: ./.github/actions/pr-review
-  with:
-      # Multiple models for A/B testing - one will be randomly selected
-      llm-model: 'litellm_proxy/claude-sonnet-4-5-20250929,litellm_proxy/gpt-4.1-2025-04-14'
-      llm-api-key: ${{ secrets.LLM_API_KEY }}
-      github-token: ${{ secrets.GITHUB_TOKEN }}
+            - name: Run PR review
+              shell: bash
+              env:
+                  # Multiple models for A/B testing - one will be randomly selected
+                  LLM_MODEL: 'anthropic/claude-sonnet-4-5-20250929,gpt-4'
+                  LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+                  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+                  # ... other variables
 ```
 
 ### Observability
diff --git a/examples/03_github_workflows/02_pr_review/action.yml b/examples/03_github_workflows/02_pr_review/action.yml
deleted file mode 120000
index 5be4828323..0000000000
--- a/examples/03_github_workflows/02_pr_review/action.yml
+++ /dev/null
@@ -1 +0,0 @@
-../../../.github/actions/pr-review/action.yml
\ No newline at end of file
diff --git a/examples/03_github_workflows/02_pr_review/workflow.yml b/examples/03_github_workflows/02_pr_review/workflow.yml
index 944bb04e9f..908d9f4d5c 100644
--- a/examples/03_github_workflows/02_pr_review/workflow.yml
+++ b/examples/03_github_workflows/02_pr_review/workflow.yml
@@ -32,25 +32,73 @@ jobs:
             github.event.requested_reviewer.login == 'openhands-agent'
         runs-on: ubuntu-latest
         steps:
-            - name: Checkout for composite action
+            - name: Checkout extensions repository
               uses: actions/checkout@v4
               with:
-                  repository: OpenHands/software-agent-sdk
-                  # Use a specific version tag or branch (e.g., 'v1.0.0' or 'main')
+                  repository: OpenHands/extensions
                   ref: main
-                  sparse-checkout: .github/actions/pr-review
+                  path: extensions
 
-            - name: Run PR Review
-              uses: ./.github/actions/pr-review
+            - name: Checkout PR repository
+              uses: actions/checkout@v4
+              with:
+                  repository: ${{ github.event.pull_request.head.repo.full_name }}
+                  ref: ${{ github.event.pull_request.head.ref }}
+                  fetch-depth: 0
+                  persist-credentials: false
+                  path: pr-repo
+
+            - name: Set up Python
+              uses: actions/setup-python@v5
+              with:
+                  python-version: '3.12'
+
+            # Security: this workflow executes untrusted PR content (diff/title/body) via an
+            # LLM-powered reviewer agent that can run Bash. GitHub Actions caches are shared
+            # across workflows within a repository and can enable cache-poisoning pivots into
+            # more-privileged workflows. Keep caching disabled here.
+            - name: Install uv
+              uses: astral-sh/setup-uv@v6
+              with:
+                  enable-cache: false
+
+            - name: Install GitHub CLI
+              shell: bash
+              run: |
+                  sudo apt-get update
+                  sudo apt-get install -y gh
+
+            - name: Install OpenHands dependencies
+              shell: bash
+              run: |
+                  # Install openhands SDK and tools from PyPI
+                  uv pip install --system openhands-sdk openhands-tools lmnr
+
+            - name: Run PR review
+              shell: bash
+              env:
+                  LLM_MODEL: anthropic/claude-sonnet-4-5-20250929  # comma-separate several models for A/B testing
+                  LLM_BASE_URL: ''
+                  REVIEW_STYLE: roasted  # other option: standard
+                  LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+                  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+                  LMNR_PROJECT_API_KEY: ${{ secrets.LMNR_PROJECT_API_KEY }}
+                  PR_NUMBER: ${{ github.event.pull_request.number }}
+                  PR_TITLE: ${{ github.event.pull_request.title }}
+                  PR_BODY: ${{ github.event.pull_request.body }}
+                  PR_BASE_BRANCH: ${{ github.event.pull_request.base.ref }}
+                  PR_HEAD_BRANCH: ${{ github.event.pull_request.head.ref }}
+                  REPO_NAME: ${{ github.repository }}
+              run: |
+                  cd pr-repo
+                  uv run python ../extensions/plugins/pr-review/scripts/agent_script.py
+
+            - name: Upload logs as artifact
+              uses: actions/upload-artifact@v4
+              if: always()
               with:
-                  # LLM model(s) to use. Can be comma-separated for A/B testing
-                  # - one model will be randomly selected per review
-                  llm-model: anthropic/claude-sonnet-4-5-20250929
-                  llm-base-url: ''
-                  # Review style: roasted (other option: standard)
-                  review-style: roasted
-                  # Extensions version to use (version tag or branch name)
-                  extensions-version: main
-                  # Secrets
-                  llm-api-key: ${{ secrets.LLM_API_KEY }}
-                  github-token: ${{ secrets.GITHUB_TOKEN }}
+                  name: openhands-pr-review-logs
+                  path: |
+                      *.log
+                      output/
+                  retention-days: 7

From d17e67c73aee1e58ba40ccbf500d99f616763807 Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Mon, 9 Mar 2026 23:41:11 +0000
Subject: [PATCH 3/3] Remove evaluate_review.py and update workflows to use
 extensions repo

---
 .github/workflows/pr-review-evaluation.yml    |   8 +-
 .../02_pr_review/README.md                    |   1 -
 .../02_pr_review/evaluate_review.py           | 442 ------------------
 3 files changed, 4 insertions(+), 447 deletions(-)
 delete mode 100644 examples/03_github_workflows/02_pr_review/evaluate_review.py

diff --git a/.github/workflows/pr-review-evaluation.yml b/.github/workflows/pr-review-evaluation.yml
index 33f4560971..1b080d54ff 100644
--- a/.github/workflows/pr-review-evaluation.yml
+++ b/.github/workflows/pr-review-evaluation.yml
@@ -57,22 +57,22 @@ jobs:
                     echo "This PR may not have been reviewed by the agent, skipping evaluation"
                   fi
 
-            - name: Checkout software-agent-sdk repository
+            - name: Checkout extensions repository
               if: steps.check-trace.outputs.trace_exists == 'true'
               uses: actions/checkout@v5
               with:
-                  repository: OpenHands/software-agent-sdk
-                  path: software-agent-sdk
+                  repository: OpenHands/extensions
+                  path: extensions
 
             - name: Set up Python
               if: steps.check-trace.outputs.trace_exists == 'true'
               uses: actions/setup-python@v6
               with:
                   python-version: '3.13'
 
             - name: Install uv
               if: steps.check-trace.outputs.trace_exists == 'true'
               uses: astral-sh/setup-uv@v7
               with:
                   enable-cache: true
 
@@ -92,7 +92,7 @@ jobs:
                   cp trace-info/laminar_trace_info.json .
 
                   # Run the evaluation script
-                  uv run python software-agent-sdk/examples/03_github_workflows/02_pr_review/evaluate_review.py
+                  uv run python extensions/plugins/pr-review/workflows/evaluate_review.py
 
             - name: Upload evaluation logs
               uses: actions/upload-artifact@v5
diff --git a/examples/03_github_workflows/02_pr_review/README.md b/examples/03_github_workflows/02_pr_review/README.md
index 883ec2a4c9..bca2f5cc99 100644
--- a/examples/03_github_workflows/02_pr_review/README.md
+++ b/examples/03_github_workflows/02_pr_review/README.md
@@ -7,7 +7,6 @@ This example demonstrates how to set up a GitHub Actions workflow for automated
 ## Files
 
 - **`workflow.yml`**: Example GitHub Actions workflow file that runs the PR review agent
-- **`evaluate_review.py`**: Script to evaluate review effectiveness when PR is closed
 - **`README.md`**: This documentation file
 
 ## Features
diff --git a/examples/03_github_workflows/02_pr_review/evaluate_review.py b/examples/03_github_workflows/02_pr_review/evaluate_review.py
deleted file mode 100644
index 9f8cf14362..0000000000
--- a/examples/03_github_workflows/02_pr_review/evaluate_review.py
+++ /dev/null
@@ -1,442 +0,0 @@
-#!/usr/bin/env python3
-"""
-PR Review Evaluation Script
-
-This script runs when a PR is merged or closed to evaluate how well the
-review comments were addressed. It creates an evaluation trace in Laminar
-that can be processed by a signal to determine review effectiveness.
-
-The evaluation flow:
-1. Read the original trace ID from the artifact
-2. Fetch PR review comments and thread discussion from GitHub
-3. Fetch the final patch/diff
-4. Create an evaluation span with all context
-5. Optionally score the original trace
-
-Environment Variables:
-    LMNR_PROJECT_API_KEY: Laminar project API key (required)
-    GITHUB_TOKEN: GitHub token for API access (required)
-    PR_NUMBER: Pull request number (required)
-    REPO_NAME: Repository name in format owner/repo (required)
-    PR_MERGED: Whether the PR was merged ('true' or 'false')
-"""
-
-import json
-
-# Configure logging
-import logging
-import os
-import sys
-import urllib.error
-import urllib.request
-from pathlib import Path
-
-from lmnr import Laminar, LaminarClient
-
-
-logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
-logger = logging.getLogger(__name__)
-
-
-def _get_required_env(name: str) -> str:
-    """Get a required environment variable or raise an error."""
-    value = os.getenv(name)
-    if not value:
-        raise ValueError(f"{name} environment variable is required")
-    return value
-
-
-def _get_github_headers() -> dict[str, str]:
-    """Get headers for GitHub API requests."""
-    token = _get_required_env("GITHUB_TOKEN")
-    return {
-        "Accept": "application/vnd.github.v3+json",
-        "Authorization": f"Bearer {token}",
-        "X-GitHub-Api-Version": "2022-11-28",
-    }
-
-
-def _get_agent_usernames() -> set[str]:
-    """Get the set of agent usernames to identify agent comments.
-
-    Configurable via AGENT_USERNAMES environment variable (comma-separated).
-    Defaults to 'openhands-agent,all-hands-bot'.
-    """
-    usernames = os.getenv("AGENT_USERNAMES", "openhands-agent,all-hands-bot")
-    return set(name.strip() for name in usernames.split(",") if name.strip())
-
-
-def _handle_github_api_error(e: urllib.error.HTTPError, context: str) -> None:
-    """Handle GitHub API errors with rate limit awareness."""
-    if e.code == 429:
-        retry_after = e.headers.get("Retry-After", "60")
-        logger.warning(f"Rate limited by GitHub API. Retry after {retry_after}s")
-    logger.error(f"Failed to {context}: HTTP {e.code}")
-
-
-def fetch_pr_review_comments(repo: str, pr_number: str) -> list[dict]:
-    """Fetch all review comments on a PR.
-
-    This includes inline code review comments, not regular PR comments.
-    """
-    url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}/comments"
-    request = urllib.request.Request(url, headers=_get_github_headers())
-
-    try:
-        with urllib.request.urlopen(request, timeout=60) as response:
-            return json.loads(response.read().decode("utf-8"))
-    except urllib.error.HTTPError as e:
-        _handle_github_api_error(e, "fetch review comments")
-        return []
-
-
-def fetch_pr_issue_comments(repo: str, pr_number: str) -> list[dict]:
-    """Fetch issue-style comments on a PR (the main thread)."""
-    url = f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments"
-    request = urllib.request.Request(url, headers=_get_github_headers())
-
-    try:
-        with urllib.request.urlopen(request, timeout=60) as response:
-            return json.loads(response.read().decode("utf-8"))
-    except urllib.error.HTTPError as e:
-        _handle_github_api_error(e, "fetch issue comments")
-        return []
-
-
-def fetch_pr_reviews(repo: str, pr_number: str) -> list[dict]:
-    """Fetch all reviews on a PR (approve, request changes, comment)."""
-    url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}/reviews"
-    request = urllib.request.Request(url, headers=_get_github_headers())
-
-    try:
-        with urllib.request.urlopen(request, timeout=60) as response:
-            return json.loads(response.read().decode("utf-8"))
-    except urllib.error.HTTPError as e:
-        _handle_github_api_error(e, "fetch reviews")
-        return []
-
-
-def fetch_pr_diff(repo: str, pr_number: str) -> str:
-    """Fetch the final diff of the PR."""
-    url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}"
-    headers = _get_github_headers()
-    headers["Accept"] = "application/vnd.github.v3.diff"
-
-    request = urllib.request.Request(url, headers=headers)
-
-    try:
-        with urllib.request.urlopen(request, timeout=60) as response:
-            return response.read().decode("utf-8", errors="replace")
-    except urllib.error.HTTPError as e:
-        _handle_github_api_error(e, "fetch PR diff")
-        return ""
-
-
-def fetch_pr_info(repo: str, pr_number: str) -> dict:
-    """Fetch PR metadata."""
-    url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}"
-    request = urllib.request.Request(url, headers=_get_github_headers())
-
-    try:
-        with urllib.request.urlopen(request, timeout=60) as response:
-            return json.loads(response.read().decode("utf-8"))
-    except urllib.error.HTTPError as e:
-        _handle_github_api_error(e, "fetch PR info")
-        return {}
-
-
-def extract_agent_comments(
-    review_comments: list[dict], issue_comments: list[dict], reviews: list[dict]
-) -> list[dict]:
-    """Extract comments made by the review agent.
-
-    Agent usernames are configurable via AGENT_USERNAMES environment variable.
-    """
-    agent_users = _get_agent_usernames()
-    agent_comments = []
-
-    # Review comments (inline code comments)
-    for comment in review_comments:
-        if comment.get("user", {}).get("login") in agent_users:
-            agent_comments.append(
-                {
-                    "type": "review_comment",
-                    "id": comment.get("id"),
-                    "body": comment.get("body", ""),
-                    "path": comment.get("path"),
-                    "line": comment.get("line") or comment.get("original_line"),
-                    "created_at": comment.get("created_at"),
-                }
-            )
-
-    # Issue comments (main thread)
-    for comment in issue_comments:
-        if comment.get("user", {}).get("login") in agent_users:
-            agent_comments.append(
-                {
-                    "type": "issue_comment",
-                    "id": comment.get("id"),
-                    "body": comment.get("body", ""),
-                    "created_at": comment.get("created_at"),
-                }
-            )
-
-    # Review bodies
-    for review in reviews:
-        if review.get("user", {}).get("login") in agent_users and review.get("body"):
-            agent_comments.append(
-                {
-                    "type": "review",
-                    "id": review.get("id"),
-                    "body": review.get("body", ""),
-                    "state": review.get("state"),
-                    "created_at": review.get("submitted_at"),
-                }
-            )
-
-    return agent_comments
-
-
-def extract_human_responses(
-    review_comments: list[dict],
-    issue_comments: list[dict],
-    agent_users: set[str] | None = None,
-) -> list[dict]:
-    """Extract comments/responses from humans (non-agent users).
-
-    Agent usernames are configurable via AGENT_USERNAMES environment variable.
-    """
-    if agent_users is None:
-        agent_users = _get_agent_usernames()
-    human_responses = []
-
-    for comment in review_comments:
-        if comment.get("user", {}).get("login") not in agent_users:
-            human_responses.append(
-                {
-                    "type": "review_comment",
-                    "user": comment.get("user", {}).get("login"),
-                    "body": comment.get("body", ""),
-                    "in_reply_to_id": comment.get("in_reply_to_id"),
-                    "created_at": comment.get("created_at"),
-                }
-            )
-
-    for comment in issue_comments:
-        if comment.get("user", {}).get("login") not in agent_users:
-            human_responses.append(
-                {
-                    "type": "issue_comment",
-                    "user": comment.get("user", {}).get("login"),
-                    "body": comment.get("body", ""),
-                    "created_at": comment.get("created_at"),
-                }
-            )
-
-    return human_responses
-
-
-def truncate_text(text: str, max_chars: int = 50000) -> str:
-    """Truncate text to stay within reasonable API payload limits.
-
-    Max 50k chars chosen to stay well under typical API payload limits
-    while preserving enough context for evaluation. This keeps the
-    evaluation trace size manageable for Laminar processing.
-    """
-    if len(text) <= max_chars:
-        return text
-    return text[:max_chars] + f"\n\n... [truncated, {len(text)} total chars]"
-
-
-def main():
-    """Run the PR review evaluation."""
-    logger.info("Starting PR review evaluation...")
-
-    # Get required environment variables
-    pr_number = _get_required_env("PR_NUMBER")
-    repo_name = _get_required_env("REPO_NAME")
-    pr_merged = os.getenv("PR_MERGED", "false").lower() == "true"
-
-    logger.info(f"Evaluating PR #{pr_number} in {repo_name}")
-    logger.info(f"PR was merged: {pr_merged}")
-
-    # Read original trace info from artifact
-    trace_info_path = Path("laminar_trace_info.json")
-    original_trace_id = None
-    original_span_context = None
-    original_trace_data = {}
-
-    if trace_info_path.exists():
-        with open(trace_info_path) as f:
-            original_trace_data = json.load(f)
-            original_trace_id = original_trace_data.get("trace_id")
-            original_span_context = original_trace_data.get("span_context")
-            logger.info(f"Original trace ID: {original_trace_id}")
-            if original_span_context:
-                logger.info(
-                    "Found span context - will add evaluation to original trace"
-                )
-            else:
-                logger.info("No span context - evaluation will create standalone trace")
-    else:
-        logger.warning(
-            "No trace info file found - evaluation will create standalone trace"
-        )
-
-    # Fetch PR data from GitHub
-    logger.info("Fetching PR data from GitHub...")
-    review_comments = fetch_pr_review_comments(repo_name, pr_number)
-    issue_comments = fetch_pr_issue_comments(repo_name, pr_number)
-    reviews = fetch_pr_reviews(repo_name, pr_number)
-    final_diff = fetch_pr_diff(repo_name, pr_number)
-    pr_info = fetch_pr_info(repo_name, pr_number)
-
-    logger.info(f"Found {len(review_comments)} review comments")
-    logger.info(f"Found {len(issue_comments)} issue comments")
-    logger.info(f"Found {len(reviews)} reviews")
-
-    # Extract agent comments and human responses
-    agent_comments = extract_agent_comments(review_comments, issue_comments, reviews)
-    human_responses = extract_human_responses(review_comments, issue_comments)
-
-    logger.info(f"Agent made {len(agent_comments)} comments")
-    logger.info(f"Humans made {len(human_responses)} responses")
-
-    # Initialize Laminar for tracing
-    Laminar.initialize()
-
-    # Create evaluation context
-    evaluation_context = {
-        "pr_number": pr_number,
-        "repo_name": repo_name,
-        "pr_merged": pr_merged,
-        "pr_title": pr_info.get("title", ""),
-        "pr_state": pr_info.get("state", ""),
-        "original_trace_id": original_trace_id,
-        "agent_comments": agent_comments,
-        "human_responses": human_responses,
-        "final_diff": truncate_text(final_diff),
-        "total_review_comments": len(review_comments),
-        "total_issue_comments": len(issue_comments),
-    }
-
-    # Create an evaluation span that can be processed by a Laminar signal
-    # The signal will analyze the agent comments vs final diff to determine
-    # which suggestions were addressed.
-    #
-    # IMPORTANT: If we have the original span context, we use parent_span_context
-    # to add this span as a child of the original trace. This allows Laminar
-    # signals to operate on the complete trace (review + evaluation) together.
-    with Laminar.start_as_current_span(
-        name="pr_review_evaluation",
-        input=evaluation_context,
-        tags=["pr-review-evaluation"],
-        parent_span_context=original_span_context,
-    ):
-        # Set trace metadata for filtering and linking
-        Laminar.set_trace_metadata(
-            {
-                "original_trace_id": original_trace_id or "none",
-                "evaluation_type": "pr_review_effectiveness",
-                "pr_number": pr_number,
-                "repo_name": repo_name,
-                "pr_merged": str(pr_merged),
-            }
-        )
-
-        # Log summary for visibility
-        summary = {
-            "pr": f"{repo_name}#{pr_number}",
-            "merged": pr_merged,
-            "agent_comments_count": len(agent_comments),
-            "human_responses_count": len(human_responses),
-            "diff_length": len(final_diff),
-        }
-        logger.info(f"Evaluation summary: {json.dumps(summary)}")
-
-        # Set output with key metrics
-        Laminar.set_span_output(
-            {
-                "summary": summary,
-                "ready_for_signal": True,
-            }
-        )
-
-        # Capture trace ID while inside the span context
-        # (get_trace_id() returns None outside a span context)
-        eval_trace_id = Laminar.get_trace_id()
-
-    # Flush to ensure span is sent
-    Laminar.flush()
-
-    # If we have the original trace ID, we can also score it directly
-    # This provides immediate feedback without waiting for signal processing
-    if original_trace_id:
-        try:
-            client = LaminarClient()
-
-            # PLACEHOLDER SCORE: This is a simple engagement metric, NOT a measure
-            # of review effectiveness. The actual effectiveness score will come from
-            # the Laminar signal which analyzes whether suggestions were implemented.
-            #
-            # This score only indicates:
-            # - Whether humans responded to agent comments (engagement)
-            # - Whether the PR was merged (completion)
-            #
-            # It does NOT measure:
-            # - Whether agent suggestions were actually helpful
-            # - Whether suggestions were implemented in the final code
-            # - Quality of the review feedback
-            preliminary_score = 0.0
-            if agent_comments:
-                engagement_ratio = min(len(human_responses) / len(agent_comments), 1.0)
-                preliminary_score = engagement_ratio * 0.5  # Scale to 0-0.5
-
-                if pr_merged:
-                    preliminary_score += 0.3
-
-            client.evaluators.score(
-                name="review_engagement",
-                trace_id=original_trace_id,
-                score=preliminary_score,
-                metadata={
-                    "agent_comments": len(agent_comments),
-                    "human_responses": len(human_responses),
-                    "pr_merged": pr_merged,
-                    "note": "Placeholder - signal provides effectiveness analysis",
-                    "score_type": "engagement_only",
-                },
-            )
-            logger.info(
-                f"Added preliminary score {preliminary_score:.2f} "
-                f"to original trace {original_trace_id}"
-            )
-
-            # Tag the original trace to indicate evaluation was done
-            client.tags.tag(original_trace_id, ["evaluated", f"pr-{pr_number}"])
-            logger.info(f"Tagged original trace {original_trace_id}")
-
-        except Exception as e:
-            logger.warning(f"Failed to score original trace: {e}")
-            # Don't fail the workflow if scoring fails
-
-    # Print evaluation summary
-    print("\n=== PR Review Evaluation ===")
-    print(f"PR: {repo_name}#{pr_number}")
-    print(f"Merged: {pr_merged}")
-    print(f"Agent Comments: {len(agent_comments)}")
-    print(f"Human Responses: {len(human_responses)}")
-    if original_trace_id:
-        print(f"Original Review Trace: {original_trace_id}")
-    if eval_trace_id:
-        print(f"Evaluation Trace: {eval_trace_id}")
-
-    logger.info("PR review evaluation completed successfully")
-
-
-if __name__ == "__main__":
-    try:
-        main()
-    except Exception as e:
-        logger.error(f"Evaluation failed: {e}")
-        sys.exit(1)