diff --git a/.github/pr-visual/pr-3-b854247.png b/.github/pr-visual/pr-3-b854247.png new file mode 100644 index 0000000..1891990 Binary files /dev/null and b/.github/pr-visual/pr-3-b854247.png differ diff --git a/.github/pr-visual/pr-3-b854247.txt b/.github/pr-visual/pr-3-b854247.txt new file mode 100644 index 0000000..54782de --- /dev/null +++ b/.github/pr-visual/pr-3-b854247.txt @@ -0,0 +1,31 @@ +I will start by exploring the codebase to understand how the agentic logic is integrated and confirm the current state of the project. +I will now check `action.yml` to see the current inputs and how the action is intended to be used. +I will now examine `action/run.js` to see how the Gemini CLI is being used for PR analysis. +I will read `src/analyze.ts` to see if there is any additional information that could be relevant for the creative brief. +**Creative Brief: "Beyond the Diff — The Agentic Evolution of PR Visual"** + +**Goal:** Visualize the transition of the `pr-visual` action from a basic diff-to-image tool to an "Agentic Analyst" that understands the entire codebase context. + +**Key Insight:** Context is the bridge between code and clarity. By using Gemini CLI to read the repository alongside the diff, `pr-visual` now generates insights based on "why" and "how," not just "what changed." + +**Visual Concept: "The Context Engine"** +A central, glowing Gemini "Brain" icon acting as a processor. +1. **Intelligent Input:** On the left, a raw Git Diff document merges with a translucent 3D "Codebase Tree" (representing the tool's new ability to read files). Both flow into the central Brain. +2. **Agentic Analysis:** Inside the Brain, show "gears" or "scanning beams" labeled "Agentic Context" and "Deep Understanding." +3. **Versioned Output:** On the right, instead of a single image, show a **stacked deck of three infographic cards**. + * The top card is vibrant and labeled "Latest (Commit `7f8e9a`)". + * The cards below are slightly faded and labeled with older SHAs (e.g., `4a2b3c`), representing the new commit-based history. +4. **UI Integration:** A small, clean callout box showing a "Previous versions" dropdown menu, mirroring the new GitHub comment UI. + +**Style & Palette:** +* **Style:** TECH / DARK MODE. +* **Background:** Deep obsidian (#0d1117). +* **Accents:** Electric Cyan (#00d4ff) for active analysis, Neon Magenta (#ff00ff) for the versioned history, and Monospace typefaces for technical labels. +* **Atmosphere:** High-tech, precise, and automated. + +**One Key Diagram:** +A horizontal flow: **[Diff + Repository] ➔ [Gemini Agent] ➔ [Versioned Infographic History]**. + +**Headline:** "Context-Aware PR Visuals: Every Commit, Fully Understood." + +IMPORTANT STYLE OVERRIDE: Use a CLEAN / CORPORATE style: Professional PowerPoint aesthetic, polished boxes with shadows, blues/grays/teal palette, clean sans-serif fonts, structured grid layout. \ No newline at end of file diff --git a/.github/workflows/pr-visual-with-gemini-cli.yml.template b/.github/workflows/pr-visual-with-gemini-cli.yml.template deleted file mode 100644 index da728f8..0000000 --- a/.github/workflows/pr-visual-with-gemini-cli.yml.template +++ /dev/null @@ -1,59 +0,0 @@ -# PR Visual with Gemini CLI - Agentic codebase exploration -# -# This workflow lets Gemini CLI explore your codebase to understand the PR, -# then generates a visual based on that understanding. -# -# Better for: Complex PRs where raw diffs lack context - -name: PR Visual (Agentic) - -on: - pull_request: - types: [opened, synchronize] - paths-ignore: - - '.github/pr-visual/**' # Prevent infinite loop - -jobs: - visualize: - runs-on: ubuntu-latest - permissions: - contents: write - pull-requests: write - - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - # Step 1: Let Gemini CLI explore the codebase and understand the PR - - name: Understand PR with Gemini CLI - id: understand - uses: google-github-actions/run-gemini-cli@v1 - with: - gemini_api_key: ${{ secrets.GEMINI_API_KEY }} - prompt: | - You are a creative director preparing a brief for an infographic designer. - - Explore this PR's changes. Read any files you need to fully understand: - - What problem does this PR solve? - - What's the key change or insight? - - How does it fit into the broader codebase? - - Then write a creative brief for generating a visual infographic. - - Guidelines for the brief: - - Scale complexity to the change (small fix = simple visual) - - Focus on the ONE key insight, not every detail - - Prefer clarity over comprehensiveness - - A single compelling diagram beats 5 dense sections - - Output ONLY the creative brief, no preamble. - - # Step 2: Generate the visual from the brief - - name: Generate PR Visual - uses: gitethanwoo/pr-visual@v1 - with: - gemini-api-key: ${{ secrets.GEMINI_API_KEY }} - prompt: ${{ steps.understand.outputs.summary }} - style: clean # or: excalidraw, minimal, tech, playful diff --git a/.github/workflows/pr-visual.yml b/.github/workflows/pr-visual.yml index b89b6c5..80812d1 100644 --- a/.github/workflows/pr-visual.yml +++ b/.github/workflows/pr-visual.yml @@ -4,8 +4,6 @@ name: PR Visual on: pull_request: types: [opened, synchronize] - paths-ignore: - - '.github/pr-visual/**' jobs: visualize: diff --git a/.github/workflows/pr-visual.yml.template b/.github/workflows/pr-visual.yml.template deleted file mode 100644 index ad021b2..0000000 --- a/.github/workflows/pr-visual.yml.template +++ /dev/null @@ -1,32 +0,0 @@ -# PR Visual - Auto-generate infographics for your PRs -# -# Copy this file to your repo as .github/workflows/pr-visual.yml -# Then add your GEMINI_API_KEY to repository secrets. -# -# Get a Gemini API key at: https://aistudio.google.com/apikey - -name: PR Visual - -on: - pull_request: - types: [opened, synchronize] - -jobs: - visualize: - runs-on: ubuntu-latest - permissions: - contents: write # Required to commit the image to the PR branch - pull-requests: write # Required to post comments - - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 # Full history needed for diff - - - name: Generate PR Visual - uses: YOUR_USERNAME/pr-visual@v1 - with: - gemini-api-key: ${{ secrets.GEMINI_API_KEY }} - style: clean # Options: clean, excalidraw, minimal, tech, playful - # comment: true # Set to false to skip posting a comment diff --git a/README.md b/README.md index d20eb4b..9c643be 100644 --- a/README.md +++ b/README.md @@ -99,8 +99,6 @@ name: PR Visual on: pull_request: types: [opened, synchronize] - paths-ignore: - - '.github/pr-visual/**' # Prevent infinite loop jobs: visualize: @@ -121,55 +119,10 @@ jobs: ``` The action will: -- Generate an infographic from the PR diff -- Commit it to `.github/pr-visual/` on the PR branch -- Post a comment with the embedded image - -### Advanced: Agentic Mode with Gemini CLI - -For complex PRs where raw diffs lack context, chain with [Gemini CLI](https://github.com/google-github-actions/run-gemini-cli) to let AI explore your codebase first: - -```yaml -name: PR Visual (Agentic) - -on: - pull_request: - types: [opened, synchronize] - paths-ignore: - - '.github/pr-visual/**' # Prevent infinite loop - -jobs: - visualize: - runs-on: ubuntu-latest - permissions: - contents: write - pull-requests: write - - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - # Step 1: Let Gemini CLI explore and understand the PR - - name: Understand PR - id: understand - uses: google-github-actions/run-gemini-cli@v1 - with: - gemini_api_key: ${{ secrets.GEMINI_API_KEY }} - prompt: | - Explore this PR. Read files as needed to understand what it does. - Then write a concise creative brief for an infographic. - Scale complexity to the change - small fixes need simple visuals. - Output ONLY the brief. - - # Step 2: Generate visual from the brief - - uses: gitethanwoo/pr-visual@v1 - with: - gemini-api-key: ${{ secrets.GEMINI_API_KEY }} - prompt: ${{ steps.understand.outputs.summary }} -``` - -This approach produces better visuals because Gemini can read the actual code, not just the diff. +- Use [Gemini CLI](https://github.com/google-gemini/gemini-cli) to intelligently analyze the PR (can read files for context, not just the diff) +- Generate an infographic from the analysis +- Upload it to a GitHub Release (no commits to your PR branch!) +- Post a comment with the image and a collapsible prompt/history section ## CLI Reference diff --git a/action.yml b/action.yml index 6d0dadc..cb25531 100644 --- a/action.yml +++ b/action.yml @@ -52,10 +52,6 @@ runs: cd action npm ci - - name: Install Gemini CLI - shell: bash - run: npm install -g @google/gemini-cli - - name: Generate PR Visual id: generate shell: bash diff --git a/action/run.js b/action/run.js index e065476..0b7cf0d 100644 --- a/action/run.js +++ b/action/run.js @@ -7,6 +7,7 @@ import * as path from "node:path"; import { execSync } from "node:child_process"; const IMAGE_MODEL = "gemini-3-pro-image-preview"; +const RELEASE_TAG = "pr-visual-assets"; const STYLE_INSTRUCTIONS = { clean: `Use a CLEAN / CORPORATE style: Professional PowerPoint aesthetic, polished boxes with shadows, blues/grays/teal palette, clean sans-serif fonts, structured grid layout.`, @@ -62,9 +63,9 @@ Analyze this diff. Read any files you need to understand the context. Then outpu try { console.log("Running Gemini CLI for agentic analysis..."); - // Run gemini CLI in headless mode with auto-approve for file reads + // Run gemini CLI via npx in headless mode with auto-approve for file reads const output = execSync( - `cat "${tempFile}" | gemini -y -m gemini-3-flash-preview`, + `cat "${tempFile}" | npx -y @google/gemini-cli -y -m gemini-3-flash-preview`, { encoding: "utf-8", maxBuffer: 10 * 1024 * 1024, // 10MB buffer @@ -110,91 +111,121 @@ async function generateImage(prompt, apiKey) { throw new Error("No image data in response"); } -async function commitImageToPR(octokit, imageBuffer, context, prompt) { +async function getOrCreateRelease(octokit, owner, repo) { + // Try to get existing release + try { + const { data: release } = await octokit.rest.repos.getReleaseByTag({ + owner, + repo, + tag: RELEASE_TAG, + }); + return release; + } catch (e) { + if (e.status !== 404) throw e; + } + + // Create the release + console.log("Creating pr-visual-assets release..."); + const { data: release } = await octokit.rest.repos.createRelease({ + owner, + repo, + tag_name: RELEASE_TAG, + name: "PR Visual Assets", + body: "Auto-generated release for storing PR visual images. Do not delete.", + draft: false, + prerelease: false, + }); + + return release; +} + +async function uploadToRelease(octokit, imageBuffer, context, prompt) { const { owner, repo } = context.repo; const prNumber = context.payload.pull_request.number; - const headRef = context.payload.pull_request.head.ref; const commitSha = context.payload.pull_request.head.sha.slice(0, 7); - // Use commit SHA in filename so each push gets its own image - const imagePath = `.github/pr-visual/pr-${prNumber}-${commitSha}.png`; - const promptPath = `.github/pr-visual/pr-${prNumber}-${commitSha}.txt`; - const imageContent = imageBuffer.toString("base64"); - const promptContent = Buffer.from(prompt).toString("base64"); + const release = await getOrCreateRelease(octokit, owner, repo); - // Commit both image and prompt file - await octokit.rest.repos.createOrUpdateFileContents({ + const imageName = `pr-${prNumber}-${commitSha}.png`; + const promptName = `pr-${prNumber}-${commitSha}.txt`; + + // Delete existing assets with same name (if re-running on same commit) + for (const asset of release.assets) { + if (asset.name === imageName || asset.name === promptName) { + await octokit.rest.repos.deleteReleaseAsset({ + owner, + repo, + asset_id: asset.id, + }); + } + } + + // Upload image + console.log(`Uploading ${imageName} to release...`); + const { data: imageAsset } = await octokit.rest.repos.uploadReleaseAsset({ owner, repo, - path: imagePath, - message: `Add PR visual for #${prNumber} (${commitSha})`, - content: imageContent, - branch: headRef, + release_id: release.id, + name: imageName, + data: imageBuffer, }); - await octokit.rest.repos.createOrUpdateFileContents({ + // Upload prompt + console.log(`Uploading ${promptName} to release...`); + await octokit.rest.repos.uploadReleaseAsset({ owner, repo, - path: promptPath, - message: `Add PR visual prompt for #${prNumber} (${commitSha})`, - content: promptContent, - branch: headRef, + release_id: release.id, + name: promptName, + data: Buffer.from(prompt), }); - const imageUrl = `https://raw.githubusercontent.com/${owner}/${repo}/${headRef}/${imagePath}`; - return { imagePath, imageUrl, commitSha }; + const imageUrl = imageAsset.browser_download_url; + return { imageName, imageUrl, commitSha }; } async function getExistingImages(octokit, context) { const { owner, repo } = context.repo; const prNumber = context.payload.pull_request.number; - const headRef = context.payload.pull_request.head.ref; try { - const { data: contents } = await octokit.rest.repos.getContent({ + const { data: release } = await octokit.rest.repos.getReleaseByTag({ owner, repo, - path: ".github/pr-visual", - ref: headRef, + tag: RELEASE_TAG, }); - if (!Array.isArray(contents)) return []; - - // Filter to images for this PR, extract commit SHA from filename - const prImages = contents - .filter((f) => f.name.startsWith(`pr-${prNumber}-`) && f.name.endsWith(".png")) - .map((f) => { - const match = f.name.match(/pr-\d+-([a-f0-9]+)\.png/); + // Filter to images for this PR + const prImages = release.assets + .filter((a) => a.name.startsWith(`pr-${prNumber}-`) && a.name.endsWith(".png")) + .map((a) => { + const match = a.name.match(/pr-\d+-([a-f0-9]+)\.png/); const sha = match ? match[1] : null; return { - name: f.name, + name: a.name, sha, - url: `https://raw.githubusercontent.com/${owner}/${repo}/${headRef}/.github/pr-visual/${f.name}`, - promptUrl: `https://raw.githubusercontent.com/${owner}/${repo}/${headRef}/.github/pr-visual/pr-${prNumber}-${sha}.txt`, + url: a.browser_download_url, + promptAsset: release.assets.find((p) => p.name === `pr-${prNumber}-${sha}.txt`), }; }); // Fetch prompts for each image for (const img of prImages) { - try { - const { data: promptFile } = await octokit.rest.repos.getContent({ - owner, - repo, - path: `.github/pr-visual/pr-${prNumber}-${img.sha}.txt`, - ref: headRef, - }); - if (promptFile.content) { - img.prompt = Buffer.from(promptFile.content, "base64").toString("utf-8"); + if (img.promptAsset) { + try { + const response = await fetch(img.promptAsset.browser_download_url); + img.prompt = await response.text(); + } catch (e) { + img.prompt = null; } - } catch (e) { - // Prompt file doesn't exist for older images + } else { img.prompt = null; } } return prImages; } catch (e) { - // Directory doesn't exist yet + // Release doesn't exist yet return []; } } @@ -230,7 +261,7 @@ ${formatPrompt(img.prompt)} \`\`\` `; - return `### \`${img.sha}\`\n![${img.sha}](${img.url}?t=${Date.now()})\n${promptSection}`; + return `### \`${img.sha}\`\n![${img.sha}](${img.url})\n${promptSection}`; }) .join("\n\n"); historySection = ` @@ -248,7 +279,7 @@ ${imageList} **Latest** (\`${currentSha}\`): -![PR Infographic](${imageUrl}?t=${Date.now()}) +![PR Infographic](${imageUrl})
View prompt @@ -355,17 +386,17 @@ async function main() { console.log("\nGenerating image..."); const imageBuffer = await generateImage(finalPrompt, apiKey || hostedApiKey); - console.log("Committing image to PR branch..."); - const { imagePath, imageUrl, commitSha } = await commitImageToPR(octokit, imageBuffer, context, finalPrompt); + console.log("Uploading image to GitHub Release..."); + const { imageName, imageUrl, commitSha } = await uploadToRelease(octokit, imageBuffer, context, finalPrompt); - console.log(`Image committed to: ${imagePath}`); + console.log(`Image uploaded: ${imageName}`); if (shouldComment) { console.log("Posting comment..."); await postOrUpdateComment(octokit, context, imageUrl, style, commitSha, finalPrompt); } - core.setOutput("image-path", imagePath); + core.setOutput("image-path", imageName); core.setOutput("image-url", imageUrl); console.log("PR Visual complete!");