From 4a4c781e6aadb833e4c1bc22f7502a287ab93582 Mon Sep 17 00:00:00 2001
From: Heng Qian
Date: Thu, 26 Mar 2026 10:13:25 +0000
Subject: [PATCH] Add automated duplicate issue detection and auto-close workflows

Implements a 3-workflow system using claude-code-action with Bedrock OIDC:
- claude-dedupe-issues.yml: detects duplicates on new issues via Claude
- auto-close-duplicates.yml: daily cron closes flagged issues after 3 days
- remove-autoclose-on-activity.yml: removes autoclose label on human comment

Signed-off-by: Heng Qian
Co-Authored-By: Claude Opus 4.6
---
 .claude/commands/dedupe.md                    |  33 +++++
 .github/workflows/auto-close-duplicates.yml   | 149 ++++++++++++++++++
 .github/workflows/claude-dedupe-issues.yml    |  42 ++++++
 .../remove-autoclose-on-activity.yml          |  42 ++++++
 scripts/comment-on-duplicates.sh              | 116 ++++++++++++++
 5 files changed, 382 insertions(+)
 create mode 100644 .claude/commands/dedupe.md
 create mode 100644 .github/workflows/auto-close-duplicates.yml
 create mode 100644 .github/workflows/claude-dedupe-issues.yml
 create mode 100644 .github/workflows/remove-autoclose-on-activity.yml
 create mode 100755 scripts/comment-on-duplicates.sh

diff --git a/.claude/commands/dedupe.md b/.claude/commands/dedupe.md
new file mode 100644
index 00000000000..5cefb3674b4
--- /dev/null
+++ b/.claude/commands/dedupe.md
@@ -0,0 +1,33 @@
+---
+allowed-tools: Bash(gh:*), Bash(./scripts/comment-on-duplicates.sh:*)
+description: Find duplicate GitHub issues
+---
+
+Find up to 3 likely duplicate issues for a given GitHub issue.
+
+Follow these steps precisely:
+
+1. Use `gh issue view <issue-number>` to read the issue. If the issue is closed, or is broad product feedback without a specific bug/feature request, or already has a duplicate detection comment (containing `<!-- duplicate-detection -->`), stop and report why you are not proceeding.
+
+2. Summarize the issue's core problem in 2-3 sentences. Identify the key terms, error messages, and affected components.
+
+3. Search for potential duplicates using **at least 3 different search strategies**. Run these searches in parallel:
+   - `gh search issues "<key terms>" --repo $GITHUB_REPOSITORY --state open --limit 15`
+   - `gh search issues "<error message>" --repo $GITHUB_REPOSITORY --state open --limit 15`
+   - `gh search issues "<affected component>" --repo $GITHUB_REPOSITORY --state open --limit 15`
+   - `gh search issues "<key terms>" --repo $GITHUB_REPOSITORY --state all --limit 10` (include closed issues for reference)
+
+4. For each candidate issue that looks like a potential match, read it with `gh issue view <candidate-number>` to verify it is truly about the same problem. Filter out false positives — issues that merely share keywords but describe different problems.
+
+5. If you find 1-3 genuine duplicates, post the result using the comment script:
+   ```
+   ./scripts/comment-on-duplicates.sh --base-issue <issue-number> --potential-duplicates <dup1> [dup2] [dup3]
+   ```
+
+6. If no genuine duplicates are found, report that no duplicates were detected and take no further action.
+
+Important notes:
+- Only flag issues as duplicates when you are confident they describe the **same underlying problem**
+- Prefer open issues as duplicates, but closed issues can be referenced too
+- Do not flag the issue as a duplicate of itself
+- The base issue number is the last part of the issue reference (e.g., for `owner/repo/issues/42`, the number is `42`); pass plain numbers to the script, not URLs or `#42`
diff --git a/.github/workflows/auto-close-duplicates.yml b/.github/workflows/auto-close-duplicates.yml
new file mode 100644
index 00000000000..aa83c378bdc
--- /dev/null
+++ b/.github/workflows/auto-close-duplicates.yml
@@ -0,0 +1,149 @@
+name: Auto-close duplicate issues
+
+on:
+  schedule:
+    - cron: "0 9 * * *"
+  workflow_dispatch:
+
+permissions:
+  issues: write
+
+jobs:
+  auto-close-duplicates:
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - name: Close stale duplicate issues
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const { owner, repo } = context.repo;
+            const DAY_MS = 24 * 60 * 60 * 1000;
+            const THREE_DAYS_MS = 3 * DAY_MS;
+            // Hidden marker written by scripts/comment-on-duplicates.sh; keep the two in sync.
+            const DUPE_MARKER = '<!-- duplicate-detection -->';
+            // Phrase used in this workflow's own closing comment.
+            const CLOSE_NOTICE = 'automatically closed as a duplicate';
+            const now = Date.now();
+
+            // Find all open issues with the autoclose label
+            const issues = await github.paginate(github.rest.issues.listForRepo, {
+              owner,
+              repo,
+              state: 'open',
+              labels: 'autoclose',
+              per_page: 100,
+            });
+
+            console.log(`Found ${issues.length} open issues with autoclose label`);
+
+            let closedCount = 0;
+
+            for (const issue of issues) {
+              // The issues API also lists pull requests; never auto-close those.
+              if (issue.pull_request) {
+                continue;
+              }
+
+              console.log(`Processing issue #${issue.number}: ${issue.title}`);
+
+              // Fetch every comment: the detection comment or a later human reply
+              // may sit beyond the first page of 100.
+              const comments = await github.paginate(github.rest.issues.listComments, {
+                owner,
+                repo,
+                issue_number: issue.number,
+                per_page: 100,
+              });
+
+              // Find the duplicate detection comment (posted by our script)
+              const dupeComments = comments.filter(c =>
+                (c.body ?? '').includes(DUPE_MARKER)
+              );
+
+              if (dupeComments.length === 0) {
+                console.log(`  No duplicate detection comment found, skipping`);
+                continue;
+              }
+
+              const lastDupeComment = dupeComments[dupeComments.length - 1];
+              const dupeCommentTime = new Date(lastDupeComment.created_at).getTime();
+              const dupeCommentAge = now - dupeCommentTime;
+
+              if (dupeCommentAge < THREE_DAYS_MS) {
+                const daysLeft = ((THREE_DAYS_MS - dupeCommentAge) / DAY_MS).toFixed(1);
+                console.log(`  Duplicate comment is too recent (${daysLeft} days remaining), skipping`);
+                continue;
+              }
+
+              // Check for human comments after the duplicate detection comment
+              const humanCommentsAfter = comments.filter(c => {
+                const body = c.body ?? '';
+                return new Date(c.created_at).getTime() > dupeCommentTime &&
+                  c.user?.type !== 'Bot' &&
+                  !body.includes(DUPE_MARKER) &&
+                  !body.includes(CLOSE_NOTICE);
+              });
+
+              if (humanCommentsAfter.length > 0) {
+                console.log(`  Has ${humanCommentsAfter.length} human comment(s) after detection, skipping`);
+                continue;
+              }
+
+              // Check for thumbs-down reaction from the issue author
+              const reactions = await github.paginate(github.rest.reactions.listForIssueComment, {
+                owner,
+                repo,
+                comment_id: lastDupeComment.id,
+                per_page: 100,
+              });
+
+              const authorThumbsDown = reactions.some(r =>
+                r.content === '-1' && r.user != null && r.user.id === issue.user?.id
+              );
+
+              if (authorThumbsDown) {
+                console.log(`  Issue author gave thumbs-down on duplicate comment, skipping`);
+                continue;
+              }
+
+              // Extract the primary duplicate issue number from the comment
+              const dupeMatch = lastDupeComment.body.match(/#(\d+)/);
+
+              if (!dupeMatch) {
+                // Without a target number the closing comment would be meaningless.
+                console.log(`  No duplicate issue number found in detection comment, skipping`);
+                continue;
+              }
+
+              const dupeNumber = dupeMatch[1];
+
+              // Close the issue
+              console.log(`  Closing as duplicate of #${dupeNumber}`);
+
+              await github.rest.issues.update({
+                owner,
+                repo,
+                issue_number: issue.number,
+                state: 'closed',
+                state_reason: 'not_planned',
+              });
+
+              await github.rest.issues.addLabels({
+                owner,
+                repo,
+                issue_number: issue.number,
+                labels: ['duplicate'],
+              });
+
+              await github.rest.issues.createComment({
+                owner,
+                repo,
+                issue_number: issue.number,
+                body: `This issue has been automatically closed as a duplicate of #${dupeNumber}.\n\nIf this is incorrect, please reopen this issue or create a new one.\n\n🤖 Generated with [Claude Code](https://claude.ai/code)`,
+              });
+
+              closedCount++;
+            }
+
+            console.log(`Done. Closed ${closedCount} duplicate issue(s).`);
diff --git a/.github/workflows/claude-dedupe-issues.yml b/.github/workflows/claude-dedupe-issues.yml
new file mode 100644
index 00000000000..385e57a155a
--- /dev/null
+++ b/.github/workflows/claude-dedupe-issues.yml
@@ -0,0 +1,42 @@
+name: Claude Issue Dedupe
+
+on:
+  issues:
+    types: [opened]
+  workflow_dispatch:
+    inputs:
+      issue_number:
+        description: 'Issue number to check for duplicates'
+        required: true
+        type: string
+
+permissions:
+  contents: read
+  issues: write
+  id-token: write
+
+jobs:
+  dedupe:
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Configure AWS Credentials (OIDC)
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
+          aws-region: us-west-2
+
+      - name: Run duplicate detection
+        uses: anthropics/claude-code-action@v1
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GITHUB_REPOSITORY: ${{ github.repository }}
+        with:
+          use_bedrock: "true"
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          allowed_non_write_users: "*"
+          prompt: "/dedupe ${{ github.repository }}/issues/${{ github.event.issue.number || inputs.issue_number }}"
+          claude_args: "--model us.anthropic.claude-sonnet-4-5-20250929-v1:0"
diff --git a/.github/workflows/remove-autoclose-on-activity.yml b/.github/workflows/remove-autoclose-on-activity.yml
new file mode 100644
index 00000000000..67dbfc0c1b4
--- /dev/null
+++ b/.github/workflows/remove-autoclose-on-activity.yml
@@ -0,0 +1,42 @@
+name: Remove autoclose label on activity
+
+on:
+  issue_comment:
+    types: [created]
+
+permissions:
+  issues: write
+
+jobs:
+  remove-autoclose:
+    if: |
+      github.event.issue.state == 'open' &&
+      contains(github.event.issue.labels.*.name, 'autoclose') &&
+      github.event.comment.user.type != 'Bot'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Remove autoclose label
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const { owner, repo } = context.repo;
+            const issueNumber = context.issue.number;
+            const commenter = context.payload.comment.user.login;
+
+            console.log(`Removing autoclose label from issue #${issueNumber} due to comment from ${commenter}`);
+
+            try {
+              await github.rest.issues.removeLabel({
+                owner,
+                repo,
+                issue_number: issueNumber,
+                name: 'autoclose',
+              });
+              console.log(`Successfully removed autoclose label from issue #${issueNumber}`);
+            } catch (error) {
+              if (error.status === 404) {
+                console.log(`autoclose label was already removed from issue #${issueNumber}`);
+              } else {
+                throw error;
+              }
+            }
diff --git a/scripts/comment-on-duplicates.sh b/scripts/comment-on-duplicates.sh
new file mode 100755
index 00000000000..04a9935c5ba
--- /dev/null
+++ b/scripts/comment-on-duplicates.sh
@@ -0,0 +1,116 @@
+#!/bin/bash
+#
+# Copyright OpenSearch Contributors
+# SPDX-License-Identifier: Apache-2.0
+#
+# Posts a formatted duplicate detection comment and adds the autoclose label.
+#
+# The comment begins with a hidden HTML marker that
+# .github/workflows/auto-close-duplicates.yml searches for; keep both in sync.
+#
+# Issue numbers must be plain decimal integers. Anything else is rejected,
+# because the arguments are derived from untrusted issue content.
+#
+# Usage:
+#   ./scripts/comment-on-duplicates.sh --base-issue 123 --potential-duplicates 456 789
+
+set -euo pipefail
+
+MARKER="<!-- duplicate-detection -->"
+REPO="${GITHUB_REPOSITORY:-}"
+BASE_ISSUE=""
+DUPLICATES=()
+
+while [[ $# -gt 0 ]]; do
+  case $1 in
+    --base-issue)
+      if [[ $# -lt 2 ]]; then
+        echo "Error: --base-issue requires a value" >&2
+        exit 1
+      fi
+      BASE_ISSUE="$2"
+      shift 2
+      ;;
+    --potential-duplicates)
+      shift
+      while [[ $# -gt 0 && ! "$1" =~ ^-- ]]; do
+        DUPLICATES+=("$1")
+        shift
+      done
+      ;;
+    *)
+      echo "Unknown argument: $1" >&2
+      exit 1
+      ;;
+  esac
+done
+
+if [[ -z "$BASE_ISSUE" ]]; then
+  echo "Error: --base-issue is required" >&2
+  exit 1
+fi
+
+if [[ ${#DUPLICATES[@]} -eq 0 ]]; then
+  echo "Error: --potential-duplicates requires at least one issue number" >&2
+  exit 1
+fi
+
+# Validate every number before calling gh with it.
+for num in "$BASE_ISSUE" "${DUPLICATES[@]}"; do
+  if [[ ! "$num" =~ ^[0-9]+$ ]]; then
+    echo "Error: invalid issue number: $num" >&2
+    exit 1
+  fi
+done
+
+# An issue cannot be a duplicate of itself (10# avoids octal parsing of leading zeros).
+for dup in "${DUPLICATES[@]}"; do
+  if (( 10#$dup == 10#$BASE_ISSUE )); then
+    echo "Error: issue #${BASE_ISSUE} cannot be a duplicate of itself" >&2
+    exit 1
+  fi
+done
+
+# Kept as an array so the flag and its value are passed as separate, quoted words.
+REPO_ARGS=()
+if [[ -n "$REPO" ]]; then
+  REPO_ARGS=(--repo "$REPO")
+fi
+
+# Build duplicate list
+DUP_LIST=""
+for dup in "${DUPLICATES[@]}"; do
+  TITLE=$(gh issue view "$dup" ${REPO_ARGS[@]+"${REPO_ARGS[@]}"} --json title -q .title 2>/dev/null || echo "")
+  if [[ -n "$TITLE" ]]; then
+    DUP_LIST+="- #${dup} — ${TITLE}"$'\n'
+  else
+    DUP_LIST+="- #${dup}"$'\n'
+  fi
+done
+
+# Build the comment body with a hidden marker for auto-close detection
+BODY="${MARKER}
+### Possible Duplicate
+
+Found **${#DUPLICATES[@]}** possible duplicate issue(s):
+
+${DUP_LIST}
+If this is **not** a duplicate, please comment on this issue and the \`autoclose\` label will be removed automatically.
+
+Otherwise, this issue will be **automatically closed in 3 days**.
+
+🤖 Generated with [Claude Code](https://claude.ai/code)"
+
+# Post the comment
+echo "$BODY" | gh issue comment "$BASE_ISSUE" ${REPO_ARGS[@]+"${REPO_ARGS[@]}"} --body-file -
+
+# Ensure the autoclose label exists
+gh label create "autoclose" \
+  --description "Issue will be auto-closed as duplicate" \
+  --color "cccccc" \
+  ${REPO_ARGS[@]+"${REPO_ARGS[@]}"} 2>/dev/null || true
+
+# Add autoclose label
+gh issue edit "$BASE_ISSUE" ${REPO_ARGS[@]+"${REPO_ARGS[@]}"} --add-label "autoclose"
+
+echo "Posted duplicate comment and added autoclose label to issue #${BASE_ISSUE}"