From 2d88d81a4f8af0415cfba314a3c728d13bf1a13d Mon Sep 17 00:00:00 2001 From: Michal Berger Date: Fri, 16 May 2025 12:04:04 +0200 Subject: [PATCH 1/2] .github/scripts: Refactor false-positive helper into python The amount of json data we need to pass through jq is quite cumbersome to handle, not allowing to easily apply more complex logic against the searched data. Having it under py should be far more straightforward. Signed-off-by: Michal Berger --- .github/scripts/helpers/__init__.py | 9 ++ .github/scripts/helpers/env.py | 22 +++ .github/scripts/helpers/gerrit.py | 38 +++++ .github/scripts/helpers/github.py | 46 ++++++ .../scripts/parse_false_positive_comment.py | 134 ++++++++++++++++++ .../scripts/parse_false_positive_comment.sh | 118 --------------- .../gerrit-false-positives-handler.yml | 2 +- 7 files changed, 250 insertions(+), 119 deletions(-) create mode 100644 .github/scripts/helpers/__init__.py create mode 100755 .github/scripts/helpers/env.py create mode 100755 .github/scripts/helpers/gerrit.py create mode 100755 .github/scripts/helpers/github.py create mode 100755 .github/scripts/parse_false_positive_comment.py delete mode 100755 .github/scripts/parse_false_positive_comment.sh diff --git a/.github/scripts/helpers/__init__.py b/.github/scripts/helpers/__init__.py new file mode 100644 index 0000000..e03c358 --- /dev/null +++ b/.github/scripts/helpers/__init__.py @@ -0,0 +1,9 @@ +from .env import Env +from .gerrit import Gerrit +from .github import GitHub + +__all__ = [ + "Env", + "Gerrit", + "GitHub", +] diff --git a/.github/scripts/helpers/env.py b/.github/scripts/helpers/env.py new file mode 100755 index 0000000..dc69002 --- /dev/null +++ b/.github/scripts/helpers/env.py @@ -0,0 +1,22 @@ +import os + + +class Env: + def __init__(self): + self.reported_by = os.getenv("AUTHOR") + self.gerrit_comment = os.getenv("COMMENT") + self.gerrit_bot_http_passwd = os.getenv("GERRIT_BOT_HTTP_PASSWD") + self.gerrit_bot_user = os.getenv("GERRIT_BOT_USER") + self.gh_issue_pat = os.getenv("GH_ISSUES_PAT") + self.gh_repo = os.getenv("GH_REPO") + self.spdk_repo = os.getenv("REPO") + self.change_num = os.getenv("change_num") # FIXME: to uppercase + self.patch_set = os.getenv("patch_set") # FIXME: to uppercase + + if not all(self.__dict__.values()): + raise AttributeError( + f"Not all env attributes were set: {self.__dict__.items()}" + ) + + self.change_num = int(self.change_num) + self.patch_set = int(self.patch_set) diff --git a/.github/scripts/helpers/gerrit.py b/.github/scripts/helpers/gerrit.py new file mode 100755 index 0000000..e6952f2 --- /dev/null +++ b/.github/scripts/helpers/gerrit.py @@ -0,0 +1,38 @@ +import json +import requests + + +class Gerrit: + def __init__(self, env, auth=True): + self.env = env + self.auth = auth + self.format = "o=DETAILED_ACCOUNTS&o=MESSAGES&o=LABELS&o=SKIP_DIFFSTAT" + self.gerrit_url = "https://review.spdk.io" + self.creds = () + if self.auth: + self.creds = (self.env.gerrit_bot_user, self.env.gerrit_bot_http_passwd) + self.gerrit_url += "/a/changes" + else: + self.gerrit_url += "/changes" + + def get_change(self): + raw = requests.get( + f"{self.gerrit_url}/{self.env.spdk_repo.replace("/", "%2F")}~{self.env.change_num}?{self.format}", + auth=self.creds, + ) + + raw.raise_for_status() + details = raw.text.splitlines() + + if len(details) != 2: + return {} + return json.loads(details[1]) + + def post_comment(self, msg): + post = requests.post( + f"{self.gerrit_url}/{self.env.change_num}/revisions/{self.env.patch_set}/review", + headers={"Content-Type": "application/json"}, + data=json.dumps(msg), + ) + + post.raise_for_status() diff --git a/.github/scripts/helpers/github.py b/.github/scripts/helpers/github.py new file mode 100755 index 0000000..bde7e58 --- /dev/null +++ b/.github/scripts/helpers/github.py @@ -0,0 +1,46 @@ +import re +import requests +import json + + +class GitHub: + def __init__(self, env, auth=True): + self.env = env + self.auth = auth + self.gh_url = f"https://api.github.com/repos/{self.env.gh_repo}" + self.gh_auth_headers = {} + if self.auth: + self.gh_auth_headers = { + "Accept": "application/vnd.github+json", + "Authorization": f"Bearer {self.env.gh_issue_pat}", + } + + def get_issue_from_gerrit_comment(self): + pattern = re.compile( + r"patch set [0-9]+:\n\nfalse positive:\s*[#]?([0-9]+)$", + re.IGNORECASE, + ) + + gh_issue = int(re.search(pattern, self.env.gerrit_comment).groups()[0]) + gh_issue_j = requests.get( + f"{self.gh_url}/issues/{gh_issue}", headers=self.gh_auth_headers + ).json() + + return gh_issue, gh_issue_j + + def post_comment(self, gh_issue, msg): + post = requests.post( + f"{self.gh_url}/issues/{gh_issue}/comments", + headers=self.gh_auth_headers, + json=json.dumps(msg), + ) + + post.raise_for_status() + + def post_rerun(self, run_id): + post = requests.post( + f"{self.gh_url}/actions/runs/{run_id}/run", + headers=self.gh_auth_headers, + ) + + post.raise_for_status() diff --git a/.github/scripts/parse_false_positive_comment.py b/.github/scripts/parse_false_positive_comment.py new file mode 100755 index 0000000..1a565e2 --- /dev/null +++ b/.github/scripts/parse_false_positive_comment.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python3 + +import re +import sys + +from helpers import Env, Gerrit, GitHub + + +def is_wip(change): + return change.get("work_in_progress") + + +def is_current_revision(change, patch_set): + return change.get("current_revision_number") == patch_set + + +def is_negative_from_user(change, user): + user_ver = next( + (ver for ver in change["labels"]["Verified"]["all"] if ver["username"] == user), + {}, + ) + + return user_ver and user_ver.get("value", 0) == -1 + + +def is_issue_state(issue, state="open"): + set_state = issue.get("state").lower() + return set_state == state.lower() + + +def get_failed_builds_for_patch_by_user(change, user, patch_set): + # Group comments into respective patchsets, pick the latest "Build failed" comment from + # the latest patchset it was recorded at and check if that patchset is older or the + # same as patch_set. + + pattern = "Build failed. Results: " + comments_per_patchset = {} + + for msg in change["messages"]: + if msg["author"]["username"] != user: + continue + if msg["_revision_number"] not in comments_per_patchset: + comments_per_patchset[msg["_revision_number"]] = [] + comments_per_patchset[msg["_revision_number"]].append(msg["message"]) + + latest_build_failed_patchset = -1 + for p in sorted(comments_per_patchset.keys()): + if pattern in "\n".join(comments_per_patchset[p]): + latest_build_failed_patchset = p + + if latest_build_failed_patchset == -1 or latest_build_failed_patchset > patch_set: + return [] + + failed_builds = [] + for comment in comments_per_patchset[latest_build_failed_patchset]: + for line in comment.splitlines(): + if pattern in line: + failed_builds.append(line) + + return failed_builds + + +def get_url_details_from_failed_build(build): + run_url_pattern = re.compile(r"\((https://.+)\)") + run_id_pattern = re.compile(r"\[([0-9]+)/[0-9]+\]") + + return ( + re.search(run_url_pattern, build).groups()[0], + re.search(run_id_pattern, build).groups()[0], + ) + + +def main(): + env = Env() + gerrit = Gerrit(env, auth=True) + github = GitHub(env, auth=True) + + change_details = gerrit.get_change() + change_gh_details = github.get_issue_from_gerrit_comment() + + (gh_issue, gh_issue_j) = change_gh_details + + if not change_details: + print(f"Change {env.change_num} does not exist? Verify the environment.") + sys.exit(0) + + if not gh_issue: + print( + "Ignore. Comment does not include valid false positive phrase, no issue number found." + ) + sys.exit(0) + + if is_wip(change_details): + print("Ignore. Comment posted to WIP change.") + sys.exit(0) + + if not is_current_revision(change_details, env.patch_set): + print("Ignore. Comment posted to different patch set.") + sys.exit(0) + + if not is_negative_from_user(change_details, env.gerrit_bot_user): + print("Ignore. Comment posted with no negative vote from CI") + sys.exit(0) + + if not is_issue_state(gh_issue_j, "open"): + print("Comment points to incorrect GitHub issue.") + gerrit.post_to_gerrit( + {"message": f"Issue #{gh_issue} does not exist or is already closed."} + ) + sys.exit(0) + + failed_builds = get_failed_builds_for_patch_by_user( + change_details, env.gerrit_bot_user, env.patch_set + ) + + if not failed_builds: + print("Did not find comments indicating build failure") + sys.exit(1) + + (fp_run_url, fp_run_id) = get_url_details_from_failed_build(failed_builds[-1]) + + github.post_comment( + gh_issue, + { + "body": f"Another instance of this failure. Reported by @{env.reported_by}. Log: {fp_run_url}" + }, + ) + + github.post_rerun(fp_run_id) + gerrit.post_comment({"message": "Retriggered", "labels": {"Verified": 0}}) + + +if __name__ == "__main__": + main() diff --git a/.github/scripts/parse_false_positive_comment.sh b/.github/scripts/parse_false_positive_comment.sh deleted file mode 100755 index 80cc103..0000000 --- a/.github/scripts/parse_false_positive_comment.sh +++ /dev/null @@ -1,118 +0,0 @@ -#!/usr/bin/env bash -# TODO: Rewrite this into python, the json pulp here is unbearable - -# COMMENT: ${{ fromJSON(needs.env_vars.outputs.client_payload).comment }} -# AUTHOR: ${{ fromJSON(needs.env_vars.outputs.client_payload).author.username }} -# REPO: ${{ github.repository_owner }}/spdk -# GH_REPO: ${{ github.repository }} -# GERRIT_BOT_USER: ${{ secrets.GERRIT_BOT_USER }} -# GERRIT_BOT_HTTP_PASSWD: ${{ secrets.GERRIT_BOT_HTTP_PASSWD }} -# GH_ISSUES_PAT: ${{ secrets.GH_ISSUES_PAT }} -# change_num: ${{ fromJSON(needs.env_vars.outputs.client_payload).change.number }} -# patch_set: ${{ fromJSON(needs.env_vars.outputs.client_payload).patchSet.number }} -set -eu -shopt -s extglob - -spdk_repo=$REPO -gerrit_comment=$COMMENT -reported_by=$AUTHOR - -gerrit_url=https://review.spdk.io/changes -gerrit_format_q="o=DETAILED_ACCOUNTS&o=MESSAGES&o=LABELS&o=SKIP_DIFFSTAT" - -# Looking for comment thats only content is "false positive: 123", with a leeway for no spaces -# or hashtag symbol before number -if [[ ! ${gerrit_comment,,} =~ "patch set "[0-9]+:$'\n\nfalse positive:'[[:space:]]*[#]?([0-9]+)$ ]]; then - echo "Ignore. Comment does not include false positive phrase." - exit 0 -fi -gh_issue=${BASH_REMATCH[1]} - -# Verify that the issue exists and is open -if ! gh_status=$(gh issue -R "$spdk_repo" view "$gh_issue" --json state --jq .state) \ - || [[ "$gh_status" != "OPEN" ]]; then - # shellcheck disable=SC2154 - curl -L -X POST \ - --user "$GERRIT_BOT_USER:$GERRIT_BOT_HTTP_PASSWD" \ - --header "Content-Type: application/json" \ - --data "{'message': 'Issue #$gh_issue does not exist or is already closed.'}" \ - --fail-with-body \ - "$gerrit_url/$change_num/revisions/$patch_set/review" - echo "Comment points to incorrect GitHub issue." - exit 0 -fi - -# Get latest info about a change itself - first line is the XSSI mitigation string, drop it -curl -s -X GET \ - --user "$GERRIT_BOT_USER:$GERRIT_BOT_HTTP_PASSWD" \ - "$gerrit_url/spdk%2Fspdk~$change_num?$gerrit_format_q" \ - | tail -n +2 | jq . | tee change.json - -if [[ ! -s change.json ]]; then - echo "Change $change_num not found, exiting." - echo "Either it's a private change or in restricted branch." - exit 0 -fi - -# Do not test any change marked as WIP -# .work_in_progress is not set when false -work_in_progress="$(jq -r '.work_in_progress' change.json)" -if [[ "$work_in_progress" == "true" ]]; then - echo "Ignore. Comment posted to WIP change." - exit 0 -fi - -# Only test latest patch set -current_patch_set="$(jq -r '.current_revision_number' change.json)" -if ((current_patch_set != patch_set)); then - echo "Ignore. Comment posted to different ($current_patch_set) patch set." - exit 0 -fi - -# False positive should be used only on changes that already have a negative Verified vote -verified=$(jq -r ".labels.Verified.all[]? | select(.username==\"$GERRIT_BOT_USER\").value" change.json) -if ((verified != -1)); then - echo "Ignore. Comment posted with no negative vote from CI." - exit 0 -fi - -# Find workflow to rerun. As a sanity check grab comment meeting following criteria: -# failed build comment from most recent patch set posted by spdk-bot - this patch set -# has to be <= compared to $patch_set the workflow was triggered by. -# NOTE: Message parsing is very fragile and has to match summary job -mapfile -t fp_run_failed_messages < <( - jq -r ".messages | sort_by(._revision_number)[] | - select(.author.username==\"$GERRIT_BOT_USER\") | - select(._revision_number<=$patch_set) | - select(.message | test(\"Build failed\")) | - .message" change.json | grep "Build failed. Results: " -) - -if ((${#fp_run_failed_messages[@]} == 0)); then - echo "Did not find comments indicating build failure" - exit 1 -fi - -# E.g: -# Build failed. Results: [15028790454/1](https://github.com/spdk/spdk-ci/actions/runs/15028790454/attempts/3) -# Build failed. Results: [15028790454/1](https://github.com/spdk/spdk-ci/actions/runs/15028790454) -fp_run_failed_messages=("${fp_run_failed_messages[@]}") -# E.g: 15028790454 -fp_run_id=${fp_run_failed_messages[-1]//@(*"runs/"|"/attempts"*)/} -# E.g: https://github.com/spdk/spdk-ci/actions/runs/15028790454 -fp_run_url=${fp_run_failed_messages[-1]//@(*"("|")")/} - -message="Another instance of this failure. Reported by @$reported_by. Log: $fp_run_url" -# Special PAT to read/write GH issues is required -GH_TOKEN=$GH_ISSUES_PAT gh issue -R "$spdk_repo" comment "$gh_issue" -b "$message" - -# Rerun only failed jobs, which will rerun all dependent ones too. -gh run rerun "$fp_run_id" --failed -R "$GH_REPO" - -# Reset the verified vote and leave a comment indicating that workflows were retriggered -curl -L -X POST \ - --user "$GERRIT_BOT_USER:$GERRIT_BOT_HTTP_PASSWD" \ - --header "Content-Type: application/json" \ - --data "{'message': 'Retriggered', 'labels': {'Verified': '0'}}" \ - --fail-with-body \ - "$gerrit_url/$change_num/revisions/$patch_set/review" diff --git a/.github/workflows/gerrit-false-positives-handler.yml b/.github/workflows/gerrit-false-positives-handler.yml index ce8b7ee..e4beb67 100644 --- a/.github/workflows/gerrit-false-positives-handler.yml +++ b/.github/workflows/gerrit-false-positives-handler.yml @@ -40,4 +40,4 @@ jobs: steps: - uses: actions/checkout@v4 - name: Parse for false positive - run: .github/scripts/parse_false_positive_comment.sh + run: .github/scripts/parse_false_positive_comment.py From 4701250524aa25f4737d6178480b2b728a1bcc4e Mon Sep 17 00:00:00 2001 From: Michal Berger Date: Fri, 16 May 2025 15:43:20 +0200 Subject: [PATCH 2/2] .github/scripts: Allow to override auth params through environment Also, introduce ignore list for parameters that are not deemed as crucial (e.g.: for given workflow). Signed-off-by: Michal Berger --- .github/scripts/helpers/env.py | 15 +++++++++++---- .github/scripts/parse_false_positive_comment.py | 4 ++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/.github/scripts/helpers/env.py b/.github/scripts/helpers/env.py index dc69002..df86588 100755 --- a/.github/scripts/helpers/env.py +++ b/.github/scripts/helpers/env.py @@ -12,11 +12,18 @@ def __init__(self): self.spdk_repo = os.getenv("REPO") self.change_num = os.getenv("change_num") # FIXME: to uppercase self.patch_set = os.getenv("patch_set") # FIXME: to uppercase + self.gh_auth = not os.getenv("GITHUB_DISABLE_AUTH") + self.gerrit_auth = not os.getenv("GERRIT_DISABLE_AUTH") - if not all(self.__dict__.values()): - raise AttributeError( - f"Not all env attributes were set: {self.__dict__.items()}" - ) + self._ignore = ["gh_auth", "gerrit_auth"] + + for var, val in self.__dict__.items(): + if var in self._ignore: + continue + if not val: + raise AttributeError( + f"Not all env attributes were set: {self.__dict__.items()}" + ) self.change_num = int(self.change_num) self.patch_set = int(self.patch_set) diff --git a/.github/scripts/parse_false_positive_comment.py b/.github/scripts/parse_false_positive_comment.py index 1a565e2..8a45a20 100755 --- a/.github/scripts/parse_false_positive_comment.py +++ b/.github/scripts/parse_false_positive_comment.py @@ -72,8 +72,8 @@ def get_url_details_from_failed_build(build): def main(): env = Env() - gerrit = Gerrit(env, auth=True) - github = GitHub(env, auth=True) + gerrit = Gerrit(env, auth=env.gh_auth) + github = GitHub(env, auth=env.gerrit_auth) change_details = gerrit.get_change() change_gh_details = github.get_issue_from_gerrit_comment()