From d1b7987ccfe66a60df4df22e5452362c98911bbe Mon Sep 17 00:00:00 2001 From: Martin Paulucci Date: Wed, 29 Oct 2025 13:00:26 +0100 Subject: [PATCH 1/5] ci(l1): Fix "Unknown job" job message when workflow fails. --- .github/workflows/common_failure_alerts.yaml | 76 +++++++++++++++----- .github/workflows/daily_snapsync.yaml | 1 + 2 files changed, 58 insertions(+), 19 deletions(-) diff --git a/.github/workflows/common_failure_alerts.yaml b/.github/workflows/common_failure_alerts.yaml index 1cd8e6d0e16..5984baa3f48 100644 --- a/.github/workflows/common_failure_alerts.yaml +++ b/.github/workflows/common_failure_alerts.yaml @@ -50,31 +50,69 @@ jobs: id: failed_jobs uses: actions/github-script@v7 with: + result-encoding: string script: | const runId = context.payload.workflow_run.id; + const attemptNumber = context.payload.workflow_run.run_attempt ?? 1; const { owner, repo } = context.repo; const failingConclusions = new Set(['failure', 'timed_out', 'action_required']); - const failedJobs = await github.paginate( - github.rest.actions.listJobsForWorkflowRun, - { - owner, - repo, - run_id: runId, - per_page: 100, - }, - response => { + const ignoredJobs = new Set(['Integration Test']); + const relevantJobs = new Set(); + + async function collectJobs(fetchPage) { + let page = 1; + while (true) { + const response = await fetchPage(page); + const jobs = Array.isArray(response?.data?.jobs) ? response.data.jobs : []; - return jobs.filter( - job => job.conclusion && failingConclusions.has(job.conclusion) - ); + for (const job of jobs) { + if ( + job?.conclusion && + failingConclusions.has(job.conclusion) && + job?.name && + !ignoredJobs.has(job.name) + ) { + relevantJobs.add(job.name); + } + } + + if (jobs.length < 100) { + break; + } + page += 1; } - ); - const ignoredJobs = new Set(['Integration Test']); - const relevantJobs = failedJobs - .map(job => job.name) - .filter(name => !ignoredJobs.has(name)); - const names = relevantJobs.length > 0 ? relevantJobs.join('\n- ') : 'Unknown job'; - core.setOutput('names', relevantJobs.length > 0 ? `- ${names}` : names); + } + + try { + await collectJobs(page => + github.rest.actions.listJobsForWorkflowRunAttempt({ + owner, + repo, + run_id: runId, + attempt_number: attemptNumber, + per_page: 100, + page, + }) + ); + } catch (error) { + if (error?.status !== 404) { + throw error; + } + core.info('Falling back to run-wide job listing'); + await collectJobs(page => + github.rest.actions.listJobsForWorkflowRun({ + owner, + repo, + run_id: runId, + per_page: 100, + page, + }) + ); + } + + const jobList = Array.from(relevantJobs); + const names = jobList.length > 0 ? jobList.join('\n- ') : 'Unknown job'; + core.setOutput('names', jobList.length > 0 ? `- ${names}` : names); - name: Post failure to Slack env: diff --git a/.github/workflows/daily_snapsync.yaml b/.github/workflows/daily_snapsync.yaml index 60d45fd6ec1..6a11ba087c5 100644 --- a/.github/workflows/daily_snapsync.yaml +++ b/.github/workflows/daily_snapsync.yaml @@ -58,6 +58,7 @@ jobs: name: Sync ${{ matrix.network }} runs-on: ethrex-sync strategy: + fail-fast: false matrix: include: ${{ fromJson(needs.prepare.outputs.matrix) }} steps: From 3dff89659da64aec2a11aa78824b8b0cc33b3edb Mon Sep 17 00:00:00 2001 From: Martin Paulucci Date: Wed, 29 Oct 2025 15:03:23 +0100 Subject: [PATCH 2/5] Move script to a file. --- .github/workflows/common_failure_alerts.yaml | 63 +----------------- scripts/collect_failed_jobs.js | 70 ++++++++++++++++++++ 2 files changed, 72 insertions(+), 61 deletions(-) create mode 100644 scripts/collect_failed_jobs.js diff --git a/.github/workflows/common_failure_alerts.yaml b/.github/workflows/common_failure_alerts.yaml index 5984baa3f48..4f2107a034e 100644 --- a/.github/workflows/common_failure_alerts.yaml +++ b/.github/workflows/common_failure_alerts.yaml @@ -52,67 +52,8 @@ jobs: with: result-encoding: string script: | - const runId = context.payload.workflow_run.id; - const attemptNumber = context.payload.workflow_run.run_attempt ?? 1; - const { owner, repo } = context.repo; - const failingConclusions = new Set(['failure', 'timed_out', 'action_required']); - const ignoredJobs = new Set(['Integration Test']); - const relevantJobs = new Set(); - - async function collectJobs(fetchPage) { - let page = 1; - while (true) { - const response = await fetchPage(page); - - const jobs = Array.isArray(response?.data?.jobs) ? response.data.jobs : []; - for (const job of jobs) { - if ( - job?.conclusion && - failingConclusions.has(job.conclusion) && - job?.name && - !ignoredJobs.has(job.name) - ) { - relevantJobs.add(job.name); - } - } - - if (jobs.length < 100) { - break; - } - page += 1; - } - } - - try { - await collectJobs(page => - github.rest.actions.listJobsForWorkflowRunAttempt({ - owner, - repo, - run_id: runId, - attempt_number: attemptNumber, - per_page: 100, - page, - }) - ); - } catch (error) { - if (error?.status !== 404) { - throw error; - } - core.info('Falling back to run-wide job listing'); - await collectJobs(page => - github.rest.actions.listJobsForWorkflowRun({ - owner, - repo, - run_id: runId, - per_page: 100, - page, - }) - ); - } - - const jobList = Array.from(relevantJobs); - const names = jobList.length > 0 ? jobList.join('\n- ') : 'Unknown job'; - core.setOutput('names', jobList.length > 0 ? `- ${names}` : names); + const collectFailedJobs = require('./scripts/collect_failed_jobs'); + await collectFailedJobs({ github, core, context }); - name: Post failure to Slack env: diff --git a/scripts/collect_failed_jobs.js b/scripts/collect_failed_jobs.js new file mode 100644 index 00000000000..4af4c9ca37c --- /dev/null +++ b/scripts/collect_failed_jobs.js @@ -0,0 +1,70 @@ +'use strict'; + +/** + * Collects failed job names for the current workflow run and exposes them via step output. + * @param {{ github: import('@actions/github').GitHub, core: import('@actions/core'), context: any }} deps + */ +module.exports = async function collectFailedJobs({ github, core, context }) { + const runId = context.payload.workflow_run.id; + const attemptNumber = context.payload.workflow_run.run_attempt ?? 1; + const { owner, repo } = context.repo; + + const failingConclusions = new Set(['failure', 'timed_out', 'action_required']); + const ignoredJobs = new Set(['Integration Test']); + const relevantJobs = new Set(); + + async function collectJobs(fetchPage) { + let page = 1; + while (true) { + const response = await fetchPage(page); + + const jobs = Array.isArray(response?.data?.jobs) ? response.data.jobs : []; + for (const job of jobs) { + if ( + job?.conclusion && + failingConclusions.has(job.conclusion) && + job?.name && + !ignoredJobs.has(job.name) + ) { + relevantJobs.add(job.name); + } + } + + if (jobs.length < 100) { + break; + } + page += 1; + } + } + + try { + await collectJobs(page => + github.rest.actions.listJobsForWorkflowRunAttempt({ + owner, + repo, + run_id: runId, + attempt_number: attemptNumber, + per_page: 100, + page, + }) + ); + } catch (error) { + if (error?.status !== 404) { + throw error; + } + core.info('Falling back to run-wide job listing'); + await collectJobs(page => + github.rest.actions.listJobsForWorkflowRun({ + owner, + repo, + run_id: runId, + per_page: 100, + page, + }) + ); + } + + const jobList = Array.from(relevantJobs); + const names = jobList.length > 0 ? jobList.join('\n- ') : 'Unknown job'; + core.setOutput('names', jobList.length > 0 ? `- ${names}` : names); +}; From 987709384055ac9b61be400fcc889c61084e8d70 Mon Sep 17 00:00:00 2001 From: Martin Paulucci Date: Wed, 29 Oct 2025 14:16:14 +0100 Subject: [PATCH 3/5] ci(l1): ensure ethrex main is not cached when running kurtosis. --- .github/actions/snapsync-run/action.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/actions/snapsync-run/action.yml b/.github/actions/snapsync-run/action.yml index 62a8e424f92..cd2bb973b28 100644 --- a/.github/actions/snapsync-run/action.yml +++ b/.github/actions/snapsync-run/action.yml @@ -13,6 +13,10 @@ runs: steps: - uses: actions/checkout@v4 + # We need to run this step because kurtosis uses cached docker images but we want the latest ethrex image + - name: Remove cached ethrex image + run: docker image rm -f ghcr.io/lambdaclass/ethrex:main || true + - name: Generate Kurtosis args shell: bash env: From 777150a82363e492355ae31fb5890995714e2687 Mon Sep 17 00:00:00 2001 From: Martin Paulucci Date: Wed, 29 Oct 2025 15:08:50 +0100 Subject: [PATCH 4/5] Revert "ci(l1): ensure ethrex main is not cached when running kurtosis." This reverts commit 5a02f23fdbe9b6362ecf48edcbb56d9f591707d1. --- .github/actions/snapsync-run/action.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/actions/snapsync-run/action.yml b/.github/actions/snapsync-run/action.yml index cd2bb973b28..62a8e424f92 100644 --- a/.github/actions/snapsync-run/action.yml +++ b/.github/actions/snapsync-run/action.yml @@ -13,10 +13,6 @@ runs: steps: - uses: actions/checkout@v4 - # We need to run this step because kurtosis uses cached docker images but we want the latest ethrex image - - name: Remove cached ethrex image - run: docker image rm -f ghcr.io/lambdaclass/ethrex:main || true - - name: Generate Kurtosis args shell: bash env: From f316491963eaa68a3292d90795f71d917dc47ebb Mon Sep 17 00:00:00 2001 From: Martin Paulucci Date: Wed, 29 Oct 2025 15:09:22 +0100 Subject: [PATCH 5/5] Remove fail fast. --- .github/workflows/daily_snapsync.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/daily_snapsync.yaml b/.github/workflows/daily_snapsync.yaml index 6a11ba087c5..60d45fd6ec1 100644 --- a/.github/workflows/daily_snapsync.yaml +++ b/.github/workflows/daily_snapsync.yaml @@ -58,7 +58,6 @@ jobs: name: Sync ${{ matrix.network }} runs-on: ethrex-sync strategy: - fail-fast: false matrix: include: ${{ fromJson(needs.prepare.outputs.matrix) }} steps: