From 8585cdcc15ecb687cb3469054d7ec6983b56b789 Mon Sep 17 00:00:00 2001
From: Neil Berkman
Date: Mon, 9 Mar 2026 20:07:41 -0700
Subject: [PATCH 1/4] ci: add nxboot OTA resilience canary workflow
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a weekly + on-demand workflow that tests nxboot power-loss resilience
on nucleo-h743zi using Renode-based fault injection (tardigrade).

Builds the nxboot-loader and nxboot-app configs from this repo, injects
power-loss faults at write points during the OTA update path, and
verifies the device recovers to a bootable state.

Schedule-only (weekly) and workflow_dispatch — does not run on push or
pull_request, so it never blocks normal development.

Signed-off-by: Neil Berkman
---
 .github/workflows/ota-resilience-canary.yml | 171 ++++++++++++++++++++
 1 file changed, 171 insertions(+)
 create mode 100644 .github/workflows/ota-resilience-canary.yml

diff --git a/.github/workflows/ota-resilience-canary.yml b/.github/workflows/ota-resilience-canary.yml
new file mode 100644
index 0000000000000..48fbae3464aee
--- /dev/null
+++ b/.github/workflows/ota-resilience-canary.yml
@@ -0,0 +1,171 @@
+name: OTA resilience canary
+
+on:
+  schedule:
+    - cron: "0 8 * * 0"
+  workflow_dispatch:
+    inputs:
+      fault_budget:
+        description: Number of fault points to test across the write range
+        required: true
+        default: "64"
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  nxboot-canary:
+    runs-on: ubuntu-22.04
+    timeout-minutes: 120
+    env:
+      TOOLCHAIN_VERSION: 13.2.1-1.1
+      TOOLCHAIN_URL: https://github.com/xpack-dev-tools/arm-none-eabi-gcc-xpack/releases/download/v13.2.1-1.1/xpack-arm-none-eabi-gcc-13.2.1-1.1-linux-x64.tar.gz
+      TOOLCHAIN_SHA256: 1252a8cafe9237de27a765376697230368eec21db44dc3f1edeb8d838dabd530
+      RENODE_URL: https://builds.renode.io/renode-1.16.1.linux-dotnet.tar.gz
+      PYTHONUNBUFFERED: "1"
+
+    steps:
+      - name: Checkout NuttX
+        uses: actions/checkout@v4
+        with:
+          path: nuttx
+
+      - name: Checkout NuttX apps
+        uses: actions/checkout@v4
+        with:
+          repository: apache/nuttx-apps
+          path: nuttx-apps
+
+      - name: Checkout tardigrade
+        uses: actions/checkout@v4
+        with:
+          repository: neilberkman/tardigrade
+          ref: 819d143a56c83fd7860ccc8f76a414a717956d94
+          path: tardigrade
+
+      - name: Install NuttX host dependencies
+        run: |
+          set -euxo pipefail
+          sudo apt-get update
+          sudo apt-get install -y kconfig-frontends || sudo apt-get install -y kconfig-frontends-nox
+
+      - name: Install pinned Arm GNU toolchain
+        run: |
+          set -euxo pipefail
+          tarball="${RUNNER_TEMP}/arm-toolchain.tar.gz"
+          curl -L "${TOOLCHAIN_URL}" -o "${tarball}"
+          echo "${TOOLCHAIN_SHA256}  ${tarball}" | sha256sum -c -
+          tar -xzf "${tarball}" -C "${RUNNER_TEMP}"
+          echo "${RUNNER_TEMP}/xpack-arm-none-eabi-gcc-${TOOLCHAIN_VERSION}/bin" >> "${GITHUB_PATH}"
+
+      - name: Build nxboot bootloader and application
+        run: |
+          set -euo pipefail
+          python3 tardigrade/targets/nuttx_nxboot/build_public_target.py \
+            --nuttx-root nuttx \
+            --apps-root nuttx-apps \
+            --output-dir "${RUNNER_TEMP}/nxboot-build" \
+            --header-size 0x400 \
+            --jobs 4
+
+      - name: Generate runtime profile
+        run: |
+          python3 tardigrade/targets/nuttx_nxboot/generate_runtime_profile.py \
+            --build-dir "${RUNNER_TEMP}/nxboot-build" \
+            --output-profile "${RUNNER_TEMP}/nxboot-canary-profile.yaml" \
+            --fault-max-writes auto \
+            --boot-cycles 2 \
+            --name nuttx_nxboot_canary
+
+      - name: Install Renode portable
+        run: |
+          set -euxo pipefail
+          tarball="${RUNNER_TEMP}/renode-portable.tar.gz"
+          curl -L "${RENODE_URL}" -o "${tarball}"
+          mkdir -p "${RUNNER_TEMP}/renode"
+          tar -xzf "${tarball}" -C "${RUNNER_TEMP}/renode" --strip-components=1
+          python3 -m pip install --user -r "${RUNNER_TEMP}/renode/tests/requirements.txt"
+          echo "${RUNNER_TEMP}/renode" >> "${GITHUB_PATH}"
+
+      - name: Install Python dependencies
+        run: python3 -m pip install --user pyyaml
+
+      - name: Run OTA resilience sweep
+        env:
+          FAULT_BUDGET: ${{ inputs.fault_budget || '64' }}
+          OTA_RENODE_POINT_TIMEOUT_S: "900"
+        run: |
+          set -euo pipefail
+          budget="${FAULT_BUDGET}"
+          step_flag=""
+          if [ "$budget" -gt 0 ] 2>/dev/null; then
+            estimated_writes=196608
+            step=$(( (estimated_writes + budget - 1) / budget ))
+            if [ "$step" -gt 1 ]; then
+              step_flag="--fault-step $step"
+            fi
+          fi
+          cd tardigrade
+          python3 scripts/audit_bootloader.py \
+            --profile "${RUNNER_TEMP}/nxboot-canary-profile.yaml" \
+            --renode-test "${RUNNER_TEMP}/renode/renode-test" \
+            --workers 2 \
+            --max-batch-points 16 \
+            --robot-var "TEST_TIMEOUT:10 minutes" \
+            $step_flag \
+            --output "${RUNNER_TEMP}/nxboot-canary-results.json"
+
+      - name: Print summary
+        if: always()
+        env:
+          REPORT_PATH: ${{ runner.temp }}/nxboot-canary-results.json
+        run: |
+          if [ -f "${REPORT_PATH}" ]; then
+            python3 - <<'PY'
+          import json
+          import os
+          report = os.environ["REPORT_PATH"]
+          payload = json.load(open(report))
+          summary = payload.get("summary", {}).get("runtime_sweep", {})
+          control = summary.get("control", {})
+          issues = int(summary.get("issue_points") or 0)
+          bricks = int(summary.get("bricks") or 0)
+          cal = payload.get("calibration", {})
+          print("Profile:", payload.get("profile"))
+          print("Verdict:", payload.get("verdict"))
+          print("Calibrated writes:", cal.get("writes"))
+          print("Fault points:", summary.get("total_fault_points"))
+          print("Issues:", issues)
+          print("Bricks:", bricks)
+          print("Control outcome:", control.get("boot_outcome"))
+          mba = control.get("multi_boot_analysis") or {}
+          if mba:
+              print("Control multi-boot:", mba.get("status"), mba.get("final_slot"))
+          step_summary = os.environ.get("GITHUB_STEP_SUMMARY")
+          if step_summary:
+              with open(step_summary, "a") as fh:
+                  fh.write("## nxboot OTA resilience canary\n")
+                  fh.write(f"- Verdict: **{payload.get('verdict')}**\n")
+                  fh.write(f"- Calibrated writes: {cal.get('writes')}\n")
+                  fh.write(f"- Fault points tested: {summary.get('total_fault_points')}\n")
+                  fh.write(f"- Bricks: {bricks}\n")
+                  fh.write(f"- Issues: {issues}\n")
+          if bricks > 0 or issues > 0:
+              print(f"::error::OTA resilience canary found {bricks} bricks and {issues} issues")
+              raise SystemExit(1)
+          PY
+          fi
+
+      - name: Upload results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: ota-resilience-canary-results
+          if-no-files-found: ignore
+          path: |
+            ${{ runner.temp }}/nxboot-canary-results.json
+            ${{ runner.temp }}/nxboot-canary-profile.yaml

From 76945c350c2bd5bee97bac3746efdcad31651175 Mon Sep 17 00:00:00 2001
From: Neil Berkman
Date: Mon, 9 Mar 2026 21:42:34 -0700
Subject: [PATCH 2/4] ci: set persist-credentials: false on checkout steps

Fixes zizmor artipacked warnings for all three actions/checkout@v4
steps in the OTA resilience canary workflow.

Signed-off-by: Neil Berkman
---
 .github/workflows/ota-resilience-canary.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/ota-resilience-canary.yml b/.github/workflows/ota-resilience-canary.yml
index 48fbae3464aee..bf1f86eb0a13f 100644
--- a/.github/workflows/ota-resilience-canary.yml
+++ b/.github/workflows/ota-resilience-canary.yml
@@ -33,12 +33,14 @@ jobs:
         uses: actions/checkout@v4
         with:
           path: nuttx
+          persist-credentials: false
 
       - name: Checkout NuttX apps
         uses: actions/checkout@v4
         with:
           repository: apache/nuttx-apps
           path: nuttx-apps
+          persist-credentials: false
 
       - name: Checkout tardigrade
         uses: actions/checkout@v4
@@ -46,6 +48,7 @@ jobs:
           repository: neilberkman/tardigrade
           ref: 819d143a56c83fd7860ccc8f76a414a717956d94
           path: tardigrade
+          persist-credentials: false
 
       - name: Install NuttX host dependencies
         run: |

From 91a9faa6c4fa085a8e2ce7d3cbf69becf897a40f Mon Sep 17 00:00:00 2001
From: Neil Berkman
Date: Mon, 9 Mar 2026 22:13:31 -0700
Subject: [PATCH 3/4] ci: update tardigrade pin to handle pre-patched NuttX tree

Signed-off-by: Neil Berkman
---
 .github/workflows/ota-resilience-canary.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ota-resilience-canary.yml b/.github/workflows/ota-resilience-canary.yml
index bf1f86eb0a13f..60fdac2a1c1a4 100644
--- a/.github/workflows/ota-resilience-canary.yml
+++ b/.github/workflows/ota-resilience-canary.yml
@@ -46,7 +46,7 @@ jobs:
         uses: actions/checkout@v4
         with:
           repository: neilberkman/tardigrade
-          ref: 819d143a56c83fd7860ccc8f76a414a717956d94
+          ref: b90b61f7aefa813aac8e565217172d936d70c4ba
           path: tardigrade
           persist-credentials: false
 

From d3688c81df9bbd185bac0da012738b2f74e410c2 Mon Sep 17 00:00:00 2001
From: Neil Berkman
Date: Mon, 9 Mar 2026 22:19:06 -0700
Subject: [PATCH 4/4] ci: update tardigrade pin (resolve relative paths)

Signed-off-by: Neil Berkman
---
 .github/workflows/ota-resilience-canary.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ota-resilience-canary.yml b/.github/workflows/ota-resilience-canary.yml
index 60fdac2a1c1a4..442bf2cc0f2b3 100644
--- a/.github/workflows/ota-resilience-canary.yml
+++ b/.github/workflows/ota-resilience-canary.yml
@@ -46,7 +46,7 @@ jobs:
         uses: actions/checkout@v4
         with:
           repository: neilberkman/tardigrade
-          ref: b90b61f7aefa813aac8e565217172d936d70c4ba
+          ref: 7aaa1dbe663297345365c5c562250d9796574150
           path: tardigrade
           persist-credentials: false
 