Skip to content

Nightly Soak Test

Nightly Soak Test #62

Workflow file for this run

# Workflow-level settings for the nightly soak run: triggers, token
# permissions, default shell and concurrency policy.
name: Nightly Soak Test

# Runs on a daily schedule and can also be started manually.
on:
  schedule:
    - cron: '0 2 * * *'  # 2 AM UTC
  workflow_dispatch:

# Least privilege: the workflow token may only read repository contents.
permissions:
  contents: read

# Every `run:` step uses strict bash: abort on errors, on unset variables and
# on a failing stage of a pipeline.
defaults:
  run:
    shell: bash -euo pipefail {0}

# One active run per ref and trigger type; a newer run in the same group
# cancels the one still in progress.
concurrency:
  group: nightly-soak-${{ github.ref }}-${{ github.event_name }}
  cancel-in-progress: true
jobs:
  # Builds the REL image, starts the interactive service, runs the soak,
  # fault-injection and profile-matrix suites, then publishes artifacts and
  # tears the services down.
  soak-test:
    runs-on: ubuntu-latest
    # Upper bound for the whole job, including the 55-minute soak below.
    timeout-minutes: 90
    steps:
      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5
      - uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f

      - name: Build REL image
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
        with:
          context: .
          file: docker/Dockerfile
          target: intent-rel
          # The base image can be overridden through a repository variable.
          build-args: |
            BASE_IMAGE=${{ vars.WINEBOT_BASE_IMAGE || 'ghcr.io/mark-e-deyoung/winebot-base:base-2026-02-09' }}
            BUILD_INTENT=rel
          # Load the image into the runner's Docker daemon; nothing is pushed.
          load: true
          push: false
          tags: winebot:rel
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Start services
        env:
          BUILD_INTENT: rel
          WINEBOT_RECORD: "1"
        run: |
          docker compose -f compose/docker-compose.yml --profile interactive up -d winebot-interactive

      - name: Wait for service health
        run: scripts/ci/wait-for-container-health.sh compose/docker-compose.yml interactive winebot-interactive 120 2

      - name: Run soak test
        id: run_soak_test
        env:
          DURATION_SECONDS: "3300"  # 55 minutes; keep headroom for setup/teardown
          INTERVAL_SECONDS: "30"
          MAX_LOG_MB: "1024"
          MAX_SESSION_MB: "8192"
        run: |
          # Run the soak diagnostic script inside the container.
          # `docker compose exec` does not inherit the runner's environment, so
          # the tunables from this step's `env:` are forwarded explicitly.
          docker compose -f compose/docker-compose.yml exec -T --user winebot \
            -e DURATION_SECONDS="${DURATION_SECONDS}" \
            -e INTERVAL_SECONDS="${INTERVAL_SECONDS}" \
            -e MAX_LOG_MB="${MAX_LOG_MB}" \
            -e MAX_SESSION_MB="${MAX_SESSION_MB}" \
            winebot-interactive /scripts/diagnostics/soak-resource-bounds.sh nightly

      - name: Run fault injection diagnostics
        id: run_fault_injection
        run: |
          docker compose -f compose/docker-compose.yml exec -T --user winebot \
            winebot-interactive /scripts/diagnostics/diagnose-fault-injection.sh

      - name: Run profile matrix tests
        id: run_profile_matrix
        run: |
          docker compose -f compose/docker-compose.yml --profile interactive --profile test run --rm test-runner \
            pytest -q /work/tests/test_profile_matrix.py /work/tests/test_config_guard.py

      - name: Normalize artifact directory ownership
        run: |
          # Best effort: make sure the runner user can read and write artifacts.
          mkdir -p artifacts
          sudo chown -R "$(id -u):$(id -g)" artifacts || true
          chmod -R u+rwX artifacts || true

      - name: Generate trust pack
        id: generate_trust_pack
        run: |
          scripts/ci/generate-trust-pack.sh artifacts/trust-pack-nightly

      # NOTE(review): upload-artifact is pinned by tag while the other actions
      # in this job are pinned by commit SHA; consider pinning for consistency.
      - name: Upload trust pack
        uses: actions/upload-artifact@v4
        with:
          name: trust-pack-nightly
          path: artifacts/trust-pack-nightly

      # Best-effort collection: every command tolerates failure, and
      # continue-on-error keeps a collection problem from masking the
      # original failure.
      - name: Collect diagnostics on failure
        id: collect_failure_diagnostics
        if: failure()
        continue-on-error: true
        run: |
          set +e
          mkdir -p artifacts
          sudo chown -R "$(id -u):$(id -g)" artifacts || true
          chmod -R u+rwX artifacts || true
          FAIL_DIR="${GITHUB_WORKSPACE}/artifacts/nightly-failure"
          mkdir -p "${FAIL_DIR}"
          date -u +"%Y-%m-%dT%H:%M:%SZ" > "${FAIL_DIR}/collected-at-utc.txt"
          # -a includes stopped containers, which matter most after a crash.
          docker compose -f compose/docker-compose.yml --profile interactive ps -a > "${FAIL_DIR}/compose-ps.txt" || true
          docker inspect "$(docker compose -f compose/docker-compose.yml --profile interactive ps -a -q winebot-interactive)" \
            > "${FAIL_DIR}/container-inspect.json" || true
          # Same profile as the commands above so the interactive service's logs are included.
          docker compose -f compose/docker-compose.yml --profile interactive logs > "${FAIL_DIR}/compose-logs.txt" || true
          scripts/winebotctl diag bundle --max-mb 200 || true
          find logs -maxdepth 1 -name 'diagnostic_bundle_*.tar.gz' -exec cp -f {} "${FAIL_DIR}/" \; 2>/dev/null || true

      # Guarantees the failure directory and its timestamp exist even when the
      # collection step above could not create them.
      - name: Ensure failure diagnostics directory exists
        if: failure()
        run: |
          FAIL_DIR="${GITHUB_WORKSPACE}/artifacts/nightly-failure"
          mkdir -p "${FAIL_DIR}"
          test -f "${FAIL_DIR}/collected-at-utc.txt" || date -u +"%Y-%m-%dT%H:%M:%SZ" > "${FAIL_DIR}/collected-at-utc.txt"

      - name: Upload failure diagnostics artifact
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: nightly-failure-diagnostics
          path: ${{ github.workspace }}/artifacts/nightly-failure
          if-no-files-found: warn

      # Sanity-checks the artifacts that the steps above were expected to produce.
      - name: Verify nightly artifacts
        if: always()
        env:
          JOB_STATUS: ${{ job.status }}
          GENERATE_TRUST_PACK_OUTCOME: ${{ steps.generate_trust_pack.outcome }}
          COLLECT_FAILURE_DIAGNOSTICS_OUTCOME: ${{ steps.collect_failure_diagnostics.outcome }}
        run: |
          TRUST_DIR="${GITHUB_WORKSPACE}/artifacts/trust-pack-nightly"
          FAIL_DIR="${GITHUB_WORKSPACE}/artifacts/nightly-failure"
          # A successful trust-pack step must have produced non-empty outputs.
          if [ "${GENERATE_TRUST_PACK_OUTCOME}" = "success" ]; then
            test -s "${TRUST_DIR}/summary.md"
            test -s "${TRUST_DIR}/inventory.json"
          fi
          # The collection step is continue-on-error, so surface its failure here.
          if [ "${COLLECT_FAILURE_DIAGNOSTICS_OUTCOME}" = "failure" ]; then
            echo "::warning::Failure diagnostics collection did not complete; the bundle may be partial."
          fi
          # A failed job must leave at least the timestamped failure directory.
          if [ "${JOB_STATUS}" = "failure" ]; then
            test -d "${FAIL_DIR}"
            test -s "${FAIL_DIR}/collected-at-utc.txt"
          fi

      # Always tear the services down, whatever the outcome of earlier steps.
      - name: Stop services
        if: always()
        run: |
          docker compose -f compose/docker-compose.yml --profile interactive down --remove-orphans