Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 141 additions & 2 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
# Benchmarks workflow: runs the benchmark suite on pushes to main and on
# pull requests that target main.
name: Benchmarks

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

# Least privilege: the steps in this workflow only check out the repository,
# upload an artifact and write to the step summary. None of them pushes
# commits or posts PR comments, so read access to contents is sufficient.
# Re-add a narrower write scope only if a step that needs it is introduced.
permissions:
  contents: read

env:
  # Keep cargo's coloured output in the job log.
  CARGO_TERM_COLOR: always

Expand All @@ -12,14 +18,147 @@ jobs:
# Job body: runs on the hosted Ubuntu runner.
runs-on: ubuntu-latest
steps:
# Check out the repository. fetch-depth: 0 asks actions/checkout for the
# full history instead of a shallow clone.
# NOTE(review): no step visible in this workflow reads git history —
# confirm whether the full clone is still needed.
- uses: actions/checkout@v4
with:
fetch-depth: 0

# Install the stable Rust toolchain.
- uses: dtolnay/rust-toolchain@stable

# Rust build cache action, placed before the benchmark build.
- uses: Swatinem/rust-cache@v2
# Run the benchmark suite exactly once and keep the bencher-format stdout in
# bench-output.txt for the snapshot step.
- name: Run benchmarks (bencher output)
  # An explicit `shell: bash` makes the runner invoke bash with
  # `-eo pipefail`. Without it the default is `bash -e`, where the exit
  # status of the pipeline is that of `tee`, so a failing `cargo bench`
  # would still leave this step green.
  shell: bash
  # stderr is deliberately NOT discarded: it is not part of the pipe into
  # `tee`, so it never pollutes bench-output.txt, and it carries the
  # compiler and panic output needed to diagnose a failed run.
  run: cargo bench --bench benchmarks -- --output-format bencher | tee bench-output.txt

# Turn bench-output.txt into:
#   bench-results/bench-snapshot-<version>-<date>.json  (structured snapshot)
#   bench-results/bench-latest.json                     (copy of the above)
#   bench-results/bench-report.md                       (Markdown summary)
- name: Generate benchmark snapshot
  shell: bash
  env:
    # GitHub context values are handed to the script as environment
    # variables rather than being spliced into the script text, so their
    # contents can never be interpreted as shell syntax.
    COMMIT: ${{ github.sha }}
    REF: ${{ github.ref_name }}
    RUN_NUMBER: ${{ github.run_number }}
    REPOSITORY: ${{ github.repository }}
  run: |
    set -euo pipefail

    VERSION="$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)".*/\1/')"
    # Derive the date from the timestamp so the two can never disagree
    # when the job runs across midnight UTC.
    TIMESTAMP="$(date -u '+%Y-%m-%dT%H:%M:%SZ')"
    DATE="${TIMESTAMP%%T*}"
    COMMIT_SHORT="${COMMIT:0:7}"
    RUSTC_VERSION="$(rustc --version)"
    OS_NAME="$(uname -s) $(uname -r)"
    ARCH="$(uname -m)"

    mkdir -p bench-results

    JSON_FILE="bench-results/bench-snapshot-${VERSION}-${DATE}.json"

    # Escape backslashes and double quotes so a value is safe inside a
    # JSON string literal.
    json_escape() {
      local text=$1
      text=${text//\\/\\\\}
      text=${text//\"/\\\"}
      printf '%s' "${text}"
    }

    # json_field KEY VALUE: print one escaped string member with a
    # trailing comma.
    json_field() {
      printf ' "%s": "%s",\n' "$1" "$(json_escape "$2")"
    }

    # --- Parse the bencher output once ---
    # Each result line looks like:
    #   test <name> ... bench: <ns> ns/iter (+/- <dev>)
    BENCH_RE='^test[[:space:]]+(.+)[[:space:]]+\.\.\.[[:space:]]+bench:[[:space:]]+([0-9,]+)[[:space:]]+ns/iter[[:space:]]+\(\+/-[[:space:]]+([0-9,]+)\)'
    NAMES=()
    NS_VALUES=()
    DEV_VALUES=()
    while IFS= read -r line; do
      if [[ "${line}" =~ ${BENCH_RE} ]]; then
        NAMES+=("${BASH_REMATCH[1]}")
        NS_VALUES+=("${BASH_REMATCH[2]}")
        DEV_VALUES+=("${BASH_REMATCH[3]}")
      fi
    done < bench-output.txt

    # --- Build structured JSON ---
    {
      echo "{"
      json_field "version" "${VERSION}"
      json_field "date" "${DATE}"
      json_field "timestamp" "${TIMESTAMP}"
      json_field "commit" "${COMMIT}"
      json_field "commit_short" "${COMMIT_SHORT}"
      json_field "ref" "${REF}"
      echo " \"run_number\": ${RUN_NUMBER},"
      json_field "rust_version" "${RUSTC_VERSION}"
      json_field "os" "${OS_NAME}"
      json_field "arch" "${ARCH}"
      echo " \"benchmarks\": ["

      for i in "${!NAMES[@]}"; do
        # Entries are separated by a comma; none before the first.
        if (( i > 0 )); then
          echo ","
        fi
        # Thousands separators are stripped so the values are plain
        # JSON numbers.
        printf ' {"name": "%s", "ns_per_iter": %s, "deviation": %s}' \
          "$(json_escape "${NAMES[i]}")" \
          "${NS_VALUES[i]//,/}" \
          "${DEV_VALUES[i]//,/}"
      done

      echo ""
      echo " ]"
      echo "}"
    } > "${JSON_FILE}"

    cp "${JSON_FILE}" bench-results/bench-latest.json

    # --- Generate human-readable summary for PR / step summary ---
    {
      echo "# Benchmark Report"
      echo ""
      echo "- **Version:** ${VERSION}"
      echo "- **Date:** ${DATE}"
      echo "- **Commit:** [\`${COMMIT_SHORT}\`](https://github.com/${REPOSITORY}/commit/${COMMIT})"
      echo "- **Ref:** \`${REF}\`"
      echo "- **Rust:** ${RUSTC_VERSION}"
      echo "- **OS:** ${OS_NAME} (${ARCH})"
      echo ""
      echo "| Benchmark | ns/iter | ±deviation |"
      echo "|-----------|--------:|----------:|"

      # The report shows the values exactly as the benchmark printed them.
      for i in "${!NAMES[@]}"; do
        echo "| ${NAMES[i]} | ${NS_VALUES[i]} | ${DEV_VALUES[i]} |"
      done

      echo ""
      echo "_Generated by CI run #${RUN_NUMBER}_"
    } > bench-results/bench-report.md

    cat bench-results/bench-report.md

# Append the Markdown report written by the snapshot step to the file named
# by $GITHUB_STEP_SUMMARY.
- name: Post benchmark summary
run: cat bench-results/bench-report.md >> "$GITHUB_STEP_SUMMARY"

# Upload one artifact named "benchmark-results", kept for 90 days. It bundles
# the raw bencher output (bench-output.txt), everything under bench-results/
# (JSON snapshot, bench-latest.json, Markdown report) and target/criterion/.
- name: Upload benchmark results
uses: actions/upload-artifact@v4
with:
name: benchmark-results
path: |
bench-output.txt
bench-results/
target/criterion/
retention-days: 90

# On pull requests, append this run's benchmark table to the step summary
# together with a pointer to the latest main run. No baseline is downloaded
# and no automatic diff is computed; the comparison is left to the reviewer.
- name: Compare with baseline (PR only)
  if: github.event_name == 'pull_request'
  shell: bash
  env:
    # The PR head branch name is chosen by the PR author. It is passed as
    # an environment variable so that it is only ever expanded as data and
    # never becomes part of the script text.
    HEAD_REF: ${{ github.head_ref }}
  run: |
    set -euo pipefail

    echo "Rendering benchmark results for this PR..."

    # Snapshot produced earlier in this job.
    CURRENT="bench-results/bench-latest.json"
    if [[ ! -f "${CURRENT}" ]]; then
      echo "No current benchmark data found, skipping comparison."
      exit 0
    fi

    # Render one Markdown table row per benchmark. The snapshot path is
    # passed as an argument instead of being interpolated into the Python
    # source, and the Python lines sit at the base indentation of this
    # script so they reach the interpreter starting in column 0. Python
    # errors stay visible in the job log; the summary gets a placeholder.
    ROWS="$(python3 -c '
    import json, sys
    with open(sys.argv[1]) as f:
        data = json.load(f)
    for b in data.get("benchmarks", []):
        print("| {} | {:,} | {:,} |".format(b["name"], b["ns_per_iter"], b["deviation"]))
    ' "${CURRENT}")" || ROWS="_(Could not parse benchmark data)_"

    {
      echo "## Benchmark Comparison"
      echo ""
      echo "Benchmarks from this PR (\`${HEAD_REF}\`):"
      echo ""
      echo "| Benchmark | ns/iter | ±deviation |"
      echo "|-----------|--------:|----------:|"
      if [[ -n "${ROWS}" ]]; then
        printf '%s\n' "${ROWS}"
      fi
      echo ""
      echo "> **Note:** Compare with the latest \`main\` branch run in the Actions tab for regression detection."
    } >> "$GITHUB_STEP_SUMMARY"
6 changes: 6 additions & 0 deletions bench-results/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# bench-results directory is for local benchmark snapshots.
# CI-generated snapshots are uploaded as GitHub Actions artifacts.
# Only the README and this .gitignore are tracked in git.
*
!README.md
!.gitignore
88 changes: 88 additions & 0 deletions bench-results/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Benchmark Results

This directory holds versioned benchmark snapshots produced by the CI
[Benchmarks workflow](../.github/workflows/bench.yml) and the local
[`scripts/bench-report.sh`](../scripts/bench-report.sh) script.

## How It Works

### CI (Automated)

Every push to `main` and every pull request triggers the **Benchmarks**
workflow. It:

1. Runs the full Criterion benchmark suite (`cargo bench --bench benchmarks`).
2. Parses the bencher-format output into a structured **JSON snapshot**
containing version, commit, date, Rust toolchain, and per-benchmark
timings.
3. Generates a human-readable **Markdown report** posted to the GitHub
Actions step summary.
4. Uploads both files, together with the raw bencher output and
   `target/criterion/`, as a single `benchmark-results` artifact with
   90-day retention.
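5. On PRs, appends the PR's benchmark table to the step summary with a note
   pointing reviewers to the latest `main` run. The comparison itself is
   manual; the workflow does not compute a diff against a baseline.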

### Local (Manual)

Run the helper script from the repo root:

```bash
./scripts/bench-report.sh # outputs to bench-results/
./scripts/bench-report.sh /tmp/bench # custom output directory
```

This produces a JSON snapshot in the same format as CI, a Markdown-formatted
`.txt` report, and `bench-latest.json` / `bench-latest.txt` copies for quick
access.

## Snapshot Format

Each snapshot is a JSON file named
`bench-snapshot-<version>-<YYYY-MM-DD>.json`. Snapshots generated by CI
additionally contain `ref` and `run_number` fields:

```json
{
"version": "0.1.0",
"date": "2026-02-23",
"timestamp": "2026-02-23T22:38:00Z",
"commit": "abc1234...",
"commit_short": "abc1234",
"rust_version": "rustc 1.82.0 ...",
"os": "Linux 6.5.0-...",
"arch": "x86_64",
"benchmarks": [
{
"name": "parse_json/records/1000",
"ns_per_iter": 123456,
"deviation": 789
}
]
}
```

## Using Snapshots for Release Notes

Compare the `bench-latest.json` from the current release with the
previous release's snapshot to quantify performance changes:

```bash
# Quick diff of two snapshots
python3 scripts/bench-compare.py \
bench-results/bench-snapshot-0.1.0-2026-01-01.json \
bench-results/bench-snapshot-0.2.0-2026-02-23.json
```

Or manually compare the `ns_per_iter` values in the JSON files.

## Tracked Benchmarks

The suite covers the full pipeline:

| Group | What it measures |
|-------|-----------------|
| `parse_*` | Deserialization speed (JSON, CSV, YAML) |
| `serialize_*` | Serialization speed (JSON, CSV, YAML, TOML) |
| `convert_*` | End-to-end format conversion |
| `mapping_*` | Mapping language operations (rename, filter, complex) |
| `e2e_*` | Full pipeline: parse → map → serialize |

Each group tests at 100 / 1,000 / 10,000 record sizes to show scaling
behaviour.
111 changes: 111 additions & 0 deletions scripts/bench-report.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#!/usr/bin/env bash
# bench-report.sh — Run Criterion benchmarks and produce a versioned JSON snapshot
# Usage: ./scripts/bench-report.sh [output-dir]
#
# Arguments:
#   output-dir   Directory to write results to
#                (default: <repo-root>/bench-results)
#
# Produces:
#   <output-dir>/bench-snapshot-<version>-<date>.json — structured results
#   <output-dir>/bench-snapshot-<version>-<date>.txt  — human-readable report
#   <output-dir>/bench-latest.json                    — copy of the latest JSON snapshot
#   <output-dir>/bench-latest.txt                     — copy of the latest report
#   <output-dir>/bench-bencher-output.txt             — raw bencher-format output
set -euo pipefail

# Repository root is the parent of the directory holding this script.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
OUTPUT_DIR="${1:-${REPO_ROOT}/bench-results}"
CARGO_TOML="${REPO_ROOT}/Cargo.toml"

if [[ ! -f "${CARGO_TOML}" ]]; then
  echo "error: ${CARGO_TOML} not found" >&2
  exit 1
fi

# Take the first line of the form `version = "<value>"` and stop reading.
# Failing loudly here avoids snapshot file names built from an empty or
# garbage version string.
VERSION="$(sed -n '/^version[[:space:]]*=[[:space:]]*"\([^"]*\)".*/{s//\1/p;q;}' "${CARGO_TOML}")"
if [[ -z "${VERSION}" ]]; then
  echo "error: could not read a version = \"...\" line from ${CARGO_TOML}" >&2
  exit 1
fi

# DATE is derived from TIMESTAMP so the two cannot disagree when the script
# runs across midnight UTC.
TIMESTAMP="$(date -u '+%Y-%m-%dT%H:%M:%SZ')"
DATE="${TIMESTAMP%%T*}"

# Outside a git checkout both commit fields fall back to 'unknown'.
COMMIT="$(git -C "${REPO_ROOT}" rev-parse --short HEAD 2>/dev/null || echo 'unknown')"
COMMIT_FULL="$(git -C "${REPO_ROOT}" rev-parse HEAD 2>/dev/null || echo 'unknown')"

mkdir -p "${OUTPUT_DIR}"

SNAPSHOT_BASE="bench-snapshot-${VERSION}-${DATE}"
JSON_FILE="${OUTPUT_DIR}/${SNAPSHOT_BASE}.json"
TXT_FILE="${OUTPUT_DIR}/${SNAPSHOT_BASE}.txt"
BENCHER_FILE="${OUTPUT_DIR}/bench-bencher-output.txt"

echo "==> Running benchmarks (this may take a few minutes)..."
# Only stdout is piped into tee, so the saved file holds just the bencher
# output. stderr stays attached to the terminal: with `set -e` and pipefail a
# failed build aborts the script, and the compiler output is the only
# explanation the user gets.
cargo bench --bench benchmarks -- --output-format bencher | tee "${BENCHER_FILE}"

echo ""
echo "==> Parsing results..."

#######################################
# Escape a value for use inside a JSON string literal.
# Arguments: $1 - raw text
# Outputs:   the text with backslashes and double quotes escaped (no newline)
#######################################
json_escape() {
  local text=$1
  # Backslashes first, so the ones added for quotes are not doubled again.
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  printf '%s' "${text}"
}

#######################################
# Parse Criterion's bencher-format output into structured JSON.
# Each result line looks like: "test <name> ... bench: <ns> ns/iter (+/- <dev>)"
# Lines that do not match are ignored.
# Globals:   VERSION, DATE, TIMESTAMP, COMMIT_FULL, COMMIT (read)
# Arguments: $1 - path to the bencher-format output file
# Outputs:   the JSON document on stdout
#######################################
emit_json_snapshot() {
  local bencher_file=$1
  local bench_re='^test[[:space:]]+(.+)[[:space:]]+\.\.\.[[:space:]]+bench:[[:space:]]+([0-9,]+)[[:space:]]+ns/iter[[:space:]]+\(\+/-[[:space:]]+([0-9,]+)\)'
  local line name ns dev
  local first=true

  echo "{"
  printf ' "version": "%s",\n' "$(json_escape "${VERSION}")"
  printf ' "date": "%s",\n' "$(json_escape "${DATE}")"
  printf ' "timestamp": "%s",\n' "$(json_escape "${TIMESTAMP}")"
  printf ' "commit": "%s",\n' "$(json_escape "${COMMIT_FULL}")"
  printf ' "commit_short": "%s",\n' "$(json_escape "${COMMIT}")"
  printf ' "rust_version": "%s",\n' "$(json_escape "$(rustc --version)")"
  printf ' "os": "%s",\n' "$(json_escape "$(uname -s) $(uname -r)")"
  printf ' "arch": "%s",\n' "$(json_escape "$(uname -m)")"
  echo ' "benchmarks": ['

  while IFS= read -r line; do
    [[ "${line}" =~ ${bench_re} ]] || continue
    name="${BASH_REMATCH[1]}"
    # Drop thousands separators so the values are plain JSON numbers.
    ns="${BASH_REMATCH[2]//,/}"
    dev="${BASH_REMATCH[3]//,/}"

    # Entries are separated by a comma; none before the first.
    if [[ "${first}" == "true" ]]; then
      first=false
    else
      echo ","
    fi

    printf ' {"name": "%s", "ns_per_iter": %s, "deviation": %s}' \
      "$(json_escape "${name}")" "${ns}" "${dev}"
  done < "${bencher_file}"

  echo ""
  echo " ]"
  echo "}"
}

emit_json_snapshot "${BENCHER_FILE}" > "${JSON_FILE}"

#######################################
# Insert thousands separators into a string of decimal digits.
# printf's ' flag only applies to numeric conversions and depends on the
# locale, so the grouping is done explicitly here.
# Arguments: $1 - digits, e.g. 1234567
# Outputs:   grouped digits, e.g. 1,234,567 (no newline)
#######################################
group_thousands() {
  local digits=$1
  local grouped=""
  while (( ${#digits} > 3 )); do
    grouped=",${digits: -3}${grouped}"
    digits=${digits:0:${#digits}-3}
  done
  printf '%s%s' "${digits}" "${grouped}"
}

#######################################
# Generate human-readable report (a Markdown table) from bencher output.
# Globals:   VERSION, DATE, COMMIT (read)
# Arguments: $1 - path to the bencher-format output file
# Outputs:   the report on stdout
#######################################
emit_text_report() {
  local bencher_file=$1
  local bench_re='^test[[:space:]]+(.+)[[:space:]]+\.\.\.[[:space:]]+bench:[[:space:]]+([0-9,]+)[[:space:]]+ns/iter[[:space:]]+\(\+/-[[:space:]]+([0-9,]+)\)'
  local line name ns dev ops

  echo "# Benchmark Report"
  echo ""
  echo "- **Version:** ${VERSION}"
  echo "- **Date:** ${DATE}"
  echo "- **Commit:** ${COMMIT}"
  echo "- **Rust:** $(rustc --version)"
  echo "- **OS:** $(uname -s) $(uname -r) ($(uname -m))"
  echo ""
  echo "| Benchmark | ns/iter | ±deviation | ~throughput |"
  echo "|-----------|---------|------------|-------------|"

  while IFS= read -r line; do
    [[ "${line}" =~ ${bench_re} ]] || continue
    name="${BASH_REMATCH[1]}"
    ns="${BASH_REMATCH[2]//,/}"
    dev="${BASH_REMATCH[3]//,/}"

    # Compute approximate ops/sec with shell integer arithmetic; 10# forces
    # base 10 so a leading zero is never read as octal. Zero or unparsable
    # timings get N/A instead of a division.
    if [[ "${ns}" =~ ^[0-9]+$ ]] && (( 10#${ns} > 0 )); then
      ops=$(( 1000000000 / 10#${ns} ))
      printf '| %-50s | %12s | %12s | %s ops/s |\n' \
        "${name}" "$(group_thousands "${ns}")" "$(group_thousands "${dev}")" "${ops}"
    else
      printf '| %-50s | %12s | %12s | N/A |\n' \
        "${name}" "$(group_thousands "${ns}")" "$(group_thousands "${dev}")"
    fi
  done < "${bencher_file}"

  echo ""
  echo "_Generated by bench-report.sh_"
}

emit_text_report "${BENCHER_FILE}" > "${TXT_FILE}"

# Publish the snapshot just written under the stable bench-latest.* names,
# then tell the user where everything landed.
cp "${JSON_FILE}" "${OUTPUT_DIR}/bench-latest.json"
cp "${TXT_FILE}" "${OUTPUT_DIR}/bench-latest.txt"

cat <<EOF

==> Snapshot saved:
 JSON: ${JSON_FILE}
 Text: ${TXT_FILE}
 Latest: ${OUTPUT_DIR}/bench-latest.json
EOF