[Experiment] Random IO ideas #20248
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# PR Benchmarks: runs every benchmark once when the `action/benchmark` label is
# added to a pull request.
name: PR Benchmarks

on:
  pull_request:
    types:
      - labeled
      - synchronize
    branches:
      - develop
  workflow_dispatch: {}

concurrency:
  # Runs sharing a group are queued behind each other rather than executed in
  # parallel. The group is keyed on the PR head ref, falling back to the run id.
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  # Benchmarks that are already running are never cancelled; new runs wait.
  cancel-in-progress: false

permissions:
  actions: write  # for removing labels
  contents: read
  pull-requests: write  # for commenting on PRs
  id-token: write  # enables AWS-GitHub OIDC
jobs:
  # Gate job: decides whether the benchmarks should run at all and clears the
  # trigger label so the workflow can be requested again later.
  label_trigger:
    runs-on: ubuntu-latest
    timeout-minutes: 120
    # Runs when the commit message contains `[benchmark]`, or when the
    # `action/benchmark` label was just added to a pull request.
    # NOTE(review): `github.event.head_commit` looks like a push-event field;
    # confirm it is populated for `pull_request` events, otherwise the first
    # clause never matches.
    if: ${{ contains(github.event.head_commit.message, '[benchmark]') || (github.event.label.name == 'action/benchmark' && github.event_name == 'pull_request') }}
    steps:
      # We remove the benchmark label first so that the workflow can be re-triggered.
      # Only attempted when the PR head lives in the main repository.
      - uses: actions-ecosystem/action-remove-labels@v1
        if: ${{ github.event.pull_request.head.repo.full_name == 'vortex-data/vortex' }}
        with:
          labels: action/benchmark
          fail_on_error: true

  # Builds and runs each micro-benchmark in the matrix, compares the results
  # against the latest successful `develop` run, and reports back on the PR.
  bench:
    needs: label_trigger
    if: ${{ contains(github.event.head_commit.message, '[benchmark]') || (github.event.label.name == 'action/benchmark' && github.event_name == 'pull_request') }}
    timeout-minutes: 120
    # In the main repository use the dedicated benchmark runner (with the S3
    # cache extra for non-fork PRs); everywhere else fall back to ubuntu-latest.
    runs-on: >-
      ${{ github.repository == 'vortex-data/vortex'
      && format('runs-on={0}/runner=bench-dedicated/tag={1}{2}', github.run_id, matrix.benchmark.id, github.event.pull_request.head.repo.fork == false && '/extras=s3-cache' || '')
      || 'ubuntu-latest' }}
    strategy:
      matrix:
        benchmark:
          - id: random-access-bench
            name: Random Access
            build_args: "--features lance"
          - id: compress-bench
            name: Compression
    steps:
      - uses: runs-on/action@v2
        if: github.event.pull_request.head.repo.fork == false
        with:
          sccache: s3

      # Check out the exact PR head commit rather than the merge commit.
      - uses: actions/checkout@v6
        with:
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Setup benchmark environment
        run: sudo bash scripts/setup-benchmark.sh

      - uses: ./.github/actions/setup-rust
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}

      # Downloads the DuckDB CLI into the workspace and puts it on PATH for
      # the following steps.
      - name: Install DuckDB
        run: |
          wget -qO- https://github.com/duckdb/duckdb/releases/download/v1.4.2/duckdb_cli-linux-amd64.zip | funzip > duckdb
          chmod +x duckdb
          echo "$PWD" >> "$GITHUB_PATH"

      - uses: ./.github/actions/system-info

      - name: Build binary
        shell: bash
        env:
          RUSTFLAGS: "-C target-cpu=native -C force-frame-pointers=yes"
        run: |
          cargo build --package ${{ matrix.benchmark.id }} --profile release_debug ${{ matrix.benchmark.build_args }}

      # Profiling needs a secret, so it is skipped for PRs coming from forks.
      - name: Setup Polar Signals
        if: github.event.pull_request.head.repo.fork == false
        uses: polarsignals/gh-actions-ps-profiling@v0.8.1
        with:
          polarsignals_cloud_token: ${{ secrets.POLAR_SIGNALS_API_KEY }}
          labels: "branch=${{ github.ref_name }};gh_run_id=${{ github.run_id }};benchmark=${{ matrix.benchmark.id }}"
          project_uuid: "e5d846e1-b54c-46e7-9174-8bf055a3af56"
          profiling_frequency: 199
          extra_args: "--off-cpu-threshold=0.03"  # Personally tuned by @brancz

      - name: Run ${{ matrix.benchmark.name }} benchmark
        shell: bash
        env:
          RUST_BACKTRACE: full
        run: |
          bash scripts/bench-taskset.sh target/release_debug/${{ matrix.benchmark.id }} -d gh-json -o results.json

      - name: Setup AWS CLI
        if: github.event.pull_request.head.repo.fork == false
        uses: aws-actions/configure-aws-credentials@v6
        with:
          role-to-assume: arn:aws:iam::245040174862:role/GitHubBenchmarkRole
          aws-region: us-east-1

      - name: Install uv
        uses: spiraldb/actions/.github/actions/setup-uv@0.18.5
        with:
          sync: false

      # Looks up the head SHA of the latest successful `bench.yml` run on
      # `develop`, extracts that commit's rows from the published results, and
      # renders a comparison against this run into comment.md.
      - name: Compare results
        shell: bash
        env:
          # Passed through the environment instead of being interpolated into
          # the script text.
          GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          set -Eeu -o pipefail -x

          base_commit_sha=$(
            curl -L \
              -H "Accept: application/vnd.github+json" \
              -H "Authorization: Bearer ${GITHUB_API_TOKEN}" \
              "https://api.github.com/repos/vortex-data/vortex/actions/workflows/bench.yml/runs?branch=develop&status=success&per_page=1" \
              | jq -r '.workflow_runs[].head_sha'
          )

          # An empty pattern would make grep match every line, so fail loudly
          # when no baseline commit could be determined.
          if [[ -z "$base_commit_sha" || "$base_commit_sha" == "null" ]]; then
            echo "::error::Could not determine the baseline commit on develop" >&2
            exit 1
          fi

          python3 scripts/s3-download.py s3://vortex-ci-benchmark-results/data.json.gz data.json.gz --no-sign-request
          gzip -d -c data.json.gz | grep -- "$base_commit_sha" > base.json

          echo '# Benchmarks: ${{ matrix.benchmark.name }}' > comment.md
          echo '' >> comment.md
          uv run --no-project scripts/compare-benchmark-jsons.py base.json results.json "${{ matrix.benchmark.name }}" \
            >> comment.md
          cat comment.md >> "$GITHUB_STEP_SUMMARY"

      - name: Comment PR
        if: github.event.pull_request.head.repo.fork == false
        uses: thollander/actions-comment-pull-request@v3
        with:
          file-path: comment.md
          comment-tag: bench-pr-comment-${{ matrix.benchmark.id }}

      # This workflow defines no `mode` input (that belongs to the reusable SQL
      # workflow), so gate on the triggering event instead; otherwise this
      # step can never run.
      - name: Comment PR on failure
        if: failure() && github.event_name == 'pull_request' && github.event.pull_request.head.repo.fork == false
        uses: thollander/actions-comment-pull-request@v3
        with:
          message: |
            # 🚨🚨🚨❌❌❌ BENCHMARK FAILED ❌❌❌🚨🚨🚨
            Benchmark `${{ matrix.benchmark.name }}` failed! Check the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.
          comment-tag: bench-pr-comment-${{ matrix.benchmark.id }}

  # Delegates the SQL benchmark suites to the reusable workflow in PR mode.
  # `benchmark_matrix` is a JSON array passed as a string; keep it strict JSON
  # (no trailing commas) so any JSON parser can read it.
  sql:
    needs: label_trigger
    uses: ./.github/workflows/sql-benchmarks.yml
    secrets: inherit
    with:
      mode: "pr"
      benchmark_matrix: |
        [
          {
            "id": "clickbench-nvme",
            "subcommand": "clickbench",
            "name": "Clickbench on NVME",
            "targets": "datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex,duckdb:duckdb"
          },
          {
            "id": "tpch-nvme",
            "subcommand": "tpch",
            "name": "TPC-H SF=1 on NVME",
            "targets": "datafusion:arrow,datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb",
            "scale_factor": "1.0"
          },
          {
            "id": "tpch-s3",
            "subcommand": "tpch",
            "name": "TPC-H SF=1 on S3",
            "local_dir": "vortex-bench/data/tpch/1.0",
            "remote_storage": "s3://vortex-ci-benchmark-datasets/${{github.ref_name}}/${{github.run_id}}/tpch/1.0/",
            "targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact",
            "scale_factor": "1.0"
          },
          {
            "id": "tpch-nvme-10",
            "subcommand": "tpch",
            "name": "TPC-H SF=10 on NVME",
            "targets": "datafusion:arrow,datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb",
            "scale_factor": "10.0"
          },
          {
            "id": "tpch-s3-10",
            "subcommand": "tpch",
            "name": "TPC-H SF=10 on S3",
            "local_dir": "vortex-bench/data/tpch/10.0",
            "remote_storage": "s3://vortex-ci-benchmark-datasets/${{github.ref_name}}/${{github.run_id}}/tpch/10.0/",
            "targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact",
            "scale_factor": "10.0"
          },
          {
            "id": "tpcds-nvme",
            "subcommand": "tpcds",
            "name": "TPC-DS SF=1 on NVME",
            "targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb",
            "scale_factor": "1.0"
          },
          {
            "id": "statpopgen",
            "subcommand": "statpopgen",
            "name": "Statistical and Population Genetics",
            "targets": "duckdb:parquet,duckdb:vortex,duckdb:vortex-compact",
            "scale_factor": "100"
          },
          {
            "id": "fineweb",
            "subcommand": "fineweb",
            "name": "FineWeb NVMe",
            "targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact",
            "scale_factor": "100"
          },
          {
            "id": "fineweb-s3",
            "subcommand": "fineweb",
            "name": "FineWeb S3",
            "local_dir": "vortex-bench/data/fineweb",
            "remote_storage": "s3://vortex-ci-benchmark-datasets/${{github.ref_name}}/${{github.run_id}}/fineweb/",
            "targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact",
            "scale_factor": "100"
          },
          {
            "id": "polarsignals",
            "subcommand": "polarsignals",
            "name": "PolarSignals Profiling",
            "targets": "datafusion:vortex",
            "scale_factor": "1"
          }
        ]