diff --git a/.github/workflows/compat-gen-upload.yml b/.github/workflows/compat-gen-upload.yml
new file mode 100644
index 00000000000..de7cc486188
--- /dev/null
+++ b/.github/workflows/compat-gen-upload.yml
@@ -0,0 +1,32 @@
+name: Compat Fixture Upload
+
+on:
+ workflow_dispatch:
+ inputs:
+ version:
+ description: "Version to generate fixtures for (e.g. 0.62.0)"
+ required: true
+
+jobs:
+ upload-fixtures:
+ runs-on: ubuntu-latest
+ permissions:
+ id-token: write
+ contents: read
+ steps:
+ - uses: actions/checkout@v4
+
+ - uses: dtolnay/rust-toolchain@stable
+
+ - uses: Swatinem/rust-cache@v2
+
+ - name: Configure AWS credentials
+ uses: aws-actions/configure-aws-credentials@v5
+ with:
+ role-to-assume: arn:aws:iam::245040174862:role/GitHubBenchmarkRole
+ aws-region: us-east-1
+
+ - name: Generate and upload fixtures
+ run: >
+ python3 vortex-test/compat-gen/scripts/upload.py
+ --version "${{ inputs.version }}"
diff --git a/.github/workflows/compat-test-weekly.yml b/.github/workflows/compat-test-weekly.yml
new file mode 100644
index 00000000000..1eaa22c15b8
--- /dev/null
+++ b/.github/workflows/compat-test-weekly.yml
@@ -0,0 +1,24 @@
+name: Compat Test
+
+on:
+ schedule:
+ - cron: "0 6 * * 1" # Monday 6am UTC
+ workflow_dispatch: { }
+
+env:
+ FIXTURES_URL: https://vortex-compat-fixtures.s3.amazonaws.com
+
+jobs:
+ compat-test:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - uses: dtolnay/rust-toolchain@stable
+
+ - uses: Swatinem/rust-cache@v2
+
+ - name: Run compat tests
+ run: |
+ cargo run -p vortex-compat --release --bin compat-validate -- \
+ --fixtures-url "$FIXTURES_URL"
diff --git a/Cargo.lock b/Cargo.lock
index d380a3d6229..c8fcb71680a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -9848,6 +9848,29 @@ dependencies = [
"vortex-session",
]
+[[package]]
+name = "vortex-compat"
+version = "0.1.0"
+dependencies = [
+ "arrow-array",
+ "chrono",
+ "clap",
+ "futures",
+ "parquet",
+ "reqwest",
+ "serde",
+ "serde_json",
+ "tokio",
+ "tpchgen",
+ "tpchgen-arrow",
+ "vortex",
+ "vortex-array",
+ "vortex-buffer",
+ "vortex-error",
+ "vortex-session",
+ "vortex-utils",
+]
+
[[package]]
name = "vortex-cub"
version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
index 0da5ee805ba..6c6aa5ab60f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -32,6 +32,7 @@ members = [
"vortex-jni",
"vortex-python",
"vortex-tui",
+ "vortex-test/compat-gen",
"vortex-test/e2e",
"vortex-test/e2e-cuda",
"xtask",
@@ -281,6 +282,7 @@ vortex-zstd = { version = "0.1.0", path = "./encodings/zstd", default-features =
# No version constraints for unpublished crates.
vortex-bench = { path = "./vortex-bench", default-features = false }
+vortex-compat = { path = "./vortex-test/compat-gen" }
vortex-cuda = { path = "./vortex-cuda", default-features = false }
vortex-cuda-macros = { path = "./vortex-cuda/macros" }
vortex-duckdb = { path = "./vortex-duckdb", default-features = false }
diff --git a/vortex-test/compat-gen/Cargo.toml b/vortex-test/compat-gen/Cargo.toml
new file mode 100644
index 00000000000..66595ab6b4c
--- /dev/null
+++ b/vortex-test/compat-gen/Cargo.toml
@@ -0,0 +1,54 @@
+[package]
+name = "vortex-compat"
+authors = { workspace = true }
+description = "Backward-compatibility fixture generation and testing for Vortex"
+edition = { workspace = true }
+homepage = { workspace = true }
+include = { workspace = true }
+keywords = { workspace = true }
+license = { workspace = true }
+publish = false
+repository = { workspace = true }
+rust-version = { workspace = true }
+version = { workspace = true }
+
+[lints]
+workspace = true
+
+[[bin]]
+name = "compat-gen"
+path = "src/main.rs"
+
+[[bin]]
+name = "compat-validate"
+path = "src/validate_main.rs"
+
+[dependencies]
+# Vortex crates
+vortex = { workspace = true, features = ["files", "tokio"] }
+vortex-array = { workspace = true, features = ["_test-harness"] }
+vortex-buffer = { workspace = true }
+vortex-error = { workspace = true }
+vortex-session = { workspace = true }
+vortex-utils = { workspace = true }
+
+# TPC-H generation
+arrow-array = { workspace = true }
+tpchgen = { workspace = true }
+tpchgen-arrow = { workspace = true }
+
+# ClickBench parquet reading
+parquet = { workspace = true }
+
+# Async runtime
+futures = { workspace = true }
+tokio = { workspace = true, features = ["full"] }
+
+# HTTP fetching (for ClickBench fixture + compat-test S3 downloads)
+reqwest = { workspace = true }
+
+# CLI + serialization
+chrono = { workspace = true, features = ["serde"] }
+clap = { workspace = true, features = ["derive"] }
+serde = { workspace = true, features = ["derive"] }
+serde_json = { workspace = true }
diff --git a/vortex-test/compat-gen/README.md b/vortex-test/compat-gen/README.md
new file mode 100644
index 00000000000..6473a866ee3
--- /dev/null
+++ b/vortex-test/compat-gen/README.md
@@ -0,0 +1,263 @@
+# vortex-compat: Backward-Compatibility Testing
+
+This crate provides two binaries that together ensure Vortex can always read files
+written by older versions:
+
+- **`compat-gen`** — generates deterministic fixture files for a given Vortex version.
+- **`compat-validate`** — reads fixtures from every historical version and validates
+ they round-trip to the expected arrays.
+
+Fixtures are stored in an S3 bucket. CI uploads new fixtures on every release tag
+and runs weekly validation against all prior versions.
+
+## Fixture Contract
+
+Fixtures are the unit of backward-compatibility. Each fixture is a named file
+(e.g. `primitives.vortex`) whose contents are defined by a deterministic `build()`
+method. The following rules apply:
+
+- **Immutable data.** Once a fixture's `build()` is defined, its output (columns,
+ values, nulls, ordering) must never change. Every version that includes that
+ fixture must produce byte-for-byte identical logical arrays. `compat-validate`
+ validates this by rebuilding expected arrays from `build()` and comparing them
+ against what was read from the stored file.
+
+- **New capabilities get new files.** To test a new encoding, data type, or
+ structural pattern, add a new fixture with a new filename. Never modify an
+ existing fixture to cover new ground.
+
+- **Additive-only fixture list.** The fixture list only ever grows; fixtures are
+ never removed. The upload script (`scripts/upload.py`) enforces this by checking
+ that every fixture in the previous version's manifest still exists in the
+ generated output. Each fixture's `since` field in the manifest records the first
+ version that introduced it.
+
+- **`versions.json`** is the top-level index listing every version that has
+ uploaded fixtures. `compat-validate` iterates over all listed versions.
+
+- **Watch for dependency drift.** `compat-validate` compares stored files against
+ `build()` output from the *current* code. If a dependency (e.g. `tpchgen`)
+ silently changes its output across versions, old fixtures will fail validation
+ even though the Vortex reader is fine. If you see unexpected failures across
+ all old versions for a specific fixture, check whether its `build()` deps
+ changed before blaming the reader.
+
+## First-Time Setup: Bootstrap the Bucket
+
+After creating the S3 bucket (see [AWS Setup](#aws-setup-one-time) below), seed it
+with the first fixture set:
+
+```bash
+# Generate + upload (first version, no previous manifest to merge)
+python3 vortex-test/compat-gen/scripts/upload.py --version 0.62.0
+
+# Verify the round-trip
+AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin compat-validate -- \
+ --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com
+```
+
+## Uploading Fixtures for a New Version
+
+Use the upload script, which handles building, manifest merging, and S3 upload:
+
+```bash
+# Full upload
+python3 vortex-test/compat-gen/scripts/upload.py --version 0.63.0
+
+# Dry run (generate + merge manifest, skip S3)
+python3 vortex-test/compat-gen/scripts/upload.py --version 0.63.0 --dry-run
+
+# Skip the cargo build (if you already have fixtures generated)
+python3 vortex-test/compat-gen/scripts/upload.py \
+ --version 0.63.0 --output /tmp/fixtures/ --skip-build
+
+# Verify all versions
+cargo run -p vortex-compat --release --bin compat-validate -- \
+ --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com
+```
+
+## Re-uploading Fixtures for an Existing Version
+
+The upload script will overwrite the existing prefix in S3:
+
+```bash
+python3 vortex-test/compat-gen/scripts/upload.py --version 0.62.0
+```
+
+No need to update `versions.json` — the script handles it idempotently.
+
+## Local-Only Workflow
+
+You can skip S3 entirely and work against local directories:
+
+```bash
+# Generate into a versioned subdirectory
+cargo run -p vortex-compat --release --bin compat-gen -- \
+ --version 0.62.0 --output /tmp/compat-root/v0.62.0/
+
+# Validate all local versions
+cargo run -p vortex-compat --release --bin compat-validate -- \
+ --fixtures-dir /tmp/compat-root/
+```
+
+If the bucket requires authenticated access, set your AWS profile:
+
+```bash
+AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin compat-validate -- \
+ --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com
+```
+
+## AWS Setup (one-time)
+
+All resources live in the **benchmark account (245040174862)**, region **us-east-1**.
+
+### 1. Create the S3 bucket
+
+```bash
+aws s3api create-bucket \
+ --bucket vortex-compat-fixtures \
+ --region us-east-1
+```
+
+### 2. Enable public read access
+
+Disable the "Block Public Access" settings that prevent a public bucket policy:
+
+```bash
+aws s3api put-public-access-block \
+ --bucket vortex-compat-fixtures \
+ --public-access-block-configuration \
+ BlockPublicAcls=true,IgnorePublicAcls=true,BlockPublicPolicy=false,RestrictPublicBuckets=false
+```
+
+Then attach a bucket policy that grants unauthenticated read:
+
+```bash
+aws s3api put-bucket-policy \
+ --bucket vortex-compat-fixtures \
+ --policy '{
+ "Version": "2012-10-17",
+ "Statement": [
+ {
+ "Sid": "PublicRead",
+ "Effect": "Allow",
+ "Principal": "*",
+ "Action": ["s3:GetObject", "s3:ListBucket"],
+ "Resource": [
+ "arn:aws:s3:::vortex-compat-fixtures",
+ "arn:aws:s3:::vortex-compat-fixtures/*"
+ ]
+ }
+ ]
+ }'
+```
+
+### 3. Grant the benchmark role access to the compat bucket
+
+The CI workflow reuses the existing `GitHubBenchmarkRole`
+(`arn:aws:iam::245040174862:role/GitHubBenchmarkRole`).
+Add an inline policy granting it S3 access to the compat fixtures bucket:
+
+```bash
+aws iam put-role-policy \
+ --role-name GitHubBenchmarkRole \
+ --policy-name CompatFixturesS3Access \
+ --policy-document '{
+ "Version": "2012-10-17",
+ "Statement": [
+ {
+ "Effect": "Allow",
+ "Action": [
+ "s3:PutObject",
+ "s3:GetObject",
+ "s3:ListBucket"
+ ],
+ "Resource": [
+ "arn:aws:s3:::vortex-compat-fixtures",
+ "arn:aws:s3:::vortex-compat-fixtures/*"
+ ]
+ }
+ ]
+ }'
+```
+
+## CI Workflows
+
+### Fixture upload (`.github/workflows/compat-gen-upload.yml`)
+
+Triggered via **manual dispatch** with a required `version` input (e.g. `0.62.0`).
+Will be updated to also trigger on release tag pushes once the workflow is proven.
+
+1. Checks out the current branch
+2. Runs `scripts/upload.py --version <VERSION>` which:
+ - Builds and runs `compat-gen` to generate fixtures
+ - Fetches the previous version's manifest and merges `since` values
+ - Enforces additive-only (no fixtures removed)
+   - Uploads fixtures to `s3://vortex-compat-fixtures/v<VERSION>/`
+ - Updates `versions.json` with ETag-based optimistic locking
+
+### Weekly compat test (`.github/workflows/compat-test-weekly.yml`)
+
+Runs **every Monday at 06:00 UTC** and on **manual dispatch**.
+
+1. Checks out `main` at HEAD
+2. Runs `compat-validate --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com`
+3. Validates every version listed in `versions.json`
+
+## Fixture Suite
+
+| Fixture | File | Since | Description |
+|---------|------|-------|-------------|
+| Primitives | `primitives.vortex` | 0.62.0 | All numeric types (u8–u64, i32, i64, f32, f64) with min/mid/max values |
+| Strings | `strings.vortex` | 0.62.0 | Variable-length strings including empty, ASCII, Unicode, and emoji |
+| Booleans | `booleans.vortex` | 0.62.0 | Boolean array with mixed true/false values |
+| Nullable | `nullable.vortex` | 0.62.0 | Nullable int and string columns with interleaved nulls |
+| Nested Struct | `struct_nested.vortex` | 0.62.0 | Two-level nested struct (inner struct within outer struct) |
+| Chunked | `chunked.vortex` | 0.62.0 | Multi-chunk file: 3 chunks of 1000 rows each |
+| TPC-H Lineitem | `tpch_lineitem.vortex` | 0.62.0 | TPC-H lineitem table at scale factor 0.01 |
+| TPC-H Orders | `tpch_orders.vortex` | 0.62.0 | TPC-H orders table at scale factor 0.01 |
+| ClickBench Hits | `clickbench_hits_1k.vortex` | 0.62.0 | First 1000 rows of the ClickBench hits table |
+
+### Adding a new fixture
+
+New encodings, data types, or structural patterns always get a **new fixture file**.
+Never modify an existing fixture's `build()` output (see [Fixture Contract](#fixture-contract)).
+
+1. Create a struct implementing the `Fixture` trait in `src/fixtures/`:
+ ```rust
+ pub struct MyFixture;
+ impl Fixture for MyFixture {
+ fn name(&self) -> &str { "my_fixture.vortex" }
+     fn build(&self) -> VortexResult<Vec<ArrayRef>> { /* deterministic array construction */ }
+ }
+ ```
+2. Register it in `all_fixtures()` in `src/fixtures/mod.rs`.
+3. Run `compat-gen` locally to verify it produces a valid file.
+4. Upload fixtures for the current version — the upload script merges the manifest
+ so the new fixture gets `since` set to the current version while existing
+ fixtures keep their original `since` values.
+
+## Adapter Epochs
+
+The adapter module (`src/adapter.rs`) contains the read/write logic for the Vortex file
+format. As the format API evolves across major versions, new "epochs" are introduced:
+
+| Epoch | Vortex Versions | Key API Surface |
+|-------|----------------|-----------------|
+| A | v0.36.0 | Original `VortexFileWriter` / `VortexOpenOptions` |
+| B | v0.45.0 – v0.52.0 | Intermediate session-based API |
+| C | v0.58.0 – HEAD | `session.write_options()` / `session.open_options().open_buffer()` |
+
+Only Epoch C is currently active. Earlier epochs were used during initial development
+and can be resurrected by cherry-picking the adapter code onto an older release branch
+if retroactive fixture generation is needed.
+
+### Cherry-picking to older releases
+
+To generate fixtures for a version in Epoch A or B:
+
+1. Check out the target tag (e.g. `git checkout v0.45.0`)
+2. Cherry-pick the compat-gen crate: `git cherry-pick --no-commit <commit>`
+3. Swap `src/adapter.rs` to the appropriate epoch's implementation
+4. Resolve any dependency mismatches in `Cargo.toml`
+5. Run `compat-gen` and upload the resulting fixtures
diff --git a/vortex-test/compat-gen/scripts/upload.py b/vortex-test/compat-gen/scripts/upload.py
new file mode 100755
index 00000000000..63b496158ee
--- /dev/null
+++ b/vortex-test/compat-gen/scripts/upload.py
@@ -0,0 +1,351 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+"""Upload Vortex backward-compat fixtures to S3.
+
+Wraps the full upload lifecycle:
+ 1. Build + run compat-gen to produce fixture files and a naive manifest
+ 2. Fetch the previous version's manifest from S3 (via public HTTP)
+ 3. Merge `since` values: keep old `since` for existing fixtures, current
+ version for new ones
+ 4. Enforce additive-only: every fixture in the previous manifest must exist
+ in the generated output
+ 5. Upload the output directory to S3
+ 6. Update versions.json with ETag-based optimistic locking
+
+Requires only Python 3 stdlib + `aws` CLI on PATH.
+"""
+
+import argparse
+import json
+import os
+import subprocess
+import sys
+import tempfile
+import time
+import urllib.error
+import urllib.request
+
+S3_BUCKET = "vortex-compat-fixtures"
+FIXTURES_URL = "https://vortex-compat-fixtures.s3.amazonaws.com"
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def log(msg: str) -> None:
+ print(msg, file=sys.stderr)
+
+
+def run(cmd: list[str], *, check: bool = True, **kwargs) -> subprocess.CompletedProcess:
+ log(f" $ {' '.join(cmd)}")
+ return subprocess.run(cmd, check=check, **kwargs)
+
+
+def http_get(url: str) -> bytes | None:
+ """Fetch *url* over HTTPS. Returns None on 404, raises on other errors."""
+ try:
+ with urllib.request.urlopen(url) as resp:
+ return resp.read()
+ except urllib.error.HTTPError as exc:
+ if exc.code == 404 or exc.code == 403:
+ return None
+ raise
+
+
+def version_sort_key(v: str) -> list[int]:
+ return list(map(int, v.split(".")))
+
+
+# ---------------------------------------------------------------------------
+# S3 helpers (reuse head_etag / put_object pattern from scripts/s3-upload.py)
+# ---------------------------------------------------------------------------
+
+
+def head_etag(bucket: str, key: str) -> str | None:
+ """Fetch the current ETag for an S3 object, or None if missing."""
+ result = subprocess.run(
+ [
+ "aws",
+ "s3api",
+ "head-object",
+ "--bucket",
+ bucket,
+ "--key",
+ key,
+ "--query",
+ "ETag",
+ "--output",
+ "text",
+ ],
+ capture_output=True,
+ text=True,
+ )
+ if result.returncode != 0:
+ return None
+ etag = result.stdout.strip()
+ if not etag or etag == "null":
+ return None
+ return etag
+
+
+def put_object(bucket: str, key: str, body: str, if_match: str | None) -> bool:
+ """Upload a single object with optional ETag precondition."""
+ cmd = [
+ "aws",
+ "s3api",
+ "put-object",
+ "--bucket",
+ bucket,
+ "--key",
+ key,
+ "--body",
+ body,
+ ]
+ if if_match:
+ cmd.extend(["--if-match", if_match])
+ result = subprocess.run(cmd, capture_output=True)
+ return result.returncode == 0
+
+
+def upload_versions_json(local_path: str, max_retries: int = 5) -> None:
+ """Upload versions.json with ETag-based optimistic locking + retry."""
+ key = "versions.json"
+ for attempt in range(1, max_retries + 1):
+ etag = head_etag(S3_BUCKET, key)
+ if put_object(S3_BUCKET, key, local_path, etag):
+ log(" versions.json uploaded.")
+ return
+
+ if attempt == max_retries:
+ break
+
+ delay = min(2**attempt, 30)
+ log(f" versions.json upload failed (attempt {attempt}/{max_retries}), retrying in {delay}s...")
+ time.sleep(delay)
+
+ log(f"ERROR: versions.json upload failed after {max_retries} attempts")
+ sys.exit(1)
+
+
+# ---------------------------------------------------------------------------
+# Core logic
+# ---------------------------------------------------------------------------
+
+
+def fetch_versions() -> list[str]:
+ """Fetch the current versions.json from S3 (public HTTP)."""
+ data = http_get(f"{FIXTURES_URL}/versions.json")
+ if data is None:
+ return []
+ return json.loads(data)
+
+
+def fetch_previous_manifest(versions: list[str], current_version: str) -> dict | None:
+ """Fetch the manifest.json for the latest version before *current_version*."""
+ candidates = [v for v in versions if v != current_version]
+ if not candidates:
+ return None
+ candidates.sort(key=version_sort_key)
+ latest = candidates[-1]
+ log(f" previous version: {latest}")
+ data = http_get(f"{FIXTURES_URL}/v{latest}/manifest.json")
+ if data is None:
+ return None
+ return json.loads(data)
+
+
+def normalize_manifest_fixtures(manifest: dict) -> list[dict]:
+ """Handle old manifest format where fixtures was a list of strings."""
+ entries = manifest.get("fixtures", [])
+ normalized = []
+ for entry in entries:
+ if isinstance(entry, str):
+ # Old format: just a filename string — no `since` info
+ normalized.append({"name": entry, "since": "unknown"})
+ else:
+ normalized.append(entry)
+ return normalized
+
+
+def merge_manifest(
+ generated_manifest_path: str,
+ previous_manifest: dict | None,
+ current_version: str,
+) -> None:
+ """Merge `since` values from the previous manifest into the generated one.
+
+ Also enforces the additive-only rule: every fixture in the previous manifest
+ must exist in the generated output.
+ """
+ with open(generated_manifest_path) as f:
+ generated = json.load(f)
+
+ if previous_manifest is None:
+ # First upload — nothing to merge.
+ return
+
+ prev_fixtures = normalize_manifest_fixtures(previous_manifest)
+ prev_by_name = {e["name"]: e for e in prev_fixtures}
+ gen_by_name = {e["name"]: e for e in generated["fixtures"]}
+
+ # Additive-only check: every previous fixture must still exist.
+ missing = sorted(set(prev_by_name) - set(gen_by_name))
+ if missing:
+ log(f"ERROR: fixtures removed since previous version: {missing}")
+ log("Fixtures must never be removed — only added.")
+ sys.exit(1)
+
+ # Merge: keep old `since` for existing fixtures, current version for new.
+ for entry in generated["fixtures"]:
+ name = entry["name"]
+ if name in prev_by_name:
+ entry["since"] = prev_by_name[name]["since"]
+ else:
+ entry["since"] = current_version
+
+ with open(generated_manifest_path, "w") as f:
+ json.dump(generated, f, indent=2)
+ f.write("\n")
+
+ log(f" merged manifest: {len(prev_by_name)} existing, {len(gen_by_name) - len(prev_by_name)} new fixtures")
+
+
+def build_fixtures(version: str, output_dir: str) -> None:
+ """Run cargo to build and execute compat-gen."""
+ run(
+ [
+ "cargo",
+ "run",
+ "-p",
+ "vortex-compat",
+ "--release",
+ "--bin",
+ "compat-gen",
+ "--",
+ "--version",
+ version,
+ "--output",
+ output_dir,
+ ]
+ )
+
+
+def upload_fixtures(version: str, output_dir: str) -> None:
+ """Upload the output directory to S3."""
+ run(
+ [
+ "aws",
+ "s3",
+ "cp",
+ output_dir,
+ f"s3://{S3_BUCKET}/v{version}/",
+ "--recursive",
+ ]
+ )
+
+
+def update_versions(version: str, tmp_dir: str) -> None:
+ """Append version to versions.json and upload with optimistic locking."""
+ versions = fetch_versions()
+
+ if version not in versions:
+ versions.append(version)
+ versions.sort(key=version_sort_key)
+
+ local_path = os.path.join(tmp_dir, "versions.json")
+ with open(local_path, "w") as f:
+ json.dump(versions, f, indent=2)
+ f.write("\n")
+
+ upload_versions_json(local_path)
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+
+def main() -> None:
+ parser = argparse.ArgumentParser(
+ description="Build, generate, and upload Vortex backward-compat fixtures.",
+ )
+ parser.add_argument(
+ "--version",
+ required=True,
+ help='Version tag for this fixture set (e.g. "0.62.0").',
+ )
+ parser.add_argument(
+ "--output",
+ help="Output directory for generated fixtures (default: temp dir).",
+ )
+ parser.add_argument(
+ "--skip-build",
+ action="store_true",
+ help="Skip cargo build + compat-gen run (assumes --output already populated).",
+ )
+ parser.add_argument(
+ "--dry-run",
+ action="store_true",
+ help="Generate and merge manifest but skip S3 upload.",
+ )
+ args = parser.parse_args()
+
+ # Resolve output directory.
+ if args.output:
+ output_dir = args.output
+ os.makedirs(output_dir, exist_ok=True)
+ owns_tmp = False
+ else:
+ tmp = tempfile.mkdtemp(prefix="compat-gen-")
+ output_dir = os.path.join(tmp, "fixtures")
+ os.makedirs(output_dir)
+ owns_tmp = True
+
+ try:
+ # Step 1: Build + generate fixtures.
+ if not args.skip_build:
+ log(f"[1/4] Generating fixtures for v{args.version}...")
+ build_fixtures(args.version, output_dir)
+ else:
+ log(f"[1/4] Skipping build (--skip-build), using {output_dir}")
+
+ # Step 2: Fetch previous manifest and merge `since` values.
+ log("[2/4] Fetching previous manifest...")
+ versions = fetch_versions()
+ prev_manifest = fetch_previous_manifest(versions, args.version)
+ manifest_path = os.path.join(output_dir, "manifest.json")
+ merge_manifest(manifest_path, prev_manifest, args.version)
+
+ if args.dry_run:
+ log("[3/4] Dry run — skipping S3 upload.")
+ log("[4/4] Dry run — skipping versions.json update.")
+ log(f"\nGenerated fixtures in: {output_dir}")
+ with open(manifest_path) as f:
+ log(f"Manifest:\n{f.read()}")
+ return
+
+ # Step 3: Upload fixtures to S3.
+ log(f"[3/4] Uploading fixtures to s3://{S3_BUCKET}/v{args.version}/...")
+ upload_fixtures(args.version, output_dir)
+
+ # Step 4: Update versions.json.
+ log("[4/4] Updating versions.json...")
+ # Use the parent of output_dir for the temp versions.json file.
+ tmp_dir = os.path.dirname(output_dir) if owns_tmp else tempfile.mkdtemp()
+ update_versions(args.version, tmp_dir)
+
+ log(f"\nDone: fixtures for v{args.version} uploaded.")
+ finally:
+ # Clean up temp dir if we created one.
+ if owns_tmp and not args.dry_run:
+ import shutil
+
+ shutil.rmtree(os.path.dirname(output_dir), ignore_errors=True)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/vortex-test/compat-gen/src/adapter.rs b/vortex-test/compat-gen/src/adapter.rs
new file mode 100644
index 00000000000..d3a92f2bfa2
--- /dev/null
+++ b/vortex-test/compat-gen/src/adapter.rs
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+// Epoch C adapter — for Vortex v0.58.0 through HEAD
+//
+// Write: session.write_options(), returns WriteSummary, takes &mut sink
+// Read: session.open_options().open_buffer(buf) (sync), into_array_stream() (async)
+
+use std::path::Path;
+
+use futures::stream;
+use tokio::runtime::Runtime;
+use vortex::VortexSessionDefault;
+use vortex::file::OpenOptionsSessionExt;
+use vortex::file::WriteOptionsSessionExt;
+use vortex::io::session::RuntimeSessionExt;
+use vortex_array::ArrayRef;
+use vortex_array::stream::ArrayStreamAdapter;
+use vortex_array::stream::ArrayStreamExt;
+use vortex_buffer::ByteBuffer;
+use vortex_error::VortexResult;
+use vortex_session::VortexSession;
+
+fn runtime() -> VortexResult<Runtime> {
+ Runtime::new().map_err(|e| vortex_error::vortex_err!("failed to create tokio runtime: {e}"))
+}
+
+/// Write a sequence of array chunks as a `.vortex` file.
+pub fn write_file(path: &Path, chunks: Vec<ArrayRef>) -> VortexResult<()> {
+ let dtype = chunks[0].dtype().clone();
+ let stream = ArrayStreamAdapter::new(dtype, stream::iter(chunks.into_iter().map(Ok)));
+
+ runtime()?.block_on(async {
+ let session = VortexSession::default().with_tokio();
+ let mut file = tokio::fs::File::create(path)
+ .await
+ .map_err(|e| vortex_error::vortex_err!("failed to create {}: {e}", path.display()))?;
+ let _summary = session.write_options().write(&mut file, stream).await?;
+ Ok(())
+ })
+}
+
+/// Read a `.vortex` file from bytes, returning the arrays.
+pub fn read_file(bytes: ByteBuffer) -> VortexResult<Vec<ArrayRef>> {
+ runtime()?.block_on(async {
+ let session = VortexSession::default().with_tokio();
+ let file = session.open_options().open_buffer(bytes)?;
+ let arr = file.scan()?.into_array_stream()?.read_all().await?;
+ Ok(vec![arr])
+ })
+}
diff --git a/vortex-test/compat-gen/src/fixtures/clickbench.rs b/vortex-test/compat-gen/src/fixtures/clickbench.rs
new file mode 100644
index 00000000000..ca76d1c241b
--- /dev/null
+++ b/vortex-test/compat-gen/src/fixtures/clickbench.rs
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+use arrow_array::RecordBatch;
+use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
+use vortex_array::ArrayRef;
+use vortex_array::arrow::FromArrowArray;
+use vortex_error::VortexResult;
+use vortex_error::vortex_err;
+
+use super::Fixture;
+
+/// First partition of ClickBench hits, limited to 1000 rows.
+const CLICKBENCH_URL: &str =
+ "https://pub-3ba949c0f0354ac18db1f0f14f0a2c52.r2.dev/clickbench/parquet_many/hits_0.parquet";
+
+pub struct ClickBenchHits1kFixture;
+
+impl Fixture for ClickBenchHits1kFixture {
+ fn name(&self) -> &str {
+ "clickbench_hits_1k.vortex"
+ }
+
+    fn build(&self) -> VortexResult<Vec<ArrayRef>> {
+ let bytes = reqwest::blocking::get(CLICKBENCH_URL)
+ .map_err(|e| vortex_err!("failed to download ClickBench parquet: {e}"))?
+ .bytes()
+ .map_err(|e| vortex_err!("failed to read ClickBench response body: {e}"))?;
+
+ let reader = ParquetRecordBatchReaderBuilder::try_new(bytes)
+ .map_err(|e| vortex_err!("failed to open parquet: {e}"))?
+ .with_batch_size(1000)
+ .with_limit(1000)
+ .build()
+ .map_err(|e| vortex_err!("failed to build parquet reader: {e}"))?;
+
+        let batches: Vec<RecordBatch> = reader
+            .collect::<Result<Vec<_>, _>>()
+ .map_err(|e| vortex_err!("failed to read parquet batches: {e}"))?;
+
+ batches
+ .into_iter()
+ .map(|batch| ArrayRef::from_arrow(batch, false))
+ .collect()
+ }
+}
diff --git a/vortex-test/compat-gen/src/fixtures/mod.rs b/vortex-test/compat-gen/src/fixtures/mod.rs
new file mode 100644
index 00000000000..604ca52e6c8
--- /dev/null
+++ b/vortex-test/compat-gen/src/fixtures/mod.rs
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+mod clickbench;
+mod synthetic;
+mod tpch;
+
+use vortex_array::ArrayRef;
+use vortex_error::VortexResult;
+
+/// A deterministic fixture that produces the same arrays every time.
+pub trait Fixture: Send + Sync {
+ /// The filename for this fixture, e.g. "primitives.vortex".
+ fn name(&self) -> &str;
+
+ /// Build the expected arrays. Must be deterministic.
+ ///
+ /// Returns a `Vec` to support chunked fixtures (multiple chunks).
+ /// Single-array fixtures return a one-element vec.
+    fn build(&self) -> VortexResult<Vec<ArrayRef>>;
+}
+
+/// All registered fixtures.
+pub fn all_fixtures() -> Vec<Box<dyn Fixture>> {
+ vec![
+ Box::new(synthetic::PrimitivesFixture),
+ Box::new(synthetic::StringsFixture),
+ Box::new(synthetic::BooleansFixture),
+ Box::new(synthetic::NullableFixture),
+ Box::new(synthetic::StructNestedFixture),
+ Box::new(synthetic::ChunkedFixture),
+ Box::new(tpch::TpchLineitemFixture),
+ Box::new(tpch::TpchOrdersFixture),
+ Box::new(clickbench::ClickBenchHits1kFixture),
+ ]
+}
diff --git a/vortex-test/compat-gen/src/fixtures/synthetic.rs b/vortex-test/compat-gen/src/fixtures/synthetic.rs
new file mode 100644
index 00000000000..1017a62d99b
--- /dev/null
+++ b/vortex-test/compat-gen/src/fixtures/synthetic.rs
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+use vortex_array::ArrayRef;
+use vortex_array::IntoArray;
+use vortex_array::arrays::BoolArray;
+use vortex_array::arrays::PrimitiveArray;
+use vortex_array::arrays::StructArray;
+use vortex_array::arrays::VarBinArray;
+use vortex_array::dtype::FieldNames;
+use vortex_array::validity::Validity;
+use vortex_buffer::buffer;
+use vortex_error::VortexResult;
+
+use super::Fixture;
+
+pub struct PrimitivesFixture;
+
+impl Fixture for PrimitivesFixture {
+    fn name(&self) -> &str {
+        "primitives.vortex"
+    }
+
+    /// One 3-row struct with every numeric primitive width, covering the
+    /// extreme values (min / mid / max) of each type.
+    fn build(&self) -> VortexResult<Vec<ArrayRef>> {
+        let arr = StructArray::try_new(
+            FieldNames::from(["u8", "u16", "u32", "u64", "i32", "i64", "f32", "f64"]),
+            vec![
+                PrimitiveArray::new(buffer![0u8, 128, 255], Validity::NonNullable).into_array(),
+                PrimitiveArray::new(buffer![0u16, 32768, 65535], Validity::NonNullable)
+                    .into_array(),
+                PrimitiveArray::new(
+                    buffer![0u32, 2_147_483_648, 4_294_967_295],
+                    Validity::NonNullable,
+                )
+                .into_array(),
+                PrimitiveArray::new(
+                    buffer![0u64, 9_223_372_036_854_775_808, u64::MAX],
+                    Validity::NonNullable,
+                )
+                .into_array(),
+                PrimitiveArray::new(buffer![i32::MIN, 0i32, i32::MAX], Validity::NonNullable)
+                    .into_array(),
+                PrimitiveArray::new(buffer![i64::MIN, 0i64, i64::MAX], Validity::NonNullable)
+                    .into_array(),
+                PrimitiveArray::new(buffer![f32::MIN, 0.0f32, f32::MAX], Validity::NonNullable)
+                    .into_array(),
+                PrimitiveArray::new(buffer![f64::MIN, 0.0f64, f64::MAX], Validity::NonNullable)
+                    .into_array(),
+            ],
+            3,
+            Validity::NonNullable,
+        )?;
+        Ok(vec![arr.into_array()])
+    }
+}
+
+pub struct StringsFixture;
+
+impl Fixture for StringsFixture {
+    fn name(&self) -> &str {
+        "strings.vortex"
+    }
+
+    /// UTF-8 edge cases: empty string, ASCII, multi-byte CJK, and an
+    /// astral-plane emoji (4-byte sequence).
+    fn build(&self) -> VortexResult<Vec<ArrayRef>> {
+        let strings = VarBinArray::from(vec!["", "hello", "こんにちは", "\u{1f980}"]);
+        let arr = StructArray::try_new(
+            FieldNames::from(["text"]),
+            vec![strings.into_array()],
+            4,
+            Validity::NonNullable,
+        )?;
+        Ok(vec![arr.into_array()])
+    }
+}
+
+pub struct BooleansFixture;
+
+impl Fixture for BooleansFixture {
+    fn name(&self) -> &str {
+        "booleans.vortex"
+    }
+
+    /// A small non-nullable boolean column.
+    fn build(&self) -> VortexResult<Vec<ArrayRef>> {
+        let bools = BoolArray::from_iter([true, false, true, true, false]);
+        let arr = StructArray::try_new(
+            FieldNames::from(["flag"]),
+            vec![bools.into_array()],
+            5,
+            Validity::NonNullable,
+        )?;
+        Ok(vec![arr.into_array()])
+    }
+}
+
+pub struct NullableFixture;
+
+impl Fixture for NullableFixture {
+    fn name(&self) -> &str {
+        "nullable.vortex"
+    }
+
+    /// Nullable int and string columns (the struct itself stays
+    /// non-nullable; only the fields carry validity).
+    fn build(&self) -> VortexResult<Vec<ArrayRef>> {
+        let nullable_ints =
+            PrimitiveArray::from_option_iter([Some(1i32), None, Some(42), None, Some(-7)]);
+        let nullable_strings =
+            VarBinArray::from(vec![Some("hello"), None, Some("world"), Some(""), None]);
+        let arr = StructArray::try_new(
+            FieldNames::from(["int_col", "str_col"]),
+            vec![nullable_ints.into_array(), nullable_strings.into_array()],
+            5,
+            Validity::NonNullable,
+        )?;
+        Ok(vec![arr.into_array()])
+    }
+}
+
+pub struct StructNestedFixture;
+
+impl Fixture for StructNestedFixture {
+    fn name(&self) -> &str {
+        "struct_nested.vortex"
+    }
+
+    /// A struct column nested inside another struct, plus a sibling
+    /// float column — exercises nested dtype round-tripping.
+    fn build(&self) -> VortexResult<Vec<ArrayRef>> {
+        let inner = StructArray::try_new(
+            FieldNames::from(["a", "b"]),
+            vec![
+                PrimitiveArray::new(buffer![10i32, 20, 30], Validity::NonNullable).into_array(),
+                VarBinArray::from(vec!["x", "y", "z"]).into_array(),
+            ],
+            3,
+            Validity::NonNullable,
+        )?;
+
+        let arr = StructArray::try_new(
+            FieldNames::from(["inner", "value"]),
+            vec![
+                inner.into_array(),
+                PrimitiveArray::new(buffer![1.1f64, 2.2, 3.3], Validity::NonNullable).into_array(),
+            ],
+            3,
+            Validity::NonNullable,
+        )?;
+        Ok(vec![arr.into_array()])
+    }
+}
+
+pub struct ChunkedFixture;
+
+impl Fixture for ChunkedFixture {
+    fn name(&self) -> &str {
+        "chunked.vortex"
+    }
+
+    fn build(&self) -> VortexResult<Vec<ArrayRef>> {
+        // 3 chunks of 1000 rows each. Values are deterministic: chunk_idx * 1000 + row_idx.
+        (0u32..3)
+            .map(|chunk_idx| {
+                let values: Vec<u32> = (0u32..1000).map(|i| chunk_idx * 1000 + i).collect();
+                let primitives =
+                    PrimitiveArray::new(vortex_buffer::Buffer::from(values), Validity::NonNullable);
+                Ok(StructArray::try_new(
+                    FieldNames::from(["id"]),
+                    vec![primitives.into_array()],
+                    1000,
+                    Validity::NonNullable,
+                )?
+                .into_array())
+            })
+            .collect()
+    }
+}
diff --git a/vortex-test/compat-gen/src/fixtures/tpch.rs b/vortex-test/compat-gen/src/fixtures/tpch.rs
new file mode 100644
index 00000000000..2447733e5a8
--- /dev/null
+++ b/vortex-test/compat-gen/src/fixtures/tpch.rs
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+use arrow_array::RecordBatch;
+use tpchgen::generators::LineItemGenerator;
+use tpchgen::generators::OrderGenerator;
+use tpchgen_arrow::RecordBatchIterator;
+use vortex_array::ArrayRef;
+use vortex_array::arrow::FromArrowArray;
+use vortex_error::VortexResult;
+
+use super::Fixture;
+
+// Small scale factor keeps fixture files compact while still exercising
+// the TPC-H schemas.
+const SCALE_FACTOR: f64 = 0.01;
+
+/// Drain a tpchgen Arrow batch iterator and convert each batch (non-nullable
+/// top level) into a Vortex array — one array per batch/chunk.
+fn collect_batches_as_vortex(iter: impl RecordBatchIterator) -> VortexResult<Vec<ArrayRef>> {
+    let batches: Vec<RecordBatch> = iter.collect();
+    batches
+        .into_iter()
+        .map(|batch| ArrayRef::from_arrow(batch, false))
+        .collect()
+}
+
+pub struct TpchLineitemFixture;
+
+impl Fixture for TpchLineitemFixture {
+    fn name(&self) -> &str {
+        "tpch_lineitem.vortex"
+    }
+
+    /// TPC-H lineitem at SCALE_FACTOR, part 1 of 1 (deterministic output).
+    fn build(&self) -> VortexResult<Vec<ArrayRef>> {
+        let generator = LineItemGenerator::new(SCALE_FACTOR, 1, 1);
+        let arrow_iter = tpchgen_arrow::LineItemArrow::new(generator).with_batch_size(65_536);
+        collect_batches_as_vortex(arrow_iter)
+    }
+}
+
+pub struct TpchOrdersFixture;
+
+impl Fixture for TpchOrdersFixture {
+    fn name(&self) -> &str {
+        "tpch_orders.vortex"
+    }
+
+    /// TPC-H orders at SCALE_FACTOR, part 1 of 1 (deterministic output).
+    fn build(&self) -> VortexResult<Vec<ArrayRef>> {
+        let generator = OrderGenerator::new(SCALE_FACTOR, 1, 1);
+        let arrow_iter = tpchgen_arrow::OrderArrow::new(generator).with_batch_size(65_536);
+        collect_batches_as_vortex(arrow_iter)
+    }
+}
diff --git a/vortex-test/compat-gen/src/lib.rs b/vortex-test/compat-gen/src/lib.rs
new file mode 100644
index 00000000000..6b758691198
--- /dev/null
+++ b/vortex-test/compat-gen/src/lib.rs
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+pub mod adapter; // fixture file I/O (write_file / read_file)
+pub mod fixtures; // deterministic fixture definitions and registry
+pub mod manifest; // manifest.json (de)serialization types
+pub mod validate; // validate stored fixtures against the current reader
diff --git a/vortex-test/compat-gen/src/main.rs b/vortex-test/compat-gen/src/main.rs
new file mode 100644
index 00000000000..7f6bdb2e4ac
--- /dev/null
+++ b/vortex-test/compat-gen/src/main.rs
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+use std::path::PathBuf;
+
+use chrono::Utc;
+use clap::Parser;
+use vortex_compat::fixtures::all_fixtures;
+use vortex_compat::manifest::FixtureEntry;
+use vortex_compat::manifest::Manifest;
+use vortex_error::VortexResult;
+
+/// Command-line arguments for the fixture generator binary.
+#[derive(Parser)]
+#[command(
+    name = "compat-gen",
+    about = "Generate Vortex backward-compat fixture files"
+)]
+struct Cli {
+    /// Version tag for this fixture set (e.g. "0.62.0").
+    #[arg(long)]
+    version: String,
+
+    /// Output directory for generated fixture files.
+    #[arg(long)]
+    output: PathBuf,
+}
+
+/// Entry point: builds every registered fixture, writes each one into the
+/// output directory, then emits a manifest.json describing the set.
+fn main() -> VortexResult<()> {
+    let cli = Cli::parse();
+
+    std::fs::create_dir_all(&cli.output)
+        .map_err(|e| vortex_error::vortex_err!("failed to create output dir: {e}"))?;
+
+    let fixtures = all_fixtures();
+    let mut entries = Vec::with_capacity(fixtures.len());
+
+    for fixture in &fixtures {
+        // Each fixture yields one or more chunks; all chunks go into one file.
+        let chunks = fixture.build()?;
+        let path = cli.output.join(fixture.name());
+        vortex_compat::adapter::write_file(&path, chunks)?;
+
+        // Every fixture in this run is stamped with the CLI-provided version.
+        entries.push(FixtureEntry {
+            name: fixture.name().to_string(),
+            since: cli.version.clone(),
+        });
+        eprintln!(" wrote {}", fixture.name());
+    }
+
+    let manifest = Manifest {
+        version: cli.version.clone(),
+        generated_at: Utc::now(),
+        fixtures: entries,
+    };
+    let manifest_path = cli.output.join("manifest.json");
+    let manifest_json = serde_json::to_string_pretty(&manifest)
+        .map_err(|e| vortex_error::vortex_err!("failed to serialize manifest: {e}"))?;
+    std::fs::write(&manifest_path, manifest_json)
+        .map_err(|e| vortex_error::vortex_err!("failed to write manifest: {e}"))?;
+    eprintln!(" wrote manifest.json");
+
+    eprintln!("done: {} fixtures for v{}", fixtures.len(), cli.version);
+    Ok(())
+}
diff --git a/vortex-test/compat-gen/src/manifest.rs b/vortex-test/compat-gen/src/manifest.rs
new file mode 100644
index 00000000000..2c72ebce70d
--- /dev/null
+++ b/vortex-test/compat-gen/src/manifest.rs
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+use chrono::DateTime;
+use chrono::Utc;
+use serde::Deserialize;
+use serde::Serialize;
+
+/// Manifest listing all fixtures generated for a given version.
+#[derive(Debug, Serialize, Deserialize)]
+pub struct Manifest {
+    /// Version tag the fixtures were generated with, e.g. "0.62.0".
+    pub version: String,
+    /// UTC timestamp of generation.
+    pub generated_at: DateTime<Utc>,
+    /// Every fixture file present in this version's directory.
+    pub fixtures: Vec<FixtureEntry>,
+}
+
+/// One entry in the manifest's fixture list.
+///
+/// Round-trips through manifest.json via serde.
+#[derive(Debug, Serialize, Deserialize)]
+pub struct FixtureEntry {
+    /// Filename, e.g. "primitives.vortex".
+    pub name: String,
+    /// First version that introduced this fixture, e.g. "0.62.0".
+    pub since: String,
+}
diff --git a/vortex-test/compat-gen/src/validate.rs b/vortex-test/compat-gen/src/validate.rs
new file mode 100644
index 00000000000..cc79523042a
--- /dev/null
+++ b/vortex-test/compat-gen/src/validate.rs
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+use std::path::PathBuf;
+
+use vortex_array::IntoArray;
+use vortex_array::arrays::ChunkedArray;
+use vortex_array::assert_arrays_eq;
+use vortex_buffer::ByteBuffer;
+use vortex_error::VortexResult;
+use vortex_error::vortex_bail;
+use vortex_error::vortex_err;
+use vortex_utils::aliases::hash_map::HashMap;
+
+use crate::adapter;
+use crate::fixtures::Fixture;
+use crate::fixtures::all_fixtures;
+use crate::manifest::Manifest;
+
+/// Result of validating one version's fixtures.
+pub struct VersionResult {
+    /// The version tag that was validated.
+    pub version: String,
+    /// Number of fixtures that matched the freshly-built expected arrays.
+    pub passed: usize,
+    /// Fixtures listed in the manifest but unknown to this build.
+    pub skipped: usize,
+    /// (fixture name, error message) for each mismatch or read failure.
+    pub failed: Vec<(String, String)>,
+}
+
+/// Validate all versions' fixtures against the current reader.
+///
+/// Builds the fixture registry once and indexes it by filename so every
+/// version's manifest can be resolved without rebuilding fixtures.
+pub fn validate_all(
+    source: &FixtureSource,
+    versions: &[String],
+) -> VortexResult<Vec<VersionResult>> {
+    let fixtures = all_fixtures();
+    let fixture_map: HashMap<&str, &dyn Fixture> =
+        fixtures.iter().map(|f| (f.name(), f.as_ref())).collect();
+
+    let mut results = Vec::new();
+    for version in versions {
+        let result = validate_version(source, version, &fixture_map)?;
+        results.push(result);
+    }
+    Ok(results)
+}
+
+/// Validate every fixture listed in one version's manifest.
+///
+/// Unknown fixture names are skipped (older manifests may reference
+/// fixtures this build no longer registers); per-fixture failures are
+/// collected rather than aborting the whole version.
+fn validate_version(
+    source: &FixtureSource,
+    version: &str,
+    fixture_map: &HashMap<&str, &dyn Fixture>,
+) -> VortexResult<VersionResult> {
+    let manifest = source.fetch_manifest(version)?;
+    let mut passed = 0;
+    let mut skipped = 0;
+    let mut failed = Vec::new();
+
+    for entry in &manifest.fixtures {
+        let Some(fixture) = fixture_map.get(entry.name.as_str()) else {
+            eprintln!(
+                " warn: unknown fixture {} in v{version}, skipping",
+                entry.name
+            );
+            skipped += 1;
+            continue;
+        };
+
+        eprintln!(" checking {} from v{version}...", entry.name);
+        let bytes = source.fetch_fixture(version, &entry.name)?;
+        match validate_one(bytes, *fixture) {
+            Ok(()) => passed += 1,
+            Err(e) => {
+                eprintln!(" FAIL: {} from v{version}: {e}", entry.name);
+                failed.push((entry.name.clone(), e.to_string()));
+            }
+        }
+    }
+
+    Ok(VersionResult {
+        version: version.to_string(),
+        passed,
+        skipped,
+        failed,
+    })
+}
+
+/// Decode the stored fixture with the current reader and compare it
+/// chunk-for-chunk against the freshly-built expected arrays.
+fn validate_one(bytes: ByteBuffer, fixture: &dyn Fixture) -> VortexResult<()> {
+    let actual = adapter::read_file(bytes)?;
+    let expected = fixture.build()?;
+
+    // Guard the [0] dtype lookups below: an empty chunk list would panic.
+    if actual.is_empty() || expected.is_empty() {
+        vortex_bail!("fixture {} produced no chunks", fixture.name());
+    }
+
+    let actual_dtype = actual[0].dtype().clone();
+    let expected_dtype = expected[0].dtype().clone();
+    let actual_arr = ChunkedArray::try_new(actual, actual_dtype)?.into_array();
+    let expected_arr = ChunkedArray::try_new(expected, expected_dtype)?.into_array();
+
+    assert_arrays_eq!(actual_arr, expected_arr);
+    Ok(())
+}
+
+/// Source for fetching fixture files — either HTTPS or local directory.
+pub enum FixtureSource {
+    /// Base URL of an HTTPS bucket; fixtures live under `v<version>/`.
+    Url(String),
+    /// Local directory containing `v<version>/` subdirectories.
+    Dir(PathBuf),
+}
+
+impl FixtureSource {
+    /// Fetch and parse `v<version>/manifest.json` from this source.
+    fn fetch_manifest(&self, version: &str) -> VortexResult<Manifest> {
+        let json = match self {
+            FixtureSource::Url(base) => {
+                let url = format!("{base}/v{version}/manifest.json");
+                http_get_bytes(&url)?
+            }
+            FixtureSource::Dir(dir) => {
+                let path = dir.join(format!("v{version}")).join("manifest.json");
+                std::fs::read(&path)
+                    .map_err(|e| vortex_err!("failed to read {}: {e}", path.display()))?
+            }
+        };
+        serde_json::from_slice(&json)
+            .map_err(|e| vortex_err!("failed to parse manifest for v{version}: {e}"))
+    }
+
+    /// Fetch one fixture file's raw bytes from `v<version>/<name>`.
+    fn fetch_fixture(&self, version: &str, name: &str) -> VortexResult<ByteBuffer> {
+        let bytes = match self {
+            FixtureSource::Url(base) => {
+                let url = format!("{base}/v{version}/{name}");
+                http_get_bytes(&url)?
+            }
+            FixtureSource::Dir(dir) => {
+                let path = dir.join(format!("v{version}")).join(name);
+                std::fs::read(&path)
+                    .map_err(|e| vortex_err!("failed to read {}: {e}", path.display()))?
+            }
+        };
+        Ok(ByteBuffer::from(bytes))
+    }
+}
+
+/// Discover versions from a versions.json file, or from local directory listing.
+pub fn discover_versions(source: &FixtureSource) -> VortexResult<Vec<String>> {
+    match source {
+        FixtureSource::Url(base) => {
+            let url = format!("{base}/versions.json");
+            let bytes = http_get_bytes(&url)?;
+            let versions: Vec<String> = serde_json::from_slice(&bytes)
+                .map_err(|e| vortex_err!("failed to parse versions.json: {e}"))?;
+            Ok(versions)
+        }
+        FixtureSource::Dir(dir) => {
+            let mut versions = Vec::new();
+            for entry in std::fs::read_dir(dir)
+                .map_err(|e| vortex_err!("failed to read dir {}: {e}", dir.display()))?
+            {
+                let entry = entry.map_err(|e| vortex_err!("failed to read dir entry: {e}"))?;
+                let name = entry.file_name();
+                let name = name.to_string_lossy();
+                // Only `v<version>/` directories that actually contain a
+                // manifest count as fixture sets.
+                if let Some(version) = name.strip_prefix('v')
+                    && entry.path().join("manifest.json").exists()
+                {
+                    versions.push(version.to_string());
+                }
+            }
+            // Lexicographic sort keeps the run order deterministic.
+            versions.sort();
+            Ok(versions)
+        }
+    }
+}
+
+/// Blocking GET returning the response body, failing on non-2xx status.
+fn http_get_bytes(url: &str) -> VortexResult<Vec<u8>> {
+    let response = reqwest::blocking::get(url)
+        .map_err(|e| vortex_err!("HTTP request failed for {url}: {e}"))?;
+    if !response.status().is_success() {
+        vortex_bail!("HTTP {} fetching {url}", response.status());
+    }
+    response
+        .bytes()
+        .map(|b| b.to_vec())
+        .map_err(|e| vortex_err!("failed to read response body from {url}: {e}"))
+}
diff --git a/vortex-test/compat-gen/src/validate_main.rs b/vortex-test/compat-gen/src/validate_main.rs
new file mode 100644
index 00000000000..dbaf35ba822
--- /dev/null
+++ b/vortex-test/compat-gen/src/validate_main.rs
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+use std::path::PathBuf;
+
+use clap::Parser;
+use vortex_compat::validate::FixtureSource;
+use vortex_compat::validate::discover_versions;
+use vortex_compat::validate::validate_all;
+use vortex_error::VortexResult;
+
+/// Command-line arguments for the fixture validator binary.
+#[derive(Parser)]
+#[command(
+    name = "compat-validate",
+    about = "Validate Vortex backward-compat fixtures"
+)]
+struct Cli {
+    /// HTTPS base URL for the fixture bucket,
+    /// e.g. "https://vortex-compat-fixtures.s3.amazonaws.com".
+    #[arg(long)]
+    fixtures_url: Option<String>,
+
+    /// Local directory containing fixture versions (for development).
+    #[arg(long)]
+    fixtures_dir: Option<PathBuf>,
+
+    /// Explicit list of versions to test (comma-separated).
+    /// If omitted, discovers versions from versions.json or directory listing.
+    #[arg(long, value_delimiter = ',')]
+    versions: Option<Vec<String>>,
+}
+
+/// Entry point: resolves the fixture source, picks the version list,
+/// validates everything, prints a per-version summary, and exits non-zero
+/// (via the returned error) if any fixture failed.
+fn main() -> VortexResult<()> {
+    let cli = Cli::parse();
+
+    // Exactly one source must be given; anything else is a usage error.
+    let source = match (&cli.fixtures_url, &cli.fixtures_dir) {
+        (Some(url), None) => FixtureSource::Url(url.clone()),
+        (None, Some(dir)) => FixtureSource::Dir(dir.clone()),
+        _ => {
+            vortex_error::vortex_bail!("specify exactly one of --fixtures-url or --fixtures-dir");
+        }
+    };
+
+    // Explicit --versions wins; otherwise discover from the source itself.
+    let versions = match cli.versions {
+        Some(v) => v,
+        None => {
+            eprintln!("discovering versions...");
+            discover_versions(&source)?
+        }
+    };
+
+    eprintln!(
+        "testing {} version(s): {}",
+        versions.len(),
+        versions.join(", ")
+    );
+
+    let results = validate_all(&source, &versions)?;
+
+    // Aggregate totals across all versions for the final summary line.
+    let mut total_passed = 0;
+    let mut total_failed = 0;
+    let mut total_skipped = 0;
+
+    for r in &results {
+        total_passed += r.passed;
+        total_failed += r.failed.len();
+        total_skipped += r.skipped;
+        if r.failed.is_empty() {
+            eprintln!(
+                " v{}: {} passed, {} skipped",
+                r.version, r.passed, r.skipped
+            );
+        } else {
+            eprintln!(
+                " v{}: {} passed, {} FAILED, {} skipped",
+                r.version,
+                r.passed,
+                r.failed.len(),
+                r.skipped
+            );
+            for (name, err) in &r.failed {
+                eprintln!(" FAIL {name}: {err}");
+            }
+        }
+    }
+
+    eprintln!("\nresult: {total_passed} passed, {total_failed} failed, {total_skipped} skipped");
+
+    // A non-empty failure count turns into a process-level error so CI fails.
+    if total_failed > 0 {
+        vortex_error::vortex_bail!("{total_failed} fixture(s) failed validation");
+    }
+
+    Ok(())
+}