From f9cf64fab23be315b93a8b2d21ff391ab084b48e Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 11 Mar 2026 15:26:55 +0000 Subject: [PATCH 01/13] feat: add compat-gen crate with Epoch A adapter (v0.36.0) Standalone crate for generating backward-compat fixture .vortex files. Uses Epoch A adapter targeting the v0.36.0 write API: - VortexWriteOptions::default() (no session) - .write(sink, stream).await returns the sink Fixtures: primitives, strings, booleans, nullable, struct_nested, chunked, tpch_lineitem, tpch_orders. Signed-off-by: Joe Isaacs Co-Authored-By: Claude Opus 4.6 --- vortex-test/compat-gen/Cargo.toml | 30 ++++ vortex-test/compat-gen/PLAN.md | 104 +++++++++++ vortex-test/compat-gen/src/adapter.rs | 31 ++++ vortex-test/compat-gen/src/fixtures/mod.rs | 30 ++++ .../compat-gen/src/fixtures/synthetic.rs | 169 ++++++++++++++++++ vortex-test/compat-gen/src/fixtures/tpch.rs | 45 +++++ vortex-test/compat-gen/src/main.rs | 56 ++++++ vortex-test/compat-gen/src/manifest.rs | 10 ++ 8 files changed, 475 insertions(+) create mode 100644 vortex-test/compat-gen/Cargo.toml create mode 100644 vortex-test/compat-gen/PLAN.md create mode 100644 vortex-test/compat-gen/src/adapter.rs create mode 100644 vortex-test/compat-gen/src/fixtures/mod.rs create mode 100644 vortex-test/compat-gen/src/fixtures/synthetic.rs create mode 100644 vortex-test/compat-gen/src/fixtures/tpch.rs create mode 100644 vortex-test/compat-gen/src/main.rs create mode 100644 vortex-test/compat-gen/src/manifest.rs diff --git a/vortex-test/compat-gen/Cargo.toml b/vortex-test/compat-gen/Cargo.toml new file mode 100644 index 00000000000..5f7f6fe8c1d --- /dev/null +++ b/vortex-test/compat-gen/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "vortex-compat" +version = "0.1.0" +edition = "2021" + +[[bin]] +name = "compat-gen" +path = "src/main.rs" + +[dependencies] +# Vortex crates (path deps — resolve to whatever version is checked out) +vortex = { path = "../../vortex", features = ["files", "tokio"] } 
+vortex-array = { path = "../../vortex-array" } +vortex-buffer = { path = "../../vortex-buffer" } +vortex-error = { path = "../../vortex-error" } + +# TPC-H generation +tpchgen = "2" +tpchgen-arrow = "2" +arrow-array = "57" + +# Async runtime +tokio = { version = "1", features = ["full"] } +futures = "0.3" + +# CLI + serialization +clap = { version = "4", features = ["derive"] } +serde = { version = "1", features = ["derive"] } +serde_json = "1" +chrono = { version = "0.4", features = ["serde"] } diff --git a/vortex-test/compat-gen/PLAN.md b/vortex-test/compat-gen/PLAN.md new file mode 100644 index 00000000000..672c23fdd8c --- /dev/null +++ b/vortex-test/compat-gen/PLAN.md @@ -0,0 +1,104 @@ +# Vortex File Backward Compatibility Testing — Implementation Plan + +RFC: https://github.com/vortex-data/rfcs/pull/23 + +## Overview + +A standalone crate (`vortex-test/compat-gen/`) that generates deterministic `.vortex` fixture files +for backward compatibility testing. Not a workspace member — uses path deps to workspace crates. + +## API Epochs + +The Vortex file write/read API has 3 distinct epochs. The adapter layer (`adapter.rs`) is the only +file that changes when cherry-picking to old release branches. 
+ +| Epoch | Versions | Write API | Read (in-memory) | Session | +|-------|----------------|---------------------------------------------------------|------------------------------------------------------|---------| +| **A** | 0.36.0 | `VortexWriteOptions::default().write(sink, stream) → W` | `VortexOpenOptions::in_memory().open(buf).await?` | None | +| **B** | 0.45.0–0.52.0 | `VortexWriteOptions::default().write(sink, stream) → W` | `VortexOpenOptions::in_memory().open(buf)?` (sync) | Exists, not wired | +| **C** | 0.58.0–HEAD | `session.write_options().write(sink, stream) → WriteSummary` | `session.open_options().open_buffer(buf)?` (sync) | Central | + +### Key Breaking Changes + +- **A→B**: In-memory `open()` changed from async to sync +- **B→C**: + - `VortexWriteOptions` lost `Default`, now constructed from `VortexSession` + - `write()` return type: `W` (sink) → `WriteSummary` + - `VortexOpenOptions` lost the `FileType` generic parameter + - `in_memory().open()` → `open_options().open_buffer()` + - Scan: `into_array_iter()` → `into_array_stream()` (async restored) + +## Array Construction API Stability + +Array construction is stable across ALL versions — fixture builders need NO adaptation: + +| API | Status | +|-----|--------| +| `StructArray::try_new(field_names, fields, len, validity)` | Stable 0.36.0–HEAD | +| `PrimitiveArray::new(buffer![...], validity)` | Stable 0.36.0–HEAD | +| `buffer![1, 2, 3].into_array()` | Stable 0.36.0–HEAD | +| `VarBinArray::from(vec!["a", "b"])` | Stable 0.36.0–HEAD | +| `BoolArray::from_iter([true, false])` | Stable 0.36.0–HEAD | +| `ArrayRef::from_arrow(record_batch, false)` | Stable 0.36.0–HEAD | +| `ChunkedArray::try_new(chunks, dtype)` | Stable 0.36.0–HEAD | + +## Fixture Suite + +### Trait + +```rust +pub trait Fixture: Send + Sync { + fn name(&self) -> &str; + fn build(&self) -> Vec<ArrayRef>; +} +``` + +Returns `Vec<ArrayRef>` to support chunked fixtures naturally. 
+ +### Synthetic Fixtures + +| File | Schema | Purpose | +|------|--------|---------| +| `primitives.vortex` | `Struct{u8, u16, u32, u64, i32, i64, f32, f64}` | Primitive round-trip | +| `strings.vortex` | `Struct{Utf8}` | String encoding | +| `booleans.vortex` | `Struct{Bool}` | Bool round-trip | +| `nullable.vortex` | `Struct{Nullable<i32>, Nullable<Utf8>}` | Null handling | +| `struct_nested.vortex` | `Struct{Struct{i32, Utf8}, f64}` | Nested types | +| `chunked.vortex` | Chunked `Struct{u32}` (3 x 1000 rows) | Multi-chunk files | + +### Realistic Fixtures + +| File | Source | Rows | +|------|--------|------| +| `tpch_lineitem.vortex` | TPC-H SF 0.01 | ~60K | +| `tpch_orders.vortex` | TPC-H SF 0.01 | ~15K | + +## Adapter Layer + +Only `adapter.rs` changes per epoch (~15 lines). See `src/adapter.rs` for the current (Epoch C) +implementation. The git history shows all 3 epoch variants. + +## What Changes Per Version When Cherry-Picking + +| Component | Changes? | +|-----------|----------| +| Fixture trait + registry | No | +| Fixture builders (synthetic) | No | +| Fixture builders (TPC-H) | No | +| `adapter.rs` | Yes — ~15 lines, 3 variants | +| `main.rs`, `manifest.rs` | No | +| `Cargo.toml` | No (path deps resolve to local version) | + +## Usage + +```bash +# Generate fixtures for the current version +cargo run --manifest-path vortex-test/compat-gen/Cargo.toml \ + --bin compat-gen -- --version 0.62.0 --output /tmp/fixtures/ + +# Outputs: +# /tmp/fixtures/manifest.json +# /tmp/fixtures/primitives.vortex +# /tmp/fixtures/strings.vortex +# ... 
+``` diff --git a/vortex-test/compat-gen/src/adapter.rs b/vortex-test/compat-gen/src/adapter.rs new file mode 100644 index 00000000000..eafc87a1ea5 --- /dev/null +++ b/vortex-test/compat-gen/src/adapter.rs @@ -0,0 +1,31 @@ +// Epoch A adapter — for Vortex v0.36.0 +// +// API at this version: +// - VortexWriteOptions::default() (no session) +// - .write(sink, stream).await returns VortexResult (the sink back) +// - ArrayStream must be Unpin + +use std::path::Path; + +use futures::stream; +use tokio::runtime::Runtime; +use vortex::file::VortexWriteOptions; +use vortex_array::stream::ArrayStreamAdapter; +use vortex_array::ArrayRef; +use vortex_error::VortexResult; + +/// Write a sequence of array chunks as a `.vortex` file. +pub fn write_file(path: &Path, chunks: Vec) -> VortexResult<()> { + let dtype = chunks[0].dtype().clone(); + let stream = ArrayStreamAdapter::new(dtype, stream::iter(chunks.into_iter().map(Ok))); + + let rt = Runtime::new().expect("failed to create tokio runtime"); + rt.block_on(async { + let file = tokio::fs::File::create(path).await.map_err(|e| { + vortex_error::vortex_err!("failed to create {}: {e}", path.display()) + })?; + // At 0.36.0, write() returns VortexResult — we discard the sink. + let _sink = VortexWriteOptions::default().write(file, stream).await?; + Ok(()) + }) +} diff --git a/vortex-test/compat-gen/src/fixtures/mod.rs b/vortex-test/compat-gen/src/fixtures/mod.rs new file mode 100644 index 00000000000..b09662bec89 --- /dev/null +++ b/vortex-test/compat-gen/src/fixtures/mod.rs @@ -0,0 +1,30 @@ +mod synthetic; +mod tpch; + +use vortex_array::ArrayRef; + +/// A deterministic fixture that produces the same arrays every time. +pub trait Fixture: Send + Sync { + /// The filename for this fixture, e.g. "primitives.vortex". + fn name(&self) -> &str; + + /// Build the expected arrays. Must be deterministic. + /// + /// Returns a `Vec` to support chunked fixtures (multiple chunks). + /// Single-array fixtures return a one-element vec. 
+ fn build(&self) -> Vec; +} + +/// All registered fixtures. +pub fn all_fixtures() -> Vec> { + vec![ + Box::new(synthetic::PrimitivesFixture), + Box::new(synthetic::StringsFixture), + Box::new(synthetic::BooleansFixture), + Box::new(synthetic::NullableFixture), + Box::new(synthetic::StructNestedFixture), + Box::new(synthetic::ChunkedFixture), + Box::new(tpch::TpchLineitemFixture), + Box::new(tpch::TpchOrdersFixture), + ] +} diff --git a/vortex-test/compat-gen/src/fixtures/synthetic.rs b/vortex-test/compat-gen/src/fixtures/synthetic.rs new file mode 100644 index 00000000000..d29c1c5d097 --- /dev/null +++ b/vortex-test/compat-gen/src/fixtures/synthetic.rs @@ -0,0 +1,169 @@ +use vortex_array::arrays::{BoolArray, ChunkedArray, PrimitiveArray, StructArray, VarBinArray}; +use vortex_array::dtype::field_names::FieldNames; +use vortex_array::dtype::{DType, Nullability, PType}; +use vortex_array::validity::Validity; +use vortex_array::{ArrayRef, IntoArray}; +use vortex_buffer::buffer; + +use super::Fixture; + +pub struct PrimitivesFixture; + +impl Fixture for PrimitivesFixture { + fn name(&self) -> &str { + "primitives.vortex" + } + + fn build(&self) -> Vec { + let arr = StructArray::try_new( + FieldNames::from(["u8", "u16", "u32", "u64", "i32", "i64", "f32", "f64"]), + vec![ + PrimitiveArray::new(buffer![0u8, 128, 255], Validity::NonNullable).into_array(), + PrimitiveArray::new(buffer![0u16, 32768, 65535], Validity::NonNullable).into_array(), + PrimitiveArray::new(buffer![0u32, 2_147_483_648, 4_294_967_295], Validity::NonNullable).into_array(), + PrimitiveArray::new(buffer![0u64, 9_223_372_036_854_775_808, u64::MAX], Validity::NonNullable).into_array(), + PrimitiveArray::new(buffer![i32::MIN, 0i32, i32::MAX], Validity::NonNullable).into_array(), + PrimitiveArray::new(buffer![i64::MIN, 0i64, i64::MAX], Validity::NonNullable).into_array(), + PrimitiveArray::new(buffer![f32::MIN, 0.0f32, f32::MAX], Validity::NonNullable).into_array(), + PrimitiveArray::new(buffer![f64::MIN, 
0.0f64, f64::MAX], Validity::NonNullable).into_array(), + ], + 3, + Validity::NonNullable, + ) + .expect("failed to build primitives fixture"); + vec![arr.into_array()] + } +} + +pub struct StringsFixture; + +impl Fixture for StringsFixture { + fn name(&self) -> &str { + "strings.vortex" + } + + fn build(&self) -> Vec { + let strings = VarBinArray::from(vec!["", "hello", "こんにちは", "\u{1f980}"]); + let arr = StructArray::try_new( + FieldNames::from(["text"]), + vec![strings.into_array()], + 4, + Validity::NonNullable, + ) + .expect("failed to build strings fixture"); + vec![arr.into_array()] + } +} + +pub struct BooleansFixture; + +impl Fixture for BooleansFixture { + fn name(&self) -> &str { + "booleans.vortex" + } + + fn build(&self) -> Vec { + let bools = BoolArray::from_iter([true, false, true, true, false]); + let arr = StructArray::try_new( + FieldNames::from(["flag"]), + vec![bools.into_array()], + 5, + Validity::NonNullable, + ) + .expect("failed to build booleans fixture"); + vec![arr.into_array()] + } +} + +pub struct NullableFixture; + +impl Fixture for NullableFixture { + fn name(&self) -> &str { + "nullable.vortex" + } + + fn build(&self) -> Vec { + let nullable_ints = PrimitiveArray::from_option_iter([ + Some(1i32), + None, + Some(42), + None, + Some(-7), + ]); + let nullable_strings = VarBinArray::from(vec![ + Some("hello"), + None, + Some("world"), + Some(""), + None, + ]); + let arr = StructArray::try_new( + FieldNames::from(["int_col", "str_col"]), + vec![nullable_ints.into_array(), nullable_strings.into_array()], + 5, + Validity::NonNullable, + ) + .expect("failed to build nullable fixture"); + vec![arr.into_array()] + } +} + +pub struct StructNestedFixture; + +impl Fixture for StructNestedFixture { + fn name(&self) -> &str { + "struct_nested.vortex" + } + + fn build(&self) -> Vec { + let inner = StructArray::try_new( + FieldNames::from(["a", "b"]), + vec![ + PrimitiveArray::new(buffer![10i32, 20, 30], Validity::NonNullable).into_array(), + 
VarBinArray::from(vec!["x", "y", "z"]).into_array(), + ], + 3, + Validity::NonNullable, + ) + .expect("failed to build inner struct"); + + let arr = StructArray::try_new( + FieldNames::from(["inner", "value"]), + vec![ + inner.into_array(), + PrimitiveArray::new(buffer![1.1f64, 2.2, 3.3], Validity::NonNullable).into_array(), + ], + 3, + Validity::NonNullable, + ) + .expect("failed to build struct_nested fixture"); + vec![arr.into_array()] + } +} + +pub struct ChunkedFixture; + +impl Fixture for ChunkedFixture { + fn name(&self) -> &str { + "chunked.vortex" + } + + fn build(&self) -> Vec { + // 3 chunks of 1000 rows each. Values are deterministic: chunk_idx * 1000 + row_idx. + (0u32..3) + .map(|chunk_idx| { + let values: Vec = (0u32..1000).map(|i| chunk_idx * 1000 + i).collect(); + let primitives = + PrimitiveArray::new(vortex_buffer::Buffer::from(values), Validity::NonNullable); + StructArray::try_new( + FieldNames::from(["id"]), + vec![primitives.into_array()], + 1000, + Validity::NonNullable, + ) + .expect("failed to build chunk") + .into_array() + }) + .collect() + } +} diff --git a/vortex-test/compat-gen/src/fixtures/tpch.rs b/vortex-test/compat-gen/src/fixtures/tpch.rs new file mode 100644 index 00000000000..7e341f7e85c --- /dev/null +++ b/vortex-test/compat-gen/src/fixtures/tpch.rs @@ -0,0 +1,45 @@ +use arrow_array::RecordBatch; +use tpchgen::generators::{LineItemGenerator, OrderGenerator}; +use tpchgen_arrow::RecordBatchIterator; +use vortex_array::arrow::FromArrowArray; +use vortex_array::ArrayRef; + +use super::Fixture; + +const SCALE_FACTOR: f64 = 0.01; + +fn collect_batches_as_vortex(iter: impl RecordBatchIterator) -> Vec { + let batches: Vec = iter.collect(); + batches + .into_iter() + .map(|batch| ArrayRef::from_arrow(batch, false).expect("arrow conversion failed")) + .collect() +} + +pub struct TpchLineitemFixture; + +impl Fixture for TpchLineitemFixture { + fn name(&self) -> &str { + "tpch_lineitem.vortex" + } + + fn build(&self) -> Vec { + let gen = 
LineItemGenerator::new(SCALE_FACTOR, 1, 1); + let arrow_iter = tpchgen_arrow::LineItemArrow::new(gen).with_batch_size(65_536); + collect_batches_as_vortex(arrow_iter) + } +} + +pub struct TpchOrdersFixture; + +impl Fixture for TpchOrdersFixture { + fn name(&self) -> &str { + "tpch_orders.vortex" + } + + fn build(&self) -> Vec { + let gen = OrderGenerator::new(SCALE_FACTOR, 1, 1); + let arrow_iter = tpchgen_arrow::OrderArrow::new(gen).with_batch_size(65_536); + collect_batches_as_vortex(arrow_iter) + } +} diff --git a/vortex-test/compat-gen/src/main.rs b/vortex-test/compat-gen/src/main.rs new file mode 100644 index 00000000000..3777fc64d12 --- /dev/null +++ b/vortex-test/compat-gen/src/main.rs @@ -0,0 +1,56 @@ +mod adapter; +mod fixtures; +mod manifest; + +use std::path::PathBuf; + +use chrono::Utc; +use clap::Parser; + +use crate::fixtures::all_fixtures; +use crate::manifest::Manifest; + +#[derive(Parser)] +#[command(name = "compat-gen", about = "Generate Vortex backward-compat fixture files")] +struct Cli { + /// Version tag for this fixture set (e.g. "0.62.0"). + #[arg(long)] + version: String, + + /// Output directory for generated fixture files. 
+ #[arg(long)] + output: PathBuf, +} + +fn main() -> vortex_error::VortexResult<()> { + let cli = Cli::parse(); + + std::fs::create_dir_all(&cli.output) + .map_err(|e| vortex_error::vortex_err!("failed to create output dir: {e}"))?; + + let fixtures = all_fixtures(); + let mut fixture_names = Vec::with_capacity(fixtures.len()); + + for fixture in &fixtures { + let chunks = fixture.build(); + let path = cli.output.join(fixture.name()); + adapter::write_file(&path, chunks)?; + fixture_names.push(fixture.name().to_string()); + eprintln!(" wrote {}", fixture.name()); + } + + let manifest = Manifest { + version: cli.version.clone(), + generated_at: Utc::now(), + fixtures: fixture_names, + }; + let manifest_path = cli.output.join("manifest.json"); + let manifest_json = serde_json::to_string_pretty(&manifest) + .map_err(|e| vortex_error::vortex_err!("failed to serialize manifest: {e}"))?; + std::fs::write(&manifest_path, manifest_json) + .map_err(|e| vortex_error::vortex_err!("failed to write manifest: {e}"))?; + eprintln!(" wrote manifest.json"); + + eprintln!("done: {} fixtures for v{}", fixtures.len(), cli.version); + Ok(()) +} diff --git a/vortex-test/compat-gen/src/manifest.rs b/vortex-test/compat-gen/src/manifest.rs new file mode 100644 index 00000000000..6a438edc1b2 --- /dev/null +++ b/vortex-test/compat-gen/src/manifest.rs @@ -0,0 +1,10 @@ +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; + +/// Manifest listing all fixtures generated for a given version. 
+#[derive(Debug, Serialize, Deserialize)] +pub struct Manifest { + pub version: String, + pub generated_at: DateTime, + pub fixtures: Vec, +} From 569d38c45ebadc198199ff47b99d78b649f42f39 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 11 Mar 2026 15:27:27 +0000 Subject: [PATCH 02/13] =?UTF-8?q?feat:=20compat-gen=20Epoch=20B=20adapter?= =?UTF-8?q?=20(v0.45.0=E2=80=93v0.52.0)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Write API is the same as Epoch A but with stricter stream bounds (Send + 'static). Also has write_blocking() for sync usage. Signed-off-by: Joe Isaacs Co-Authored-By: Claude Opus 4.6 --- vortex-test/compat-gen/src/adapter.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/vortex-test/compat-gen/src/adapter.rs b/vortex-test/compat-gen/src/adapter.rs index eafc87a1ea5..23ff2e3e71a 100644 --- a/vortex-test/compat-gen/src/adapter.rs +++ b/vortex-test/compat-gen/src/adapter.rs @@ -1,9 +1,10 @@ -// Epoch A adapter — for Vortex v0.36.0 +// Epoch B adapter — for Vortex v0.45.0 through v0.52.0 // -// API at this version: -// - VortexWriteOptions::default() (no session) -// - .write(sink, stream).await returns VortexResult (the sink back) -// - ArrayStream must be Unpin +// API changes from Epoch A: +// - VortexWriteOptions::default() still works (no session) +// - .write(sink, stream).await still returns VortexResult +// - Stream now requires Send + 'static (not just Unpin) +// - Also has .write_blocking(sink, stream) -> VortexResult use std::path::Path; @@ -24,7 +25,8 @@ pub fn write_file(path: &Path, chunks: Vec) -> VortexResult<()> { let file = tokio::fs::File::create(path).await.map_err(|e| { vortex_error::vortex_err!("failed to create {}: {e}", path.display()) })?; - // At 0.36.0, write() returns VortexResult — we discard the sink. + // At 0.45.0–0.52.0: same as Epoch A, write() returns VortexResult. + // Stream bound changed to `S: ArrayStream + Unpin + Send + 'static`. 
let _sink = VortexWriteOptions::default().write(file, stream).await?; Ok(()) }) From a993c6683b92454dda5e8120a0c2107083465f9b Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 11 Mar 2026 15:27:59 +0000 Subject: [PATCH 03/13] feat: compat-gen Epoch C adapter (v0.58.0+/HEAD) Session-based API: VortexSession::default() + session.write_options(). write() now returns WriteSummary and takes &mut sink. This is the adapter that compiles against the current codebase. Signed-off-by: Joe Isaacs Co-Authored-By: Claude Opus 4.6 --- vortex-test/compat-gen/src/adapter.rs | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/vortex-test/compat-gen/src/adapter.rs b/vortex-test/compat-gen/src/adapter.rs index 23ff2e3e71a..8e844396e30 100644 --- a/vortex-test/compat-gen/src/adapter.rs +++ b/vortex-test/compat-gen/src/adapter.rs @@ -1,16 +1,17 @@ -// Epoch B adapter — for Vortex v0.45.0 through v0.52.0 +// Epoch C adapter — for Vortex v0.58.0 through HEAD // -// API changes from Epoch A: -// - VortexWriteOptions::default() still works (no session) -// - .write(sink, stream).await still returns VortexResult -// - Stream now requires Send + 'static (not just Unpin) -// - Also has .write_blocking(sink, stream) -> VortexResult +// API changes from Epoch B: +// - VortexWriteOptions no longer implements Default +// - Must construct via VortexSession: session.write_options() +// - .write(&mut sink, stream).await returns VortexResult +// - WriteOptionsSessionExt trait provides session.write_options() use std::path::Path; use futures::stream; use tokio::runtime::Runtime; -use vortex::file::VortexWriteOptions; +use vortex::file::WriteOptionsSessionExt; +use vortex::VortexSession; use vortex_array::stream::ArrayStreamAdapter; use vortex_array::ArrayRef; use vortex_error::VortexResult; @@ -20,14 +21,17 @@ pub fn write_file(path: &Path, chunks: Vec) -> VortexResult<()> { let dtype = chunks[0].dtype().clone(); let stream = ArrayStreamAdapter::new(dtype, 
stream::iter(chunks.into_iter().map(Ok))); + let session = VortexSession::default(); let rt = Runtime::new().expect("failed to create tokio runtime"); rt.block_on(async { - let file = tokio::fs::File::create(path).await.map_err(|e| { + let mut file = tokio::fs::File::create(path).await.map_err(|e| { vortex_error::vortex_err!("failed to create {}: {e}", path.display()) })?; - // At 0.45.0–0.52.0: same as Epoch A, write() returns VortexResult. - // Stream bound changed to `S: ArrayStream + Unpin + Send + 'static`. - let _sink = VortexWriteOptions::default().write(file, stream).await?; + // At 0.58.0+: write() returns WriteSummary, takes &mut sink. + let _summary = session + .write_options() + .write(&mut file, stream) + .await?; Ok(()) }) } From e9959fe27bb87c78d34e21de66319237cc6c1698 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 11 Mar 2026 15:47:46 +0000 Subject: [PATCH 04/13] docs: update compat-gen plan with full remaining work Adds: compat-test binary design, ClickBench fixture, per-encoding stubs, validation strategy (ChunkedArray + assert_arrays_eq!), reqwest-based HTTPS fetching, CI workflow specs, and complete code size / shared-vs-branch-specific breakdown. Signed-off-by: Joe Isaacs Co-Authored-By: Claude Opus 4.6 --- vortex-test/compat-gen/PLAN.md | 274 ++++++++++++++++++++++++++++++--- 1 file changed, 249 insertions(+), 25 deletions(-) diff --git a/vortex-test/compat-gen/PLAN.md b/vortex-test/compat-gen/PLAN.md index 672c23fdd8c..ed8195d256c 100644 --- a/vortex-test/compat-gen/PLAN.md +++ b/vortex-test/compat-gen/PLAN.md @@ -5,22 +5,26 @@ RFC: https://github.com/vortex-data/rfcs/pull/23 ## Overview A standalone crate (`vortex-test/compat-gen/`) that generates deterministic `.vortex` fixture files -for backward compatibility testing. Not a workspace member — uses path deps to workspace crates. +and validates them across versions. Not a workspace member — uses path deps to workspace crates. 
+ +Two binaries: +- **`compat-gen`**: Build fixture arrays → write `.vortex` files + `manifest.json` +- **`compat-test`**: Fetch fixtures from S3 (plain HTTPS) → read → compare via `assert_arrays_eq!` ## API Epochs The Vortex file write/read API has 3 distinct epochs. The adapter layer (`adapter.rs`) is the only file that changes when cherry-picking to old release branches. -| Epoch | Versions | Write API | Read (in-memory) | Session | -|-------|----------------|---------------------------------------------------------|------------------------------------------------------|---------| -| **A** | 0.36.0 | `VortexWriteOptions::default().write(sink, stream) → W` | `VortexOpenOptions::in_memory().open(buf).await?` | None | -| **B** | 0.45.0–0.52.0 | `VortexWriteOptions::default().write(sink, stream) → W` | `VortexOpenOptions::in_memory().open(buf)?` (sync) | Exists, not wired | -| **C** | 0.58.0–HEAD | `session.write_options().write(sink, stream) → WriteSummary` | `session.open_options().open_buffer(buf)?` (sync) | Central | +| Epoch | Versions | Write API | Read (in-memory) | Scan output | Session | +|-------|----------------|---------------------------------------------------------|------------------------------------------------------|--------------------------|---------| +| **A** | 0.36.0 | `VortexWriteOptions::default().write(sink, stream) → W` | `VortexOpenOptions::in_memory().open(buf).await?` | `into_array_stream()` async | None | +| **B** | 0.45.0–0.52.0 | `VortexWriteOptions::default().write(sink, stream) → W` | `VortexOpenOptions::in_memory().open(buf)?` (sync) | `into_array_iter()` sync | Exists, not wired | +| **C** | 0.58.0–HEAD | `session.write_options().write(sink, stream) → WriteSummary` | `session.open_options().open_buffer(buf)?` (sync) | `into_array_stream()` async | Central | ### Key Breaking Changes -- **A→B**: In-memory `open()` changed from async to sync +- **A→B**: In-memory `open()` changed from async to sync; scan switched to sync 
`into_array_iter()` - **B→C**: - `VortexWriteOptions` lost `Default`, now constructed from `VortexSession` - `write()` return type: `W` (sink) → `WriteSummary` @@ -42,6 +46,26 @@ Array construction is stable across ALL versions — fixture builders need NO ad | `ArrayRef::from_arrow(record_batch, false)` | Stable 0.36.0–HEAD | | `ChunkedArray::try_new(chunks, dtype)` | Stable 0.36.0–HEAD | +## Crate Layout + +``` +vortex-test/compat-gen/ + Cargo.toml # standalone, path deps to workspace + PLAN.md + src/ + main.rs # compat-gen CLI: --version, --output + test_main.rs # compat-test CLI: --fixtures-url + adapter.rs # write_file() + read_file() — ONLY branch-specific file + manifest.rs # Manifest serde struct + validate.rs # fetch from HTTPS + assert_arrays_eq! loop + fixtures/ + mod.rs # Fixture trait + all_fixtures() registry + synthetic.rs # 6 synthetic fixtures + tpch.rs # 2 TPC-H fixtures (lineitem, orders) + clickbench.rs # ClickBench hits 1k fixture + encodings.rs # per-encoding fixture stubs (todo!()) +``` + ## Fixture Suite ### Trait @@ -53,9 +77,9 @@ pub trait Fixture: Send + Sync { } ``` -Returns `Vec<ArrayRef>` to support chunked fixtures naturally. +Returns `Vec<ArrayRef>` to support chunked fixtures. Single-array fixtures return a one-element vec. -### Synthetic Fixtures +### Synthetic Fixtures (implemented) | File | Schema | Purpose | |------|--------|---------| @@ -66,28 +90,212 @@ Returns `Vec<ArrayRef>` to support chunked fixtures naturally. 
| `struct_nested.vortex` | `Struct{Struct{i32, Utf8}, f64}` | Nested types | | `chunked.vortex` | Chunked `Struct{u32}` (3 x 1000 rows) | Multi-chunk files | -### Realistic Fixtures +### Realistic Fixtures (implemented) + +| File | Source | Rows | Purpose | +|------|--------|------|---------| +| `tpch_lineitem.vortex` | TPC-H SF 0.01 via `tpchgen-arrow` | ~60K | Numeric + string schema | +| `tpch_orders.vortex` | TPC-H SF 0.01 via `tpchgen-arrow` | ~15K | Date + decimal types | +| `clickbench_hits_1k.vortex` | First 1000 rows of ClickBench `hits` parquet (pinned URL) | 1000 | Wide table (105 cols) | + +### Per-Encoding Fixture Stubs (todo) + +One fixture per stable encoding to exercise encoding-specific read paths. These are stubbed with +`todo!()` until the "stable encodings" RFC defines what's frozen. -| File | Source | Rows | -|------|--------|------| -| `tpch_lineitem.vortex` | TPC-H SF 0.01 | ~60K | -| `tpch_orders.vortex` | TPC-H SF 0.01 | ~15K | +| File | Encoding | Stub? | +|------|----------|-------| +| `enc_dict.vortex` | DictArray | `todo!()` | +| `enc_runend.vortex` | RunEndArray | `todo!()` | +| `enc_constant.vortex` | ConstantArray | `todo!()` | +| `enc_sparse.vortex` | SparseArray | `todo!()` | +| `enc_alp.vortex` | ALPArray | `todo!()` | +| `enc_bitpacked.vortex` | BitPackedArray | `todo!()` | +| `enc_fsst.vortex` | FSSTArray | `todo!()` | ## Adapter Layer -Only `adapter.rs` changes per epoch (~15 lines). See `src/adapter.rs` for the current (Epoch C) -implementation. The git history shows all 3 epoch variants. +Only `adapter.rs` changes per epoch. 
Contains two functions: + +```rust +pub fn write_file(path: &Path, chunks: Vec<ArrayRef>) -> VortexResult<()>; +pub fn read_file(bytes: ByteBuffer) -> VortexResult<Vec<ArrayRef>>; +``` + +### Epoch A — v0.36.0 + +```rust +pub fn write_file(path: &Path, chunks: Vec<ArrayRef>) -> VortexResult<()> { + let dtype = chunks[0].dtype().clone(); + let stream = ArrayStreamAdapter::new(dtype, stream::iter(chunks.into_iter().map(Ok))); + let rt = Runtime::new().expect("tokio runtime"); + rt.block_on(async { + let file = tokio::fs::File::create(path).await?; + let _sink = VortexWriteOptions::default().write(file, stream).await?; + Ok(()) + }) +} + +pub fn read_file(bytes: ByteBuffer) -> VortexResult<Vec<ArrayRef>> { + let rt = Runtime::new().expect("tokio runtime"); + rt.block_on(async { + let file = VortexOpenOptions::in_memory().open(bytes).await?; // async + let arr = file.scan()?.into_array_stream()?.read_all().await?; + Ok(vec![arr]) + }) +} +``` + +### Epoch B — v0.45.0–v0.52.0 + +```rust +pub fn write_file(path: &Path, chunks: Vec<ArrayRef>) -> VortexResult<()> { + // identical to Epoch A — same VortexWriteOptions::default() API + // ... 
+} + +pub fn read_file(bytes: ByteBuffer) -> VortexResult<Vec<ArrayRef>> { + let file = VortexOpenOptions::in_memory().open(bytes)?; // sync now + let arr = file.scan()?.into_array_iter()?.read_all()?; // sync + Ok(vec![arr]) +} +``` + +### Epoch C — v0.58.0+/HEAD + +```rust +pub fn write_file(path: &Path, chunks: Vec<ArrayRef>) -> VortexResult<()> { + let session = VortexSession::default(); + let dtype = chunks[0].dtype().clone(); + let stream = ArrayStreamAdapter::new(dtype, stream::iter(chunks.into_iter().map(Ok))); + let rt = Runtime::new().expect("tokio runtime"); + rt.block_on(async { + let mut file = tokio::fs::File::create(path).await?; + let _summary = session.write_options().write(&mut file, stream).await?; + Ok(()) + }) +} + +pub fn read_file(bytes: ByteBuffer) -> VortexResult<Vec<ArrayRef>> { + let session = VortexSession::default(); + let file = session.open_options().open_buffer(bytes)?; + let rt = Runtime::new().expect("tokio runtime"); + rt.block_on(async { + let arr = file.scan()?.into_array_stream()?.read_all().await?; + Ok(vec![arr]) + }) +} +``` + +## Validation Strategy + +Comparison uses `assert_arrays_eq!` with `ChunkedArray` wrapping: + +```rust +fn validate(actual: Vec<ArrayRef>, expected: Vec<ArrayRef>) -> VortexResult<()> { + let actual_dtype = actual[0].dtype().clone(); + let expected_dtype = expected[0].dtype().clone(); + let actual_chunked = ChunkedArray::try_new(actual, actual_dtype)?; + let expected_chunked = ChunkedArray::try_new(expected, expected_dtype)?; + assert_arrays_eq!(actual_chunked, expected_chunked); + Ok(()) +} +``` + +The writer may re-chunk across versions, but `assert_arrays_eq!` compares element-by-element +so chunk boundaries don't matter. + +## Fixture Fetching + +Fixtures are stored in a public S3 bucket accessible via plain HTTPS. `compat-test` uses +`reqwest` (blocking) to fetch — no AWS SDK needed. 
+ +``` +https://vortex-compat-fixtures.s3.amazonaws.com/v{VERSION}/manifest.json +https://vortex-compat-fixtures.s3.amazonaws.com/v{VERSION}/{fixture}.vortex +``` + +Version discovery: `compat-test` takes a `--versions` flag listing which versions to test, +or discovers them from a top-level `versions.json` in the bucket. + +## CI Workflows + +### `compat-gen-upload.yml` — on tag push or manual dispatch + +```yaml +on: + push: + tags: ["[0-9]+.[0-9]+.[0-9]+"] + workflow_dispatch: + inputs: + tag: { description: "Git tag", required: true } + +jobs: + upload-fixtures: + runs-on: ubuntu-latest + permissions: + id-token: write + steps: + - uses: actions/checkout@v4 + with: { ref: "${{ inputs.tag || github.ref_name }}" } + - uses: dtolnay/rust-toolchain@stable + - run: | + VERSION=${{ inputs.tag || github.ref_name }} + cargo run --manifest-path vortex-test/compat-gen/Cargo.toml \ + --bin compat-gen -- --version "$VERSION" --output /tmp/fixtures/ + - run: | + VERSION=${{ inputs.tag || github.ref_name }} + aws s3 cp /tmp/fixtures/ s3://vortex-compat-fixtures/v${VERSION}/ --recursive +``` + +### `compat-test-weekly.yml` — weekly + manual + +```yaml +on: + schedule: + - cron: "0 6 * * 1" + workflow_dispatch: {} + +jobs: + compat-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + - run: | + cargo run --manifest-path vortex-test/compat-gen/Cargo.toml \ + --bin compat-test -- \ + --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com +``` + +## Code Size Summary + +| Component | ~Lines | Shared across branches? 
| +|-----------|--------|------------------------| +| `Cargo.toml` | 35 | Yes | +| `src/main.rs` (compat-gen CLI) | 56 | Yes | +| `src/test_main.rs` (compat-test CLI) | 40 | Yes | +| `src/adapter.rs` (write + read) | 55 | **No — 3 epoch variants** | +| `src/manifest.rs` | 10 | Yes | +| `src/validate.rs` (fetch + compare) | 60 | Yes | +| `src/fixtures/mod.rs` (trait + registry) | 40 | Yes | +| `src/fixtures/synthetic.rs` (6 fixtures) | 170 | Yes | +| `src/fixtures/tpch.rs` (2 fixtures) | 45 | Yes | +| `src/fixtures/clickbench.rs` (1 fixture) | 50 | Yes | +| `src/fixtures/encodings.rs` (stubs) | 60 | Yes | +| CI workflows (2 YAML files) | 80 | Yes | +| **Total** | **~700** | **~645 shared (92%), ~55 branch-specific (8%)** | ## What Changes Per Version When Cherry-Picking | Component | Changes? | |-----------|----------| | Fixture trait + registry | No | -| Fixture builders (synthetic) | No | -| Fixture builders (TPC-H) | No | -| `adapter.rs` | Yes — ~15 lines, 3 variants | -| `main.rs`, `manifest.rs` | No | +| All fixture builders | No | +| `adapter.rs` | **Yes — ~55 lines, 3 variants** | +| `main.rs`, `test_main.rs`, `manifest.rs`, `validate.rs` | No | | `Cargo.toml` | No (path deps resolve to local version) | +| CI workflows | No | ## Usage @@ -96,9 +304,25 @@ implementation. The git history shows all 3 epoch variants. cargo run --manifest-path vortex-test/compat-gen/Cargo.toml \ --bin compat-gen -- --version 0.62.0 --output /tmp/fixtures/ -# Outputs: -# /tmp/fixtures/manifest.json -# /tmp/fixtures/primitives.vortex -# /tmp/fixtures/strings.vortex -# ... 
+# Validate fixtures from S3 against the current reader +cargo run --manifest-path vortex-test/compat-gen/Cargo.toml \ + --bin compat-test -- \ + --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com + +# Validate from a local directory (for development) +cargo run --manifest-path vortex-test/compat-gen/Cargo.toml \ + --bin compat-test -- \ + --fixtures-dir /tmp/fixtures/ ``` + +## Implementation Order + +1. ~~Fixture trait + synthetic builders~~ ✅ +2. ~~TPC-H fixtures~~ ✅ +3. ~~compat-gen binary (main.rs)~~ ✅ +4. ~~Adapter write path (3 epochs)~~ ✅ +5. ClickBench fixture +6. Per-encoding fixture stubs +7. Adapter read path (3 epochs) +8. compat-test binary (test_main.rs + validate.rs) +9. CI workflows From ec562cc33e15eb0d79c1b84e8beb59564189c3f8 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 11 Mar 2026 16:02:47 +0000 Subject: [PATCH 05/13] feat: add compat-test binary, ClickBench fixture, encoding stubs, CI workflows - adapter.rs: add read_file() for Epoch C (session-based open_buffer + async scan) - fixtures/clickbench.rs: download pinned ClickBench hits_0.parquet, take 1k rows - fixtures/encodings.rs: todo!() stubs for Dict, RunEnd, Constant, Sparse, ALP, BitPacked, FSST - test_main.rs: CLI with --fixtures-url (HTTPS) or --fixtures-dir (local) - validate.rs: fetch manifest + fixtures, compare via ChunkedArray + assert_arrays_eq! 
- compat-gen-upload.yml: generate + upload to S3 on tag push, updates versions.json - compat-test-weekly.yml: weekly validation of all versions against HEAD reader - Cargo.toml: add reqwest, parquet deps + compat-test binary target Signed-off-by: Joe Isaacs Co-Authored-By: Claude Opus 4.6 --- .github/workflows/compat-gen-upload.yml | 69 ++++++++ .github/workflows/compat-test-weekly.yml | 27 +++ vortex-test/compat-gen/Cargo.toml | 10 ++ vortex-test/compat-gen/src/adapter.rs | 34 ++-- .../compat-gen/src/fixtures/clickbench.rs | 43 +++++ .../compat-gen/src/fixtures/encodings.rs | 30 ++++ vortex-test/compat-gen/src/fixtures/mod.rs | 11 ++ vortex-test/compat-gen/src/test_main.rs | 88 ++++++++++ vortex-test/compat-gen/src/validate.rs | 166 ++++++++++++++++++ 9 files changed, 468 insertions(+), 10 deletions(-) create mode 100644 .github/workflows/compat-gen-upload.yml create mode 100644 .github/workflows/compat-test-weekly.yml create mode 100644 vortex-test/compat-gen/src/fixtures/clickbench.rs create mode 100644 vortex-test/compat-gen/src/fixtures/encodings.rs create mode 100644 vortex-test/compat-gen/src/test_main.rs create mode 100644 vortex-test/compat-gen/src/validate.rs diff --git a/.github/workflows/compat-gen-upload.yml b/.github/workflows/compat-gen-upload.yml new file mode 100644 index 00000000000..7317e611978 --- /dev/null +++ b/.github/workflows/compat-gen-upload.yml @@ -0,0 +1,69 @@ +name: Compat Fixture Upload + +on: + push: + tags: ["[0-9]+.[0-9]+.[0-9]+"] + workflow_dispatch: + inputs: + tag: + description: "Git tag to generate fixtures for (e.g. 
0.62.0)" + required: true + +env: + S3_BUCKET: vortex-compat-fixtures + +jobs: + upload-fixtures: + runs-on: ubuntu-latest + permissions: + id-token: write + contents: read + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.inputs.tag || github.ref_name }} + + - uses: dtolnay/rust-toolchain@stable + + - uses: Swatinem/rust-cache@v2 + with: + workspaces: vortex-test/compat-gen + + - name: Generate fixtures + run: | + VERSION=${{ github.event.inputs.tag || github.ref_name }} + cargo run --manifest-path vortex-test/compat-gen/Cargo.toml \ + --release --bin compat-gen -- \ + --version "$VERSION" --output /tmp/fixtures/ + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ secrets.COMPAT_FIXTURES_ROLE_ARN }} + aws-region: us-east-1 + + - name: Upload to S3 + run: | + VERSION=${{ github.event.inputs.tag || github.ref_name }} + aws s3 cp /tmp/fixtures/ \ + "s3://${S3_BUCKET}/v${VERSION}/" --recursive + + - name: Update versions.json + run: | + VERSION=${{ github.event.inputs.tag || github.ref_name }} + # Fetch existing versions.json or start with empty array + aws s3 cp "s3://${S3_BUCKET}/versions.json" /tmp/versions.json 2>/dev/null \ + || echo '[]' > /tmp/versions.json + # Append new version if not already present, sort + python3 -c " + import json, sys + with open('/tmp/versions.json') as f: + versions = json.load(f) + v = sys.argv[1] + if v not in versions: + versions.append(v) + versions.sort(key=lambda x: list(map(int, x.split('.')))) + with open('/tmp/versions.json', 'w') as f: + json.dump(versions, f, indent=2) + " "$VERSION" + aws s3 cp /tmp/versions.json "s3://${S3_BUCKET}/versions.json" diff --git a/.github/workflows/compat-test-weekly.yml b/.github/workflows/compat-test-weekly.yml new file mode 100644 index 00000000000..3ed04482601 --- /dev/null +++ b/.github/workflows/compat-test-weekly.yml @@ -0,0 +1,27 @@ +name: Compat Test + +on: + schedule: + - cron: "0 6 * * 1" # Monday 
6am UTC + workflow_dispatch: {} + +env: + FIXTURES_URL: https://vortex-compat-fixtures.s3.amazonaws.com + +jobs: + compat-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: dtolnay/rust-toolchain@stable + + - uses: Swatinem/rust-cache@v2 + with: + workspaces: vortex-test/compat-gen + + - name: Run compat tests + run: | + cargo run --manifest-path vortex-test/compat-gen/Cargo.toml \ + --release --bin compat-test -- \ + --fixtures-url "$FIXTURES_URL" diff --git a/vortex-test/compat-gen/Cargo.toml b/vortex-test/compat-gen/Cargo.toml index 5f7f6fe8c1d..259d5f1e146 100644 --- a/vortex-test/compat-gen/Cargo.toml +++ b/vortex-test/compat-gen/Cargo.toml @@ -7,6 +7,10 @@ edition = "2021" name = "compat-gen" path = "src/main.rs" +[[bin]] +name = "compat-test" +path = "src/test_main.rs" + [dependencies] # Vortex crates (path deps — resolve to whatever version is checked out) vortex = { path = "../../vortex", features = ["files", "tokio"] } @@ -19,10 +23,16 @@ tpchgen = "2" tpchgen-arrow = "2" arrow-array = "57" +# ClickBench parquet reading +parquet = "57" + # Async runtime tokio = { version = "1", features = ["full"] } futures = "0.3" +# HTTP fetching (for ClickBench fixture + compat-test S3 downloads) +reqwest = { version = "0.12", features = ["blocking"] } + # CLI + serialization clap = { version = "4", features = ["derive"] } serde = { version = "1", features = ["derive"] } diff --git a/vortex-test/compat-gen/src/adapter.rs b/vortex-test/compat-gen/src/adapter.rs index 8e844396e30..a7e45514b1f 100644 --- a/vortex-test/compat-gen/src/adapter.rs +++ b/vortex-test/compat-gen/src/adapter.rs @@ -1,33 +1,33 @@ // Epoch C adapter — for Vortex v0.58.0 through HEAD // -// API changes from Epoch B: -// - VortexWriteOptions no longer implements Default -// - Must construct via VortexSession: session.write_options() -// - .write(&mut sink, stream).await returns VortexResult -// - WriteOptionsSessionExt trait provides session.write_options() +// Write: 
session.write_options(), returns WriteSummary, takes &mut sink +// Read: session.open_options().open_buffer(buf) (sync), into_array_stream() (async) use std::path::Path; use futures::stream; use tokio::runtime::Runtime; -use vortex::file::WriteOptionsSessionExt; +use vortex::file::{OpenOptionsSessionExt, WriteOptionsSessionExt}; use vortex::VortexSession; -use vortex_array::stream::ArrayStreamAdapter; +use vortex_array::stream::{ArrayStreamAdapter, ArrayStreamExt}; use vortex_array::ArrayRef; +use vortex_buffer::ByteBuffer; use vortex_error::VortexResult; +fn runtime() -> Runtime { + Runtime::new().expect("failed to create tokio runtime") +} + /// Write a sequence of array chunks as a `.vortex` file. pub fn write_file(path: &Path, chunks: Vec) -> VortexResult<()> { let dtype = chunks[0].dtype().clone(); let stream = ArrayStreamAdapter::new(dtype, stream::iter(chunks.into_iter().map(Ok))); let session = VortexSession::default(); - let rt = Runtime::new().expect("failed to create tokio runtime"); - rt.block_on(async { + runtime().block_on(async { let mut file = tokio::fs::File::create(path).await.map_err(|e| { vortex_error::vortex_err!("failed to create {}: {e}", path.display()) })?; - // At 0.58.0+: write() returns WriteSummary, takes &mut sink. let _summary = session .write_options() .write(&mut file, stream) @@ -35,3 +35,17 @@ pub fn write_file(path: &Path, chunks: Vec) -> VortexResult<()> { Ok(()) }) } + +/// Read a `.vortex` file from bytes, returning the arrays. +pub fn read_file(bytes: ByteBuffer) -> VortexResult> { + let session = VortexSession::default(); + let file = session.open_options().open_buffer(bytes)?; + runtime().block_on(async { + let arr = file + .scan()? + .into_array_stream()? 
+ .read_all() + .await?; + Ok(vec![arr]) + }) +} diff --git a/vortex-test/compat-gen/src/fixtures/clickbench.rs b/vortex-test/compat-gen/src/fixtures/clickbench.rs new file mode 100644 index 00000000000..77051575d07 --- /dev/null +++ b/vortex-test/compat-gen/src/fixtures/clickbench.rs @@ -0,0 +1,43 @@ +use std::io::Cursor; + +use arrow_array::RecordBatch; +use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; +use vortex_array::arrow::FromArrowArray; +use vortex_array::ArrayRef; + +use super::Fixture; + +/// First partition of ClickBench hits, limited to 1000 rows. +const CLICKBENCH_URL: &str = + "https://pub-3ba949c0f0354ac18db1f0f14f0a2c52.r2.dev/clickbench/parquet_many/hits_0.parquet"; + +pub struct ClickBenchHits1kFixture; + +impl Fixture for ClickBenchHits1kFixture { + fn name(&self) -> &str { + "clickbench_hits_1k.vortex" + } + + fn build(&self) -> Vec { + let bytes = reqwest::blocking::get(CLICKBENCH_URL) + .expect("failed to download ClickBench parquet") + .bytes() + .expect("failed to read ClickBench response body"); + + let reader = ParquetRecordBatchReaderBuilder::try_new(bytes) + .expect("failed to open parquet") + .with_batch_size(1000) + .with_limit(1000) + .build() + .expect("failed to build parquet reader"); + + let batches: Vec = reader + .collect::, _>>() + .expect("failed to read parquet batches"); + + batches + .into_iter() + .map(|batch| ArrayRef::from_arrow(batch, false).expect("arrow conversion failed")) + .collect() + } +} diff --git a/vortex-test/compat-gen/src/fixtures/encodings.rs b/vortex-test/compat-gen/src/fixtures/encodings.rs new file mode 100644 index 00000000000..e76aba483a1 --- /dev/null +++ b/vortex-test/compat-gen/src/fixtures/encodings.rs @@ -0,0 +1,30 @@ +use vortex_array::ArrayRef; + +use super::Fixture; + +macro_rules! 
encoding_stub { + ($name:ident, $file:expr) => { + pub struct $name; + + impl Fixture for $name { + fn name(&self) -> &str { + $file + } + + fn build(&self) -> Vec { + todo!(concat!( + "blocked on stable-encodings RFC — ", + $file + )) + } + } + }; +} + +encoding_stub!(DictEncodingFixture, "enc_dict.vortex"); +encoding_stub!(RunEndEncodingFixture, "enc_runend.vortex"); +encoding_stub!(ConstantEncodingFixture, "enc_constant.vortex"); +encoding_stub!(SparseEncodingFixture, "enc_sparse.vortex"); +encoding_stub!(AlpEncodingFixture, "enc_alp.vortex"); +encoding_stub!(BitPackedEncodingFixture, "enc_bitpacked.vortex"); +encoding_stub!(FsstEncodingFixture, "enc_fsst.vortex"); diff --git a/vortex-test/compat-gen/src/fixtures/mod.rs b/vortex-test/compat-gen/src/fixtures/mod.rs index b09662bec89..b56d1e897b5 100644 --- a/vortex-test/compat-gen/src/fixtures/mod.rs +++ b/vortex-test/compat-gen/src/fixtures/mod.rs @@ -1,3 +1,5 @@ +mod clickbench; +pub mod encodings; mod synthetic; mod tpch; @@ -26,5 +28,14 @@ pub fn all_fixtures() -> Vec> { Box::new(synthetic::ChunkedFixture), Box::new(tpch::TpchLineitemFixture), Box::new(tpch::TpchOrdersFixture), + Box::new(clickbench::ClickBenchHits1kFixture), + // Encoding stubs — uncomment as stable-encodings RFC lands: + // Box::new(encodings::DictEncodingFixture), + // Box::new(encodings::RunEndEncodingFixture), + // Box::new(encodings::ConstantEncodingFixture), + // Box::new(encodings::SparseEncodingFixture), + // Box::new(encodings::AlpEncodingFixture), + // Box::new(encodings::BitPackedEncodingFixture), + // Box::new(encodings::FsstEncodingFixture), ] } diff --git a/vortex-test/compat-gen/src/test_main.rs b/vortex-test/compat-gen/src/test_main.rs new file mode 100644 index 00000000000..1eb6055b5b8 --- /dev/null +++ b/vortex-test/compat-gen/src/test_main.rs @@ -0,0 +1,88 @@ +mod adapter; +mod fixtures; +mod manifest; +mod validate; + +use std::path::PathBuf; + +use clap::Parser; +use vortex_error::VortexResult; + +use 
crate::validate::{discover_versions, FixtureSource}; + +#[derive(Parser)] +#[command(name = "compat-test", about = "Validate Vortex backward-compat fixtures")] +struct Cli { + /// HTTPS base URL for the fixture bucket. + /// e.g. https://vortex-compat-fixtures.s3.amazonaws.com + #[arg(long)] + fixtures_url: Option, + + /// Local directory containing fixture versions (for development). + #[arg(long)] + fixtures_dir: Option, + + /// Explicit list of versions to test (comma-separated). + /// If omitted, discovers versions from versions.json or directory listing. + #[arg(long, value_delimiter = ',')] + versions: Option>, +} + +fn main() -> VortexResult<()> { + let cli = Cli::parse(); + + let source = match (&cli.fixtures_url, &cli.fixtures_dir) { + (Some(url), None) => FixtureSource::Url(url.clone()), + (None, Some(dir)) => FixtureSource::Dir(dir.clone()), + _ => { + eprintln!("error: specify exactly one of --fixtures-url or --fixtures-dir"); + std::process::exit(1); + } + }; + + let versions = match cli.versions { + Some(v) => v, + None => { + eprintln!("discovering versions..."); + discover_versions(&source)? 
+ } + }; + + eprintln!("testing {} version(s): {}", versions.len(), versions.join(", ")); + + let results = validate::validate_all(&source, &versions)?; + + let mut total_passed = 0; + let mut total_failed = 0; + let mut total_skipped = 0; + + for r in &results { + total_passed += r.passed; + total_failed += r.failed.len(); + total_skipped += r.skipped; + if r.failed.is_empty() { + eprintln!(" v{}: {} passed, {} skipped", r.version, r.passed, r.skipped); + } else { + eprintln!( + " v{}: {} passed, {} FAILED, {} skipped", + r.version, + r.passed, + r.failed.len(), + r.skipped + ); + for (name, err) in &r.failed { + eprintln!(" FAIL {name}: {err}"); + } + } + } + + eprintln!( + "\nresult: {total_passed} passed, {total_failed} failed, {total_skipped} skipped" + ); + + if total_failed > 0 { + std::process::exit(1); + } + + Ok(()) +} diff --git a/vortex-test/compat-gen/src/validate.rs b/vortex-test/compat-gen/src/validate.rs new file mode 100644 index 00000000000..714bf2e9f2a --- /dev/null +++ b/vortex-test/compat-gen/src/validate.rs @@ -0,0 +1,166 @@ +use std::collections::HashMap; +use std::path::{Path, PathBuf}; + +use vortex_array::arrays::ChunkedArray; +use vortex_array::{assert_arrays_eq, ArrayRef, IntoArray}; +use vortex_buffer::ByteBuffer; +use vortex_error::{vortex_bail, vortex_err, VortexResult}; + +use crate::adapter; +use crate::fixtures::{all_fixtures, Fixture}; +use crate::manifest::Manifest; + +/// Result of validating one version's fixtures. +pub struct VersionResult { + pub version: String, + pub passed: usize, + pub skipped: usize, + pub failed: Vec<(String, String)>, +} + +/// Validate all versions' fixtures against the current reader. 
+pub fn validate_all(source: &FixtureSource, versions: &[String]) -> VortexResult> { + let fixtures = all_fixtures(); + let fixture_map: HashMap<&str, &dyn Fixture> = fixtures + .iter() + .map(|f| (f.name(), f.as_ref())) + .collect(); + + let mut results = Vec::new(); + for version in versions { + let result = validate_version(source, version, &fixture_map)?; + results.push(result); + } + Ok(results) +} + +fn validate_version( + source: &FixtureSource, + version: &str, + fixture_map: &HashMap<&str, &dyn Fixture>, +) -> VortexResult { + let manifest = source.fetch_manifest(version)?; + let mut passed = 0; + let mut skipped = 0; + let mut failed = Vec::new(); + + for fixture_name in &manifest.fixtures { + let Some(fixture) = fixture_map.get(fixture_name.as_str()) else { + eprintln!(" warn: unknown fixture {fixture_name} in v{version}, skipping"); + skipped += 1; + continue; + }; + + eprintln!(" checking {fixture_name} from v{version}..."); + let bytes = source.fetch_fixture(version, fixture_name)?; + match validate_one(bytes, *fixture) { + Ok(()) => passed += 1, + Err(e) => { + eprintln!(" FAIL: {fixture_name} from v{version}: {e}"); + failed.push((fixture_name.clone(), e.to_string())); + } + } + } + + Ok(VersionResult { + version: version.to_string(), + passed, + skipped, + failed, + }) +} + +fn validate_one(bytes: ByteBuffer, fixture: &dyn Fixture) -> VortexResult<()> { + let actual = adapter::read_file(bytes)?; + let expected = fixture.build(); + + let actual_dtype = actual[0].dtype().clone(); + let expected_dtype = expected[0].dtype().clone(); + let actual_arr = ChunkedArray::try_new(actual, actual_dtype)?.into_array(); + let expected_arr = ChunkedArray::try_new(expected, expected_dtype)?.into_array(); + + assert_arrays_eq!(actual_arr, expected_arr); + Ok(()) +} + +/// Source for fetching fixture files — either HTTPS or local directory. 
+pub enum FixtureSource { + Url(String), + Dir(PathBuf), +} + +impl FixtureSource { + fn fetch_manifest(&self, version: &str) -> VortexResult { + let json = match self { + FixtureSource::Url(base) => { + let url = format!("{base}/v{version}/manifest.json"); + http_get_bytes(&url)? + } + FixtureSource::Dir(dir) => { + let path = dir.join(format!("v{version}")).join("manifest.json"); + std::fs::read(&path) + .map_err(|e| vortex_err!("failed to read {}: {e}", path.display()))? + } + }; + serde_json::from_slice(&json) + .map_err(|e| vortex_err!("failed to parse manifest for v{version}: {e}")) + } + + fn fetch_fixture(&self, version: &str, name: &str) -> VortexResult { + let bytes = match self { + FixtureSource::Url(base) => { + let url = format!("{base}/v{version}/{name}"); + http_get_bytes(&url)? + } + FixtureSource::Dir(dir) => { + let path = dir.join(format!("v{version}")).join(name); + std::fs::read(&path) + .map_err(|e| vortex_err!("failed to read {}: {e}", path.display()))? + } + }; + Ok(ByteBuffer::from(bytes)) + } +} + +/// Discover versions from a versions.json file, or from local directory listing. +pub fn discover_versions(source: &FixtureSource) -> VortexResult> { + match source { + FixtureSource::Url(base) => { + let url = format!("{base}/versions.json"); + let bytes = http_get_bytes(&url)?; + let versions: Vec = serde_json::from_slice(&bytes) + .map_err(|e| vortex_err!("failed to parse versions.json: {e}"))?; + Ok(versions) + } + FixtureSource::Dir(dir) => { + let mut versions = Vec::new(); + for entry in std::fs::read_dir(dir) + .map_err(|e| vortex_err!("failed to read dir {}: {e}", dir.display()))? 
+ { + let entry = + entry.map_err(|e| vortex_err!("failed to read dir entry: {e}"))?; + let name = entry.file_name(); + let name = name.to_string_lossy(); + if let Some(version) = name.strip_prefix('v') { + if entry.path().join("manifest.json").exists() { + versions.push(version.to_string()); + } + } + } + versions.sort(); + Ok(versions) + } + } +} + +fn http_get_bytes(url: &str) -> VortexResult> { + let response = reqwest::blocking::get(url) + .map_err(|e| vortex_err!("HTTP request failed for {url}: {e}"))?; + if !response.status().is_success() { + vortex_bail!("HTTP {} fetching {url}", response.status()); + } + response + .bytes() + .map(|b| b.to_vec()) + .map_err(|e| vortex_err!("failed to read response body from {url}: {e}")) +} From 3533cd849861a0d886e3930079d598053306dac3 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Thu, 12 Mar 2026 11:55:13 +0000 Subject: [PATCH 06/13] wip back compat tester Signed-off-by: Joe Isaacs --- .github/workflows/compat-gen-upload.yml | 22 +- .github/workflows/compat-test-weekly.yml | 5 +- Cargo.lock | 23 ++ Cargo.toml | 2 + vortex-test/compat-gen/Cargo.toml | 50 ++- vortex-test/compat-gen/README.md | 319 ++++++++++++++++++ vortex-test/compat-gen/src/adapter.rs | 43 ++- .../compat-gen/src/fixtures/clickbench.rs | 20 +- .../compat-gen/src/fixtures/encodings.rs | 10 +- vortex-test/compat-gen/src/fixtures/mod.rs | 3 +- .../compat-gen/src/fixtures/synthetic.rs | 104 +++--- vortex-test/compat-gen/src/fixtures/tpch.rs | 22 +- vortex-test/compat-gen/src/main.rs | 7 +- vortex-test/compat-gen/src/manifest.rs | 6 +- vortex-test/compat-gen/src/test_main.rs | 23 +- vortex-test/compat-gen/src/validate.rs | 38 ++- 16 files changed, 531 insertions(+), 166 deletions(-) create mode 100644 vortex-test/compat-gen/README.md diff --git a/.github/workflows/compat-gen-upload.yml b/.github/workflows/compat-gen-upload.yml index 7317e611978..e317b43222e 100644 --- a/.github/workflows/compat-gen-upload.yml +++ b/.github/workflows/compat-gen-upload.yml @@ 
-1,12 +1,10 @@ name: Compat Fixture Upload on: - push: - tags: ["[0-9]+.[0-9]+.[0-9]+"] workflow_dispatch: inputs: - tag: - description: "Git tag to generate fixtures for (e.g. 0.62.0)" + version: + description: "Version to generate fixtures for (e.g. 0.62.0)" required: true env: @@ -20,21 +18,15 @@ jobs: contents: read steps: - uses: actions/checkout@v4 - with: - ref: ${{ github.event.inputs.tag || github.ref_name }} - uses: dtolnay/rust-toolchain@stable - uses: Swatinem/rust-cache@v2 - with: - workspaces: vortex-test/compat-gen - name: Generate fixtures run: | - VERSION=${{ github.event.inputs.tag || github.ref_name }} - cargo run --manifest-path vortex-test/compat-gen/Cargo.toml \ - --release --bin compat-gen -- \ - --version "$VERSION" --output /tmp/fixtures/ + cargo run -p vortex-compat --release --bin compat-gen -- \ + --version "${{ inputs.version }}" --output /tmp/fixtures/ - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v4 @@ -44,13 +36,11 @@ jobs: - name: Upload to S3 run: | - VERSION=${{ github.event.inputs.tag || github.ref_name }} aws s3 cp /tmp/fixtures/ \ - "s3://${S3_BUCKET}/v${VERSION}/" --recursive + "s3://${S3_BUCKET}/v${{ inputs.version }}/" --recursive - name: Update versions.json run: | - VERSION=${{ github.event.inputs.tag || github.ref_name }} # Fetch existing versions.json or start with empty array aws s3 cp "s3://${S3_BUCKET}/versions.json" /tmp/versions.json 2>/dev/null \ || echo '[]' > /tmp/versions.json @@ -65,5 +55,5 @@ jobs: versions.sort(key=lambda x: list(map(int, x.split('.')))) with open('/tmp/versions.json', 'w') as f: json.dump(versions, f, indent=2) - " "$VERSION" + " "${{ inputs.version }}" aws s3 cp /tmp/versions.json "s3://${S3_BUCKET}/versions.json" diff --git a/.github/workflows/compat-test-weekly.yml b/.github/workflows/compat-test-weekly.yml index 3ed04482601..8339d67dd09 100644 --- a/.github/workflows/compat-test-weekly.yml +++ b/.github/workflows/compat-test-weekly.yml @@ -17,11 +17,8 @@ 
jobs: - uses: dtolnay/rust-toolchain@stable - uses: Swatinem/rust-cache@v2 - with: - workspaces: vortex-test/compat-gen - name: Run compat tests run: | - cargo run --manifest-path vortex-test/compat-gen/Cargo.toml \ - --release --bin compat-test -- \ + cargo run -p vortex-compat --release --bin compat-test -- \ --fixtures-url "$FIXTURES_URL" diff --git a/Cargo.lock b/Cargo.lock index d380a3d6229..c8fcb71680a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9848,6 +9848,29 @@ dependencies = [ "vortex-session", ] +[[package]] +name = "vortex-compat" +version = "0.1.0" +dependencies = [ + "arrow-array", + "chrono", + "clap", + "futures", + "parquet", + "reqwest", + "serde", + "serde_json", + "tokio", + "tpchgen", + "tpchgen-arrow", + "vortex", + "vortex-array", + "vortex-buffer", + "vortex-error", + "vortex-session", + "vortex-utils", +] + [[package]] name = "vortex-cub" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 0da5ee805ba..69c2a6e3867 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,6 +32,7 @@ members = [ "vortex-jni", "vortex-python", "vortex-tui", + "vortex-test/compat-gen", "vortex-test/e2e", "vortex-test/e2e-cuda", "xtask", @@ -284,6 +285,7 @@ vortex-bench = { path = "./vortex-bench", default-features = false } vortex-cuda = { path = "./vortex-cuda", default-features = false } vortex-cuda-macros = { path = "./vortex-cuda/macros" } vortex-duckdb = { path = "./vortex-duckdb", default-features = false } +vortex-compat = { path = "./vortex-test/compat-gen" } vortex-test-e2e = { path = "./vortex-test/e2e", default-features = false } vortex-test-e2e-cuda = { path = "./vortex-test/e2e-cuda", default-features = false } vortex-tui = { path = "./vortex-tui" } diff --git a/vortex-test/compat-gen/Cargo.toml b/vortex-test/compat-gen/Cargo.toml index 259d5f1e146..afa3c6d9118 100644 --- a/vortex-test/compat-gen/Cargo.toml +++ b/vortex-test/compat-gen/Cargo.toml @@ -1,7 +1,19 @@ [package] name = "vortex-compat" -version = "0.1.0" -edition = "2021" +authors = { 
workspace = true } +description = "Backward-compatibility fixture generation and testing for Vortex" +edition = { workspace = true } +homepage = { workspace = true } +include = { workspace = true } +keywords = { workspace = true } +license = { workspace = true } +publish = false +repository = { workspace = true } +rust-version = { workspace = true } +version = { workspace = true } + +[lints] +workspace = true [[bin]] name = "compat-gen" @@ -12,29 +24,31 @@ name = "compat-test" path = "src/test_main.rs" [dependencies] -# Vortex crates (path deps — resolve to whatever version is checked out) -vortex = { path = "../../vortex", features = ["files", "tokio"] } -vortex-array = { path = "../../vortex-array" } -vortex-buffer = { path = "../../vortex-buffer" } -vortex-error = { path = "../../vortex-error" } +# Vortex crates +vortex = { workspace = true, features = ["files", "tokio"] } +vortex-array = { workspace = true, features = ["_test-harness"] } +vortex-buffer = { workspace = true } +vortex-error = { workspace = true } +vortex-session = { workspace = true } +vortex-utils = { workspace = true } # TPC-H generation -tpchgen = "2" -tpchgen-arrow = "2" -arrow-array = "57" +tpchgen = { workspace = true } +tpchgen-arrow = { workspace = true } +arrow-array = { workspace = true } # ClickBench parquet reading -parquet = "57" +parquet = { workspace = true } # Async runtime -tokio = { version = "1", features = ["full"] } -futures = "0.3" +tokio = { workspace = true, features = ["full"] } +futures = { workspace = true } # HTTP fetching (for ClickBench fixture + compat-test S3 downloads) -reqwest = { version = "0.12", features = ["blocking"] } +reqwest = { workspace = true } # CLI + serialization -clap = { version = "4", features = ["derive"] } -serde = { version = "1", features = ["derive"] } -serde_json = "1" -chrono = { version = "0.4", features = ["serde"] } +clap = { workspace = true, features = ["derive"] } +serde = { workspace = true, features = ["derive"] } +serde_json = { 
workspace = true } +chrono = { workspace = true, features = ["serde"] } diff --git a/vortex-test/compat-gen/README.md b/vortex-test/compat-gen/README.md new file mode 100644 index 00000000000..f28023a133e --- /dev/null +++ b/vortex-test/compat-gen/README.md @@ -0,0 +1,319 @@ +# vortex-compat: Backward-Compatibility Testing + +This crate provides two binaries that together ensure Vortex can always read files +written by older versions: + +- **`compat-gen`** — generates deterministic fixture files for a given Vortex version. +- **`compat-test`** — reads fixtures from every historical version and validates + they round-trip to the expected arrays. + +Fixtures are stored in an S3 bucket. CI uploads new fixtures via a manually dispatched workflow +and runs weekly validation against all prior versions. + +## First-Time Setup: Bootstrap the Bucket + +After creating the S3 bucket (see [AWS Setup](#aws-setup-one-time) below), seed it +with the first fixture set: + +```bash +# 1. Generate fixtures for the current version +cargo run -p vortex-compat --release --bin compat-gen -- \ + --version 0.62.0 --output /tmp/fixtures/ + +# 2. Upload to S3 +AWS_PROFILE=vortex-ci aws s3 cp /tmp/fixtures/ \ + s3://vortex-compat-fixtures/v0.62.0/ --recursive + +# 3. Create the initial versions.json +echo '["0.62.0"]' > /tmp/versions.json +AWS_PROFILE=vortex-ci aws s3 cp /tmp/versions.json \ + s3://vortex-compat-fixtures/versions.json + +# 4. Verify the round-trip +AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin compat-test -- \ + --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com +``` + +## Uploading Fixtures for a New Version + +When a new Vortex version is tagged and you want to upload its fixtures manually +(instead of dispatching the CI upload workflow for that version): + +```bash +VERSION=0.63.0 + +# 1. Generate fixtures +cargo run -p vortex-compat --release --bin compat-gen -- \ + --version "$VERSION" --output /tmp/fixtures/ + +# 2.
Upload to S3 under the new version prefix +AWS_PROFILE=vortex-ci aws s3 cp /tmp/fixtures/ \ + "s3://vortex-compat-fixtures/v${VERSION}/" --recursive + +# 3. Append the version to versions.json +AWS_PROFILE=vortex-ci aws s3 cp \ + s3://vortex-compat-fixtures/versions.json /tmp/versions.json +python3 -c " +import json, sys +with open('/tmp/versions.json') as f: + versions = json.load(f) +v = sys.argv[1] +if v not in versions: + versions.append(v) + versions.sort(key=lambda x: list(map(int, x.split('.')))) +with open('/tmp/versions.json', 'w') as f: + json.dump(versions, f, indent=2) +" "$VERSION" +AWS_PROFILE=vortex-ci aws s3 cp /tmp/versions.json \ + s3://vortex-compat-fixtures/versions.json + +# 4. Verify all versions (including the new one) +AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin compat-test -- \ + --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com +``` + +## Re-uploading Fixtures for an Existing Version + +If a fixture was added or changed and you need to regenerate for a version that +already exists in the bucket, the upload overwrites the existing prefix: + +```bash +VERSION=0.62.0 + +# 1. Regenerate +cargo run -p vortex-compat --release --bin compat-gen -- \ + --version "$VERSION" --output /tmp/fixtures/ + +# 2. Overwrite in S3 +AWS_PROFILE=vortex-ci aws s3 cp /tmp/fixtures/ \ + "s3://vortex-compat-fixtures/v${VERSION}/" --recursive + +# 3. Verify +AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin compat-test -- \ + --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com +``` + +No need to update `versions.json` — the version is already listed. 
+ +## Local-Only Workflow + +You can skip S3 entirely and work against local directories: + +```bash +# Generate into a versioned subdirectory +cargo run -p vortex-compat --release --bin compat-gen -- \ + --version 0.62.0 --output /tmp/compat-root/v0.62.0/ + +# Validate all local versions +cargo run -p vortex-compat --release --bin compat-test -- \ + --fixtures-dir /tmp/compat-root/ +``` + +If the bucket requires authenticated access, set your AWS profile: + +```bash +AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin compat-test -- \ + --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com +``` + +## AWS Setup (one-time) + +All resources live in the **benchmark account (245040174862)**, region **us-east-1**. + +### 1. Create the S3 bucket + +```bash +aws s3api create-bucket \ + --bucket vortex-compat-fixtures \ + --region us-east-1 +``` + +### 2. Enable public read access + +Disable the "Block Public Access" settings that prevent a public bucket policy: + +```bash +aws s3api put-public-access-block \ + --bucket vortex-compat-fixtures \ + --public-access-block-configuration \ + BlockPublicAcls=true,IgnorePublicAcls=true,BlockPublicPolicy=false,RestrictPublicBuckets=false +``` + +Then attach a bucket policy that grants unauthenticated read: + +```bash +aws s3api put-bucket-policy \ + --bucket vortex-compat-fixtures \ + --policy '{ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "PublicRead", + "Effect": "Allow", + "Principal": "*", + "Action": ["s3:GetObject", "s3:ListBucket"], + "Resource": [ + "arn:aws:s3:::vortex-compat-fixtures", + "arn:aws:s3:::vortex-compat-fixtures/*" + ] + } + ] + }' +``` + +### 3. Create an IAM OIDC provider for GitHub Actions + +Skip this step if the account already has a GitHub OIDC provider configured. 
+ +```bash +aws iam create-open-id-connect-provider \ + --url https://token.actions.githubusercontent.com \ + --client-id-list sts.amazonaws.com \ + --thumbprint-list 6938fd4d98bab03faadb97b34396831e3780aea1 +``` + +### 4. Create the IAM role for CI + +Create the trust policy file (`trust-policy.json`): + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": { + "Federated": "arn:aws:iam::245040174862:oidc-provider/token.actions.githubusercontent.com" + }, + "Action": "sts:AssumeRoleWithWebIdentity", + "Condition": { + "StringEquals": { + "token.actions.githubusercontent.com:aud": "sts.amazonaws.com" + }, + "StringLike": { + "token.actions.githubusercontent.com:sub": "repo:spiraldb/vortex:ref:refs/tags/*" + } + } + } + ] +} +``` + +Create the role: + +```bash +aws iam create-role \ + --role-name GitHubCompatFixturesRole \ + --assume-role-policy-document file://trust-policy.json +``` + +Attach an inline permission policy: + +```bash +aws iam put-role-policy \ + --role-name GitHubCompatFixturesRole \ + --policy-name CompatFixturesS3Access \ + --policy-document '{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:PutObject", + "s3:GetObject", + "s3:ListBucket" + ], + "Resource": [ + "arn:aws:s3:::vortex-compat-fixtures", + "arn:aws:s3:::vortex-compat-fixtures/*" + ] + } + ] + }' +``` + +### 5. Store the role ARN as a GitHub secret + +```bash +gh secret set COMPAT_FIXTURES_ROLE_ARN \ + --body "arn:aws:iam::245040174862:role/GitHubCompatFixturesRole" +``` + +## CI Workflows + +### Fixture upload (`.github/workflows/compat-gen-upload.yml`) + +Triggered via **manual dispatch** with a required `version` input (e.g. `0.62.0`). +Will be updated to also trigger on release tag pushes once the workflow is proven. + +1. Checks out the current branch +2. Runs `compat-gen --version <version> --output /tmp/fixtures/` +3. Assumes the `GitHubCompatFixturesRole` via OIDC +4. 
Uploads fixtures to `s3://vortex-compat-fixtures/v<version>/` +5. Appends the version to `versions.json` + +### Weekly compat test (`.github/workflows/compat-test-weekly.yml`) + +Runs **every Monday at 06:00 UTC** and on **manual dispatch**. + +1. Checks out `main` at HEAD +2. Runs `compat-test --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com` +3. Validates every version listed in `versions.json` + +## Fixture Suite + +| Fixture | File | Description | +|---------|------|-------------| +| Primitives | `primitives.vortex` | All numeric types (u8–u64, i32, i64, f32, f64) with min/mid/max values | +| Strings | `strings.vortex` | Variable-length strings including empty, ASCII, Unicode, and emoji | +| Booleans | `booleans.vortex` | Boolean array with mixed true/false values | +| Nullable | `nullable.vortex` | Nullable int and string columns with interleaved nulls | +| Nested Struct | `struct_nested.vortex` | Two-level nested struct (inner struct within outer struct) | +| Chunked | `chunked.vortex` | Multi-chunk file: 3 chunks of 1000 rows each | +| TPC-H Lineitem | `tpch_lineitem.vortex` | TPC-H lineitem table at scale factor 0.01 | +| TPC-H Orders | `tpch_orders.vortex` | TPC-H orders table at scale factor 0.01 | +| ClickBench Hits | `clickbench_hits_1k.vortex` | First 1000 rows of the ClickBench hits table | + +Encoding-specific fixtures (Dict, RunEnd, Constant, Sparse, ALP, BitPacked, FSST) are +stubbed and will be enabled once the stable-encodings RFC lands. + +### Adding a new fixture + +1. Create a struct implementing the `Fixture` trait in `src/fixtures/`: + ```rust + pub struct MyFixture; + impl Fixture for MyFixture { + fn name(&self) -> &str { "my_fixture.vortex" } + fn build(&self) -> VortexResult<Vec<ArrayRef>> { /* deterministic array construction */ } + } + ``` +2. Register it in `all_fixtures()` in `src/fixtures/mod.rs`. +3. Run `compat-gen` locally to verify it produces a valid file. 
+ +The `build()` method **must be deterministic** — `compat-test` calls it to produce the +expected arrays and compares against what was read from disk. + +## Adapter Epochs + +The adapter module (`src/adapter.rs`) contains the read/write logic for the Vortex file +format. As the format API evolves across major versions, new "epochs" are introduced: + +| Epoch | Vortex Versions | Key API Surface | +|-------|----------------|-----------------| +| A | v0.36.0 | Original `VortexFileWriter` / `VortexOpenOptions` | +| B | v0.45.0 – v0.52.0 | Intermediate session-based API | +| C | v0.58.0 – HEAD | `session.write_options()` / `session.open_options().open_buffer()` | + +Only Epoch C is currently active. Earlier epochs were used during initial development +and can be resurrected by cherry-picking the adapter code onto an older release branch +if retroactive fixture generation is needed. + +### Cherry-picking to older releases + +To generate fixtures for a version in Epoch A or B: + +1. Check out the target tag (e.g. `git checkout v0.45.0`) +2. Cherry-pick the compat-gen crate: `git cherry-pick --no-commit <commit>` +3. Swap `src/adapter.rs` to the appropriate epoch's implementation +4. Resolve any dependency mismatches in `Cargo.toml` +5. 
Run `compat-gen` and upload the resulting fixtures diff --git a/vortex-test/compat-gen/src/adapter.rs b/vortex-test/compat-gen/src/adapter.rs index a7e45514b1f..404787415a2 100644 --- a/vortex-test/compat-gen/src/adapter.rs +++ b/vortex-test/compat-gen/src/adapter.rs @@ -7,45 +7,44 @@ use std::path::Path; use futures::stream; use tokio::runtime::Runtime; -use vortex::file::{OpenOptionsSessionExt, WriteOptionsSessionExt}; -use vortex::VortexSession; -use vortex_array::stream::{ArrayStreamAdapter, ArrayStreamExt}; +use vortex::VortexSessionDefault; +use vortex::file::OpenOptionsSessionExt; +use vortex::file::WriteOptionsSessionExt; +use vortex::io::session::RuntimeSessionExt; use vortex_array::ArrayRef; +use vortex_array::stream::ArrayStreamAdapter; +use vortex_array::stream::ArrayStreamExt; use vortex_buffer::ByteBuffer; use vortex_error::VortexResult; +use vortex_session::VortexSession; -fn runtime() -> Runtime { - Runtime::new().expect("failed to create tokio runtime") +fn runtime() -> VortexResult { + Runtime::new().map_err(|e| vortex_error::vortex_err!("failed to create tokio runtime: {e}")) } /// Write a sequence of array chunks as a `.vortex` file. 
+#[allow(dead_code)] pub fn write_file(path: &Path, chunks: Vec) -> VortexResult<()> { let dtype = chunks[0].dtype().clone(); let stream = ArrayStreamAdapter::new(dtype, stream::iter(chunks.into_iter().map(Ok))); - let session = VortexSession::default(); - runtime().block_on(async { - let mut file = tokio::fs::File::create(path).await.map_err(|e| { - vortex_error::vortex_err!("failed to create {}: {e}", path.display()) - })?; - let _summary = session - .write_options() - .write(&mut file, stream) - .await?; + runtime()?.block_on(async { + let session = VortexSession::default().with_tokio(); + let mut file = tokio::fs::File::create(path) + .await + .map_err(|e| vortex_error::vortex_err!("failed to create {}: {e}", path.display()))?; + let _summary = session.write_options().write(&mut file, stream).await?; Ok(()) }) } /// Read a `.vortex` file from bytes, returning the arrays. +#[allow(dead_code)] pub fn read_file(bytes: ByteBuffer) -> VortexResult> { - let session = VortexSession::default(); - let file = session.open_options().open_buffer(bytes)?; - runtime().block_on(async { - let arr = file - .scan()? - .into_array_stream()? 
- .read_all() - .await?; + runtime()?.block_on(async { + let session = VortexSession::default().with_tokio(); + let file = session.open_options().open_buffer(bytes)?; + let arr = file.scan()?.into_array_stream()?.read_all().await?; Ok(vec![arr]) }) } diff --git a/vortex-test/compat-gen/src/fixtures/clickbench.rs b/vortex-test/compat-gen/src/fixtures/clickbench.rs index 77051575d07..efdd8f7bd0b 100644 --- a/vortex-test/compat-gen/src/fixtures/clickbench.rs +++ b/vortex-test/compat-gen/src/fixtures/clickbench.rs @@ -1,9 +1,9 @@ -use std::io::Cursor; - use arrow_array::RecordBatch; use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; -use vortex_array::arrow::FromArrowArray; use vortex_array::ArrayRef; +use vortex_array::arrow::FromArrowArray; +use vortex_error::VortexResult; +use vortex_error::vortex_err; use super::Fixture; @@ -18,26 +18,26 @@ impl Fixture for ClickBenchHits1kFixture { "clickbench_hits_1k.vortex" } - fn build(&self) -> Vec { + fn build(&self) -> VortexResult> { let bytes = reqwest::blocking::get(CLICKBENCH_URL) - .expect("failed to download ClickBench parquet") + .map_err(|e| vortex_err!("failed to download ClickBench parquet: {e}"))? .bytes() - .expect("failed to read ClickBench response body"); + .map_err(|e| vortex_err!("failed to read ClickBench response body: {e}"))?; let reader = ParquetRecordBatchReaderBuilder::try_new(bytes) - .expect("failed to open parquet") + .map_err(|e| vortex_err!("failed to open parquet: {e}"))? 
.with_batch_size(1000) .with_limit(1000) .build() - .expect("failed to build parquet reader"); + .map_err(|e| vortex_err!("failed to build parquet reader: {e}"))?; let batches: Vec = reader .collect::, _>>() - .expect("failed to read parquet batches"); + .map_err(|e| vortex_err!("failed to read parquet batches: {e}"))?; batches .into_iter() - .map(|batch| ArrayRef::from_arrow(batch, false).expect("arrow conversion failed")) + .map(|batch| ArrayRef::from_arrow(batch, false)) .collect() } } diff --git a/vortex-test/compat-gen/src/fixtures/encodings.rs b/vortex-test/compat-gen/src/fixtures/encodings.rs index e76aba483a1..cc24b9f9017 100644 --- a/vortex-test/compat-gen/src/fixtures/encodings.rs +++ b/vortex-test/compat-gen/src/fixtures/encodings.rs @@ -1,4 +1,7 @@ +#![allow(dead_code)] + use vortex_array::ArrayRef; +use vortex_error::VortexResult; use super::Fixture; @@ -11,11 +14,8 @@ macro_rules! encoding_stub { $file } - fn build(&self) -> Vec { - todo!(concat!( - "blocked on stable-encodings RFC — ", - $file - )) + fn build(&self) -> VortexResult> { + todo!(concat!("blocked on stable-encodings RFC — ", $file)) } } }; diff --git a/vortex-test/compat-gen/src/fixtures/mod.rs b/vortex-test/compat-gen/src/fixtures/mod.rs index b56d1e897b5..51e23decbbc 100644 --- a/vortex-test/compat-gen/src/fixtures/mod.rs +++ b/vortex-test/compat-gen/src/fixtures/mod.rs @@ -4,6 +4,7 @@ mod synthetic; mod tpch; use vortex_array::ArrayRef; +use vortex_error::VortexResult; /// A deterministic fixture that produces the same arrays every time. pub trait Fixture: Send + Sync { @@ -14,7 +15,7 @@ pub trait Fixture: Send + Sync { /// /// Returns a `Vec` to support chunked fixtures (multiple chunks). /// Single-array fixtures return a one-element vec. - fn build(&self) -> Vec; + fn build(&self) -> VortexResult>; } /// All registered fixtures. 
diff --git a/vortex-test/compat-gen/src/fixtures/synthetic.rs b/vortex-test/compat-gen/src/fixtures/synthetic.rs index d29c1c5d097..24343dfa8f2 100644 --- a/vortex-test/compat-gen/src/fixtures/synthetic.rs +++ b/vortex-test/compat-gen/src/fixtures/synthetic.rs @@ -1,9 +1,13 @@ -use vortex_array::arrays::{BoolArray, ChunkedArray, PrimitiveArray, StructArray, VarBinArray}; -use vortex_array::dtype::field_names::FieldNames; -use vortex_array::dtype::{DType, Nullability, PType}; +use vortex_array::ArrayRef; +use vortex_array::IntoArray; +use vortex_array::arrays::BoolArray; +use vortex_array::arrays::PrimitiveArray; +use vortex_array::arrays::StructArray; +use vortex_array::arrays::VarBinArray; +use vortex_array::dtype::FieldNames; use vortex_array::validity::Validity; -use vortex_array::{ArrayRef, IntoArray}; use vortex_buffer::buffer; +use vortex_error::VortexResult; use super::Fixture; @@ -14,24 +18,36 @@ impl Fixture for PrimitivesFixture { "primitives.vortex" } - fn build(&self) -> Vec { + fn build(&self) -> VortexResult> { let arr = StructArray::try_new( FieldNames::from(["u8", "u16", "u32", "u64", "i32", "i64", "f32", "f64"]), vec![ PrimitiveArray::new(buffer![0u8, 128, 255], Validity::NonNullable).into_array(), - PrimitiveArray::new(buffer![0u16, 32768, 65535], Validity::NonNullable).into_array(), - PrimitiveArray::new(buffer![0u32, 2_147_483_648, 4_294_967_295], Validity::NonNullable).into_array(), - PrimitiveArray::new(buffer![0u64, 9_223_372_036_854_775_808, u64::MAX], Validity::NonNullable).into_array(), - PrimitiveArray::new(buffer![i32::MIN, 0i32, i32::MAX], Validity::NonNullable).into_array(), - PrimitiveArray::new(buffer![i64::MIN, 0i64, i64::MAX], Validity::NonNullable).into_array(), - PrimitiveArray::new(buffer![f32::MIN, 0.0f32, f32::MAX], Validity::NonNullable).into_array(), - PrimitiveArray::new(buffer![f64::MIN, 0.0f64, f64::MAX], Validity::NonNullable).into_array(), + PrimitiveArray::new(buffer![0u16, 32768, 65535], Validity::NonNullable) + 
.into_array(), + PrimitiveArray::new( + buffer![0u32, 2_147_483_648, 4_294_967_295], + Validity::NonNullable, + ) + .into_array(), + PrimitiveArray::new( + buffer![0u64, 9_223_372_036_854_775_808, u64::MAX], + Validity::NonNullable, + ) + .into_array(), + PrimitiveArray::new(buffer![i32::MIN, 0i32, i32::MAX], Validity::NonNullable) + .into_array(), + PrimitiveArray::new(buffer![i64::MIN, 0i64, i64::MAX], Validity::NonNullable) + .into_array(), + PrimitiveArray::new(buffer![f32::MIN, 0.0f32, f32::MAX], Validity::NonNullable) + .into_array(), + PrimitiveArray::new(buffer![f64::MIN, 0.0f64, f64::MAX], Validity::NonNullable) + .into_array(), ], 3, Validity::NonNullable, - ) - .expect("failed to build primitives fixture"); - vec![arr.into_array()] + )?; + Ok(vec![arr.into_array()]) } } @@ -42,16 +58,15 @@ impl Fixture for StringsFixture { "strings.vortex" } - fn build(&self) -> Vec { + fn build(&self) -> VortexResult> { let strings = VarBinArray::from(vec!["", "hello", "こんにちは", "\u{1f980}"]); let arr = StructArray::try_new( FieldNames::from(["text"]), vec![strings.into_array()], 4, Validity::NonNullable, - ) - .expect("failed to build strings fixture"); - vec![arr.into_array()] + )?; + Ok(vec![arr.into_array()]) } } @@ -62,16 +77,15 @@ impl Fixture for BooleansFixture { "booleans.vortex" } - fn build(&self) -> Vec { + fn build(&self) -> VortexResult> { let bools = BoolArray::from_iter([true, false, true, true, false]); let arr = StructArray::try_new( FieldNames::from(["flag"]), vec![bools.into_array()], 5, Validity::NonNullable, - ) - .expect("failed to build booleans fixture"); - vec![arr.into_array()] + )?; + Ok(vec![arr.into_array()]) } } @@ -82,29 +96,18 @@ impl Fixture for NullableFixture { "nullable.vortex" } - fn build(&self) -> Vec { - let nullable_ints = PrimitiveArray::from_option_iter([ - Some(1i32), - None, - Some(42), - None, - Some(-7), - ]); - let nullable_strings = VarBinArray::from(vec![ - Some("hello"), - None, - Some("world"), - Some(""), - None, - 
]); + fn build(&self) -> VortexResult> { + let nullable_ints = + PrimitiveArray::from_option_iter([Some(1i32), None, Some(42), None, Some(-7)]); + let nullable_strings = + VarBinArray::from(vec![Some("hello"), None, Some("world"), Some(""), None]); let arr = StructArray::try_new( FieldNames::from(["int_col", "str_col"]), vec![nullable_ints.into_array(), nullable_strings.into_array()], 5, Validity::NonNullable, - ) - .expect("failed to build nullable fixture"); - vec![arr.into_array()] + )?; + Ok(vec![arr.into_array()]) } } @@ -115,7 +118,7 @@ impl Fixture for StructNestedFixture { "struct_nested.vortex" } - fn build(&self) -> Vec { + fn build(&self) -> VortexResult> { let inner = StructArray::try_new( FieldNames::from(["a", "b"]), vec![ @@ -124,8 +127,7 @@ impl Fixture for StructNestedFixture { ], 3, Validity::NonNullable, - ) - .expect("failed to build inner struct"); + )?; let arr = StructArray::try_new( FieldNames::from(["inner", "value"]), @@ -135,9 +137,8 @@ impl Fixture for StructNestedFixture { ], 3, Validity::NonNullable, - ) - .expect("failed to build struct_nested fixture"); - vec![arr.into_array()] + )?; + Ok(vec![arr.into_array()]) } } @@ -148,21 +149,20 @@ impl Fixture for ChunkedFixture { "chunked.vortex" } - fn build(&self) -> Vec { + fn build(&self) -> VortexResult> { // 3 chunks of 1000 rows each. Values are deterministic: chunk_idx * 1000 + row_idx. (0u32..3) .map(|chunk_idx| { let values: Vec = (0u32..1000).map(|i| chunk_idx * 1000 + i).collect(); let primitives = PrimitiveArray::new(vortex_buffer::Buffer::from(values), Validity::NonNullable); - StructArray::try_new( + Ok(StructArray::try_new( FieldNames::from(["id"]), vec![primitives.into_array()], 1000, Validity::NonNullable, - ) - .expect("failed to build chunk") - .into_array() + )? 
+ .into_array()) }) .collect() } diff --git a/vortex-test/compat-gen/src/fixtures/tpch.rs b/vortex-test/compat-gen/src/fixtures/tpch.rs index 7e341f7e85c..3485a80592a 100644 --- a/vortex-test/compat-gen/src/fixtures/tpch.rs +++ b/vortex-test/compat-gen/src/fixtures/tpch.rs @@ -1,18 +1,20 @@ use arrow_array::RecordBatch; -use tpchgen::generators::{LineItemGenerator, OrderGenerator}; +use tpchgen::generators::LineItemGenerator; +use tpchgen::generators::OrderGenerator; use tpchgen_arrow::RecordBatchIterator; -use vortex_array::arrow::FromArrowArray; use vortex_array::ArrayRef; +use vortex_array::arrow::FromArrowArray; +use vortex_error::VortexResult; use super::Fixture; const SCALE_FACTOR: f64 = 0.01; -fn collect_batches_as_vortex(iter: impl RecordBatchIterator) -> Vec { +fn collect_batches_as_vortex(iter: impl RecordBatchIterator) -> VortexResult> { let batches: Vec = iter.collect(); batches .into_iter() - .map(|batch| ArrayRef::from_arrow(batch, false).expect("arrow conversion failed")) + .map(|batch| ArrayRef::from_arrow(batch, false)) .collect() } @@ -23,9 +25,9 @@ impl Fixture for TpchLineitemFixture { "tpch_lineitem.vortex" } - fn build(&self) -> Vec { - let gen = LineItemGenerator::new(SCALE_FACTOR, 1, 1); - let arrow_iter = tpchgen_arrow::LineItemArrow::new(gen).with_batch_size(65_536); + fn build(&self) -> VortexResult> { + let generator = LineItemGenerator::new(SCALE_FACTOR, 1, 1); + let arrow_iter = tpchgen_arrow::LineItemArrow::new(generator).with_batch_size(65_536); collect_batches_as_vortex(arrow_iter) } } @@ -37,9 +39,9 @@ impl Fixture for TpchOrdersFixture { "tpch_orders.vortex" } - fn build(&self) -> Vec { - let gen = OrderGenerator::new(SCALE_FACTOR, 1, 1); - let arrow_iter = tpchgen_arrow::OrderArrow::new(gen).with_batch_size(65_536); + fn build(&self) -> VortexResult> { + let generator = OrderGenerator::new(SCALE_FACTOR, 1, 1); + let arrow_iter = tpchgen_arrow::OrderArrow::new(generator).with_batch_size(65_536); 
collect_batches_as_vortex(arrow_iter) } } diff --git a/vortex-test/compat-gen/src/main.rs b/vortex-test/compat-gen/src/main.rs index 3777fc64d12..435d6731fbd 100644 --- a/vortex-test/compat-gen/src/main.rs +++ b/vortex-test/compat-gen/src/main.rs @@ -11,7 +11,10 @@ use crate::fixtures::all_fixtures; use crate::manifest::Manifest; #[derive(Parser)] -#[command(name = "compat-gen", about = "Generate Vortex backward-compat fixture files")] +#[command( + name = "compat-gen", + about = "Generate Vortex backward-compat fixture files" +)] struct Cli { /// Version tag for this fixture set (e.g. "0.62.0"). #[arg(long)] @@ -32,7 +35,7 @@ fn main() -> vortex_error::VortexResult<()> { let mut fixture_names = Vec::with_capacity(fixtures.len()); for fixture in &fixtures { - let chunks = fixture.build(); + let chunks = fixture.build()?; let path = cli.output.join(fixture.name()); adapter::write_file(&path, chunks)?; fixture_names.push(fixture.name().to_string()); diff --git a/vortex-test/compat-gen/src/manifest.rs b/vortex-test/compat-gen/src/manifest.rs index 6a438edc1b2..ab799dd9a9a 100644 --- a/vortex-test/compat-gen/src/manifest.rs +++ b/vortex-test/compat-gen/src/manifest.rs @@ -1,5 +1,7 @@ -use chrono::{DateTime, Utc}; -use serde::{Deserialize, Serialize}; +use chrono::DateTime; +use chrono::Utc; +use serde::Deserialize; +use serde::Serialize; /// Manifest listing all fixtures generated for a given version. 
#[derive(Debug, Serialize, Deserialize)] diff --git a/vortex-test/compat-gen/src/test_main.rs b/vortex-test/compat-gen/src/test_main.rs index 1eb6055b5b8..6e30720cfc8 100644 --- a/vortex-test/compat-gen/src/test_main.rs +++ b/vortex-test/compat-gen/src/test_main.rs @@ -8,10 +8,14 @@ use std::path::PathBuf; use clap::Parser; use vortex_error::VortexResult; -use crate::validate::{discover_versions, FixtureSource}; +use crate::validate::FixtureSource; +use crate::validate::discover_versions; #[derive(Parser)] -#[command(name = "compat-test", about = "Validate Vortex backward-compat fixtures")] +#[command( + name = "compat-test", + about = "Validate Vortex backward-compat fixtures" +)] struct Cli { /// HTTPS base URL for the fixture bucket. /// e.g. https://vortex-compat-fixtures.s3.amazonaws.com @@ -48,7 +52,11 @@ fn main() -> VortexResult<()> { } }; - eprintln!("testing {} version(s): {}", versions.len(), versions.join(", ")); + eprintln!( + "testing {} version(s): {}", + versions.len(), + versions.join(", ") + ); let results = validate::validate_all(&source, &versions)?; @@ -61,7 +69,10 @@ fn main() -> VortexResult<()> { total_failed += r.failed.len(); total_skipped += r.skipped; if r.failed.is_empty() { - eprintln!(" v{}: {} passed, {} skipped", r.version, r.passed, r.skipped); + eprintln!( + " v{}: {} passed, {} skipped", + r.version, r.passed, r.skipped + ); } else { eprintln!( " v{}: {} passed, {} FAILED, {} skipped", @@ -76,9 +87,7 @@ fn main() -> VortexResult<()> { } } - eprintln!( - "\nresult: {total_passed} passed, {total_failed} failed, {total_skipped} skipped" - ); + eprintln!("\nresult: {total_passed} passed, {total_failed} failed, {total_skipped} skipped"); if total_failed > 0 { std::process::exit(1); diff --git a/vortex-test/compat-gen/src/validate.rs b/vortex-test/compat-gen/src/validate.rs index 714bf2e9f2a..58218683eea 100644 --- a/vortex-test/compat-gen/src/validate.rs +++ b/vortex-test/compat-gen/src/validate.rs @@ -1,13 +1,17 @@ -use 
std::collections::HashMap; -use std::path::{Path, PathBuf}; +use std::path::PathBuf; +use vortex_array::IntoArray; use vortex_array::arrays::ChunkedArray; -use vortex_array::{assert_arrays_eq, ArrayRef, IntoArray}; +use vortex_array::assert_arrays_eq; use vortex_buffer::ByteBuffer; -use vortex_error::{vortex_bail, vortex_err, VortexResult}; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; +use vortex_error::vortex_err; +use vortex_utils::aliases::hash_map::HashMap; use crate::adapter; -use crate::fixtures::{all_fixtures, Fixture}; +use crate::fixtures::Fixture; +use crate::fixtures::all_fixtures; use crate::manifest::Manifest; /// Result of validating one version's fixtures. @@ -19,12 +23,13 @@ pub struct VersionResult { } /// Validate all versions' fixtures against the current reader. -pub fn validate_all(source: &FixtureSource, versions: &[String]) -> VortexResult> { +pub fn validate_all( + source: &FixtureSource, + versions: &[String], +) -> VortexResult> { let fixtures = all_fixtures(); - let fixture_map: HashMap<&str, &dyn Fixture> = fixtures - .iter() - .map(|f| (f.name(), f.as_ref())) - .collect(); + let fixture_map: HashMap<&str, &dyn Fixture> = + fixtures.iter().map(|f| (f.name(), f.as_ref())).collect(); let mut results = Vec::new(); for version in versions { @@ -72,7 +77,7 @@ fn validate_version( fn validate_one(bytes: ByteBuffer, fixture: &dyn Fixture) -> VortexResult<()> { let actual = adapter::read_file(bytes)?; - let expected = fixture.build(); + let expected = fixture.build()?; let actual_dtype = actual[0].dtype().clone(); let expected_dtype = expected[0].dtype().clone(); @@ -137,14 +142,13 @@ pub fn discover_versions(source: &FixtureSource) -> VortexResult> { for entry in std::fs::read_dir(dir) .map_err(|e| vortex_err!("failed to read dir {}: {e}", dir.display()))? 
{ - let entry = - entry.map_err(|e| vortex_err!("failed to read dir entry: {e}"))?; + let entry = entry.map_err(|e| vortex_err!("failed to read dir entry: {e}"))?; let name = entry.file_name(); let name = name.to_string_lossy(); - if let Some(version) = name.strip_prefix('v') { - if entry.path().join("manifest.json").exists() { - versions.push(version.to_string()); - } + if let Some(version) = name.strip_prefix('v') + && entry.path().join("manifest.json").exists() + { + versions.push(version.to_string()); } } versions.sort(); From 1ecddd0b102149a7ae781b70b867dd702da1ccda Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Thu, 12 Mar 2026 13:55:47 +0000 Subject: [PATCH 07/13] wip back compat tester Signed-off-by: Joe Isaacs --- .github/workflows/compat-test-weekly.yml | 2 +- Cargo.toml | 2 +- vortex-test/compat-gen/Cargo.toml | 6 +++--- vortex-test/compat-gen/src/adapter.rs | 3 +++ vortex-test/compat-gen/src/fixtures/clickbench.rs | 3 +++ vortex-test/compat-gen/src/fixtures/encodings.rs | 3 +++ vortex-test/compat-gen/src/fixtures/mod.rs | 3 +++ vortex-test/compat-gen/src/fixtures/synthetic.rs | 3 +++ vortex-test/compat-gen/src/fixtures/tpch.rs | 3 +++ vortex-test/compat-gen/src/main.rs | 3 +++ vortex-test/compat-gen/src/manifest.rs | 3 +++ vortex-test/compat-gen/src/test_main.rs | 5 ++++- vortex-test/compat-gen/src/validate.rs | 3 +++ 13 files changed, 36 insertions(+), 6 deletions(-) diff --git a/.github/workflows/compat-test-weekly.yml b/.github/workflows/compat-test-weekly.yml index 8339d67dd09..fb05ef937dc 100644 --- a/.github/workflows/compat-test-weekly.yml +++ b/.github/workflows/compat-test-weekly.yml @@ -3,7 +3,7 @@ name: Compat Test on: schedule: - cron: "0 6 * * 1" # Monday 6am UTC - workflow_dispatch: {} + workflow_dispatch: { } env: FIXTURES_URL: https://vortex-compat-fixtures.s3.amazonaws.com diff --git a/Cargo.toml b/Cargo.toml index 69c2a6e3867..6c6aa5ab60f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -282,10 +282,10 @@ vortex-zstd = { version = "0.1.0", 
path = "./encodings/zstd", default-features = # No version constraints for unpublished crates. vortex-bench = { path = "./vortex-bench", default-features = false } +vortex-compat = { path = "./vortex-test/compat-gen" } vortex-cuda = { path = "./vortex-cuda", default-features = false } vortex-cuda-macros = { path = "./vortex-cuda/macros" } vortex-duckdb = { path = "./vortex-duckdb", default-features = false } -vortex-compat = { path = "./vortex-test/compat-gen" } vortex-test-e2e = { path = "./vortex-test/e2e", default-features = false } vortex-test-e2e-cuda = { path = "./vortex-test/e2e-cuda", default-features = false } vortex-tui = { path = "./vortex-tui" } diff --git a/vortex-test/compat-gen/Cargo.toml b/vortex-test/compat-gen/Cargo.toml index afa3c6d9118..1d1c7731f35 100644 --- a/vortex-test/compat-gen/Cargo.toml +++ b/vortex-test/compat-gen/Cargo.toml @@ -33,22 +33,22 @@ vortex-session = { workspace = true } vortex-utils = { workspace = true } # TPC-H generation +arrow-array = { workspace = true } tpchgen = { workspace = true } tpchgen-arrow = { workspace = true } -arrow-array = { workspace = true } # ClickBench parquet reading parquet = { workspace = true } # Async runtime -tokio = { workspace = true, features = ["full"] } futures = { workspace = true } +tokio = { workspace = true, features = ["full"] } # HTTP fetching (for ClickBench fixture + compat-test S3 downloads) reqwest = { workspace = true } # CLI + serialization +chrono = { workspace = true, features = ["serde"] } clap = { workspace = true, features = ["derive"] } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } -chrono = { workspace = true, features = ["serde"] } diff --git a/vortex-test/compat-gen/src/adapter.rs b/vortex-test/compat-gen/src/adapter.rs index 404787415a2..857cb6107e3 100644 --- a/vortex-test/compat-gen/src/adapter.rs +++ b/vortex-test/compat-gen/src/adapter.rs @@ -1,3 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 
Copyright the Vortex contributors + // Epoch C adapter — for Vortex v0.58.0 through HEAD // // Write: session.write_options(), returns WriteSummary, takes &mut sink diff --git a/vortex-test/compat-gen/src/fixtures/clickbench.rs b/vortex-test/compat-gen/src/fixtures/clickbench.rs index efdd8f7bd0b..ca76d1c241b 100644 --- a/vortex-test/compat-gen/src/fixtures/clickbench.rs +++ b/vortex-test/compat-gen/src/fixtures/clickbench.rs @@ -1,3 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + use arrow_array::RecordBatch; use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; use vortex_array::ArrayRef; diff --git a/vortex-test/compat-gen/src/fixtures/encodings.rs b/vortex-test/compat-gen/src/fixtures/encodings.rs index cc24b9f9017..aac1a19d7d7 100644 --- a/vortex-test/compat-gen/src/fixtures/encodings.rs +++ b/vortex-test/compat-gen/src/fixtures/encodings.rs @@ -1,3 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + #![allow(dead_code)] use vortex_array::ArrayRef; diff --git a/vortex-test/compat-gen/src/fixtures/mod.rs b/vortex-test/compat-gen/src/fixtures/mod.rs index 51e23decbbc..307ee334890 100644 --- a/vortex-test/compat-gen/src/fixtures/mod.rs +++ b/vortex-test/compat-gen/src/fixtures/mod.rs @@ -1,3 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + mod clickbench; pub mod encodings; mod synthetic; diff --git a/vortex-test/compat-gen/src/fixtures/synthetic.rs b/vortex-test/compat-gen/src/fixtures/synthetic.rs index 24343dfa8f2..1017a62d99b 100644 --- a/vortex-test/compat-gen/src/fixtures/synthetic.rs +++ b/vortex-test/compat-gen/src/fixtures/synthetic.rs @@ -1,3 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + use vortex_array::ArrayRef; use vortex_array::IntoArray; use vortex_array::arrays::BoolArray; diff --git 
a/vortex-test/compat-gen/src/fixtures/tpch.rs b/vortex-test/compat-gen/src/fixtures/tpch.rs index 3485a80592a..2447733e5a8 100644 --- a/vortex-test/compat-gen/src/fixtures/tpch.rs +++ b/vortex-test/compat-gen/src/fixtures/tpch.rs @@ -1,3 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + use arrow_array::RecordBatch; use tpchgen::generators::LineItemGenerator; use tpchgen::generators::OrderGenerator; diff --git a/vortex-test/compat-gen/src/main.rs b/vortex-test/compat-gen/src/main.rs index 435d6731fbd..58648369c51 100644 --- a/vortex-test/compat-gen/src/main.rs +++ b/vortex-test/compat-gen/src/main.rs @@ -1,3 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + mod adapter; mod fixtures; mod manifest; diff --git a/vortex-test/compat-gen/src/manifest.rs b/vortex-test/compat-gen/src/manifest.rs index ab799dd9a9a..51a8aa6dd33 100644 --- a/vortex-test/compat-gen/src/manifest.rs +++ b/vortex-test/compat-gen/src/manifest.rs @@ -1,3 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + use chrono::DateTime; use chrono::Utc; use serde::Deserialize; diff --git a/vortex-test/compat-gen/src/test_main.rs b/vortex-test/compat-gen/src/test_main.rs index 6e30720cfc8..e43669d8aad 100644 --- a/vortex-test/compat-gen/src/test_main.rs +++ b/vortex-test/compat-gen/src/test_main.rs @@ -1,3 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + mod adapter; mod fixtures; mod manifest; @@ -18,7 +21,7 @@ use crate::validate::discover_versions; )] struct Cli { /// HTTPS base URL for the fixture bucket. - /// e.g. https://vortex-compat-fixtures.s3.amazonaws.com + /// e.g. 
#[arg(long)] fixtures_url: Option, diff --git a/vortex-test/compat-gen/src/validate.rs b/vortex-test/compat-gen/src/validate.rs index 58218683eea..8f1cdbcbc6b 100644 --- a/vortex-test/compat-gen/src/validate.rs +++ b/vortex-test/compat-gen/src/validate.rs @@ -1,3 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + use std::path::PathBuf; use vortex_array::IntoArray; From 83018e2f27f590763d73a6173136a57fdffb8fe9 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Thu, 12 Mar 2026 14:15:55 +0000 Subject: [PATCH 08/13] wip back compat tester Signed-off-by: Joe Isaacs --- vortex-test/compat-gen/PLAN.md | 328 --------------------------------- 1 file changed, 328 deletions(-) delete mode 100644 vortex-test/compat-gen/PLAN.md diff --git a/vortex-test/compat-gen/PLAN.md b/vortex-test/compat-gen/PLAN.md deleted file mode 100644 index ed8195d256c..00000000000 --- a/vortex-test/compat-gen/PLAN.md +++ /dev/null @@ -1,328 +0,0 @@ -# Vortex File Backward Compatibility Testing — Implementation Plan - -RFC: https://github.com/vortex-data/rfcs/pull/23 - -## Overview - -A standalone crate (`vortex-test/compat-gen/`) that generates deterministic `.vortex` fixture files -and validates them across versions. Not a workspace member — uses path deps to workspace crates. - -Two binaries: -- **`compat-gen`**: Build fixture arrays → write `.vortex` files + `manifest.json` -- **`compat-test`**: Fetch fixtures from S3 (plain HTTPS) → read → compare via `assert_arrays_eq!` - -## API Epochs - -The Vortex file write/read API has 3 distinct epochs. The adapter layer (`adapter.rs`) is the only -file that changes when cherry-picking to old release branches. 
- -| Epoch | Versions | Write API | Read (in-memory) | Scan output | Session | -|-------|----------------|---------------------------------------------------------|------------------------------------------------------|--------------------------|---------| -| **A** | 0.36.0 | `VortexWriteOptions::default().write(sink, stream) → W` | `VortexOpenOptions::in_memory().open(buf).await?` | `into_array_stream()` async | None | -| **B** | 0.45.0–0.52.0 | `VortexWriteOptions::default().write(sink, stream) → W` | `VortexOpenOptions::in_memory().open(buf)?` (sync) | `into_array_iter()` sync | Exists, not wired | -| **C** | 0.58.0–HEAD | `session.write_options().write(sink, stream) → WriteSummary` | `session.open_options().open_buffer(buf)?` (sync) | `into_array_stream()` async | Central | - -### Key Breaking Changes - -- **A→B**: In-memory `open()` changed from async to sync; scan switched to sync `into_array_iter()` -- **B→C**: - - `VortexWriteOptions` lost `Default`, now constructed from `VortexSession` - - `write()` return type: `W` (sink) → `WriteSummary` - - `VortexOpenOptions` lost the `FileType` generic parameter - - `in_memory().open()` → `open_options().open_buffer()` - - Scan: `into_array_iter()` → `into_array_stream()` (async restored) - -## Array Construction API Stability - -Array construction is stable across ALL versions — fixture builders need NO adaptation: - -| API | Status | -|-----|--------| -| `StructArray::try_new(field_names, fields, len, validity)` | Stable 0.36.0–HEAD | -| `PrimitiveArray::new(buffer![...], validity)` | Stable 0.36.0–HEAD | -| `buffer![1, 2, 3].into_array()` | Stable 0.36.0–HEAD | -| `VarBinArray::from(vec!["a", "b"])` | Stable 0.36.0–HEAD | -| `BoolArray::from_iter([true, false])` | Stable 0.36.0–HEAD | -| `ArrayRef::from_arrow(record_batch, false)` | Stable 0.36.0–HEAD | -| `ChunkedArray::try_new(chunks, dtype)` | Stable 0.36.0–HEAD | - -## Crate Layout - -``` -vortex-test/compat-gen/ - Cargo.toml # standalone, path deps to 
workspace - PLAN.md - src/ - main.rs # compat-gen CLI: --version, --output - test_main.rs # compat-test CLI: --fixtures-url - adapter.rs # write_file() + read_file() — ONLY branch-specific file - manifest.rs # Manifest serde struct - validate.rs # fetch from HTTPS + assert_arrays_eq! loop - fixtures/ - mod.rs # Fixture trait + all_fixtures() registry - synthetic.rs # 6 synthetic fixtures - tpch.rs # 2 TPC-H fixtures (lineitem, orders) - clickbench.rs # ClickBench hits 1k fixture - encodings.rs # per-encoding fixture stubs (todo!()) -``` - -## Fixture Suite - -### Trait - -```rust -pub trait Fixture: Send + Sync { - fn name(&self) -> &str; - fn build(&self) -> Vec; -} -``` - -Returns `Vec` to support chunked fixtures. Single-array fixtures return a one-element vec. - -### Synthetic Fixtures (implemented) - -| File | Schema | Purpose | -|------|--------|---------| -| `primitives.vortex` | `Struct{u8, u16, u32, u64, i32, i64, f32, f64}` | Primitive round-trip | -| `strings.vortex` | `Struct{Utf8}` | String encoding | -| `booleans.vortex` | `Struct{Bool}` | Bool round-trip | -| `nullable.vortex` | `Struct{Nullable, Nullable}` | Null handling | -| `struct_nested.vortex` | `Struct{Struct{i32, Utf8}, f64}` | Nested types | -| `chunked.vortex` | Chunked `Struct{u32}` (3 x 1000 rows) | Multi-chunk files | - -### Realistic Fixtures (implemented) - -| File | Source | Rows | Purpose | -|------|--------|------|---------| -| `tpch_lineitem.vortex` | TPC-H SF 0.01 via `tpchgen-arrow` | ~60K | Numeric + string schema | -| `tpch_orders.vortex` | TPC-H SF 0.01 via `tpchgen-arrow` | ~15K | Date + decimal types | -| `clickbench_hits_1k.vortex` | First 1000 rows of ClickBench `hits` parquet (pinned URL) | 1000 | Wide table (105 cols) | - -### Per-Encoding Fixture Stubs (todo) - -One fixture per stable encoding to exercise encoding-specific read paths. These are stubbed with -`todo!()` until the "stable encodings" RFC defines what's frozen. - -| File | Encoding | Stub? 
| -|------|----------|-------| -| `enc_dict.vortex` | DictArray | `todo!()` | -| `enc_runend.vortex` | RunEndArray | `todo!()` | -| `enc_constant.vortex` | ConstantArray | `todo!()` | -| `enc_sparse.vortex` | SparseArray | `todo!()` | -| `enc_alp.vortex` | ALPArray | `todo!()` | -| `enc_bitpacked.vortex` | BitPackedArray | `todo!()` | -| `enc_fsst.vortex` | FSSTArray | `todo!()` | - -## Adapter Layer - -Only `adapter.rs` changes per epoch. Contains two functions: - -```rust -pub fn write_file(path: &Path, chunks: Vec) -> VortexResult<()>; -pub fn read_file(bytes: ByteBuffer) -> VortexResult>; -``` - -### Epoch A — v0.36.0 - -```rust -pub fn write_file(path: &Path, chunks: Vec) -> VortexResult<()> { - let dtype = chunks[0].dtype().clone(); - let stream = ArrayStreamAdapter::new(dtype, stream::iter(chunks.into_iter().map(Ok))); - let rt = Runtime::new().expect("tokio runtime"); - rt.block_on(async { - let file = tokio::fs::File::create(path).await?; - let _sink = VortexWriteOptions::default().write(file, stream).await?; - Ok(()) - }) -} - -pub fn read_file(bytes: ByteBuffer) -> VortexResult> { - let rt = Runtime::new().expect("tokio runtime"); - rt.block_on(async { - let file = VortexOpenOptions::in_memory().open(bytes).await?; // async - let arr = file.scan()?.into_array_stream()?.read_all().await?; - Ok(vec![arr]) - }) -} -``` - -### Epoch B — v0.45.0–v0.52.0 - -```rust -pub fn write_file(path: &Path, chunks: Vec) -> VortexResult<()> { - // identical to Epoch A — same VortexWriteOptions::default() API - // ... 
-} - -pub fn read_file(bytes: ByteBuffer) -> VortexResult> { - let file = VortexOpenOptions::in_memory().open(bytes)?; // sync now - let arr = file.scan()?.into_array_iter()?.read_all()?; // sync - Ok(vec![arr]) -} -``` - -### Epoch C — v0.58.0+/HEAD - -```rust -pub fn write_file(path: &Path, chunks: Vec) -> VortexResult<()> { - let session = VortexSession::default(); - let dtype = chunks[0].dtype().clone(); - let stream = ArrayStreamAdapter::new(dtype, stream::iter(chunks.into_iter().map(Ok))); - let rt = Runtime::new().expect("tokio runtime"); - rt.block_on(async { - let mut file = tokio::fs::File::create(path).await?; - let _summary = session.write_options().write(&mut file, stream).await?; - Ok(()) - }) -} - -pub fn read_file(bytes: ByteBuffer) -> VortexResult> { - let session = VortexSession::default(); - let file = session.open_options().open_buffer(bytes)?; - let rt = Runtime::new().expect("tokio runtime"); - rt.block_on(async { - let arr = file.scan()?.into_array_stream()?.read_all().await?; - Ok(vec![arr]) - }) -} -``` - -## Validation Strategy - -Comparison uses `assert_arrays_eq!` with `ChunkedArray` wrapping: - -```rust -fn validate(actual: Vec, expected: Vec) -> VortexResult<()> { - let actual_dtype = actual[0].dtype().clone(); - let expected_dtype = expected[0].dtype().clone(); - let actual_chunked = ChunkedArray::try_new(actual, actual_dtype)?; - let expected_chunked = ChunkedArray::try_new(expected, expected_dtype)?; - assert_arrays_eq!(actual_chunked, expected_chunked); - Ok(()) -} -``` - -The writer may re-chunk across versions, but `assert_arrays_eq!` compares element-by-element -so chunk boundaries don't matter. - -## Fixture Fetching - -Fixtures are stored in a public S3 bucket accessible via plain HTTPS. `compat-test` uses -`reqwest` (blocking) to fetch — no AWS SDK needed. 
- -``` -https://vortex-compat-fixtures.s3.amazonaws.com/v{VERSION}/manifest.json -https://vortex-compat-fixtures.s3.amazonaws.com/v{VERSION}/{fixture}.vortex -``` - -Version discovery: `compat-test` takes a `--versions` flag listing which versions to test, -or discovers them from a top-level `versions.json` in the bucket. - -## CI Workflows - -### `compat-gen-upload.yml` — on tag push or manual dispatch - -```yaml -on: - push: - tags: ["[0-9]+.[0-9]+.[0-9]+"] - workflow_dispatch: - inputs: - tag: { description: "Git tag", required: true } - -jobs: - upload-fixtures: - runs-on: ubuntu-latest - permissions: - id-token: write - steps: - - uses: actions/checkout@v4 - with: { ref: "${{ inputs.tag || github.ref_name }}" } - - uses: dtolnay/rust-toolchain@stable - - run: | - VERSION=${{ inputs.tag || github.ref_name }} - cargo run --manifest-path vortex-test/compat-gen/Cargo.toml \ - --bin compat-gen -- --version "$VERSION" --output /tmp/fixtures/ - - run: | - VERSION=${{ inputs.tag || github.ref_name }} - aws s3 cp /tmp/fixtures/ s3://vortex-compat-fixtures/v${VERSION}/ --recursive -``` - -### `compat-test-weekly.yml` — weekly + manual - -```yaml -on: - schedule: - - cron: "0 6 * * 1" - workflow_dispatch: {} - -jobs: - compat-test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@stable - - run: | - cargo run --manifest-path vortex-test/compat-gen/Cargo.toml \ - --bin compat-test -- \ - --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com -``` - -## Code Size Summary - -| Component | ~Lines | Shared across branches? 
| -|-----------|--------|------------------------| -| `Cargo.toml` | 35 | Yes | -| `src/main.rs` (compat-gen CLI) | 56 | Yes | -| `src/test_main.rs` (compat-test CLI) | 40 | Yes | -| `src/adapter.rs` (write + read) | 55 | **No — 3 epoch variants** | -| `src/manifest.rs` | 10 | Yes | -| `src/validate.rs` (fetch + compare) | 60 | Yes | -| `src/fixtures/mod.rs` (trait + registry) | 40 | Yes | -| `src/fixtures/synthetic.rs` (6 fixtures) | 170 | Yes | -| `src/fixtures/tpch.rs` (2 fixtures) | 45 | Yes | -| `src/fixtures/clickbench.rs` (1 fixture) | 50 | Yes | -| `src/fixtures/encodings.rs` (stubs) | 60 | Yes | -| CI workflows (2 YAML files) | 80 | Yes | -| **Total** | **~700** | **~645 shared (92%), ~55 branch-specific (8%)** | - -## What Changes Per Version When Cherry-Picking - -| Component | Changes? | -|-----------|----------| -| Fixture trait + registry | No | -| All fixture builders | No | -| `adapter.rs` | **Yes — ~55 lines, 3 variants** | -| `main.rs`, `test_main.rs`, `manifest.rs`, `validate.rs` | No | -| `Cargo.toml` | No (path deps resolve to local version) | -| CI workflows | No | - -## Usage - -```bash -# Generate fixtures for the current version -cargo run --manifest-path vortex-test/compat-gen/Cargo.toml \ - --bin compat-gen -- --version 0.62.0 --output /tmp/fixtures/ - -# Validate fixtures from S3 against the current reader -cargo run --manifest-path vortex-test/compat-gen/Cargo.toml \ - --bin compat-test -- \ - --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com - -# Validate from a local directory (for development) -cargo run --manifest-path vortex-test/compat-gen/Cargo.toml \ - --bin compat-test -- \ - --fixtures-dir /tmp/fixtures/ -``` - -## Implementation Order - -1. ~~Fixture trait + synthetic builders~~ ✅ -2. ~~TPC-H fixtures~~ ✅ -3. ~~compat-gen binary (main.rs)~~ ✅ -4. ~~Adapter write path (3 epochs)~~ ✅ -5. ClickBench fixture -6. Per-encoding fixture stubs -7. Adapter read path (3 epochs) -8. 
compat-test binary (test_main.rs + validate.rs) -9. CI workflows From b9a2fa8cf701f57e671096d2420ffa761cae362b Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Thu, 12 Mar 2026 14:30:49 +0000 Subject: [PATCH 09/13] wip back compat tester Signed-off-by: Joe Isaacs --- .github/workflows/compat-gen-upload.yml | 15 ++------ vortex-test/compat-gen/README.md | 32 +++++++++++++++-- .../scripts/update-versions-json.py | 34 +++++++++++++++++++ 3 files changed, 65 insertions(+), 16 deletions(-) create mode 100755 vortex-test/compat-gen/scripts/update-versions-json.py diff --git a/.github/workflows/compat-gen-upload.yml b/.github/workflows/compat-gen-upload.yml index e317b43222e..1b0f8699c2c 100644 --- a/.github/workflows/compat-gen-upload.yml +++ b/.github/workflows/compat-gen-upload.yml @@ -41,19 +41,8 @@ jobs: - name: Update versions.json run: | - # Fetch existing versions.json or start with empty array aws s3 cp "s3://${S3_BUCKET}/versions.json" /tmp/versions.json 2>/dev/null \ || echo '[]' > /tmp/versions.json - # Append new version if not already present, sort - python3 -c " - import json, sys - with open('/tmp/versions.json') as f: - versions = json.load(f) - v = sys.argv[1] - if v not in versions: - versions.append(v) - versions.sort(key=lambda x: list(map(int, x.split('.')))) - with open('/tmp/versions.json', 'w') as f: - json.dump(versions, f, indent=2) - " "${{ inputs.version }}" + python3 vortex-test/compat-gen/scripts/update-versions-json.py \ + /tmp/versions.json "${{ inputs.version }}" aws s3 cp /tmp/versions.json "s3://${S3_BUCKET}/versions.json" diff --git a/vortex-test/compat-gen/README.md b/vortex-test/compat-gen/README.md index f28023a133e..7359b03e0f7 100644 --- a/vortex-test/compat-gen/README.md +++ b/vortex-test/compat-gen/README.md @@ -10,6 +10,30 @@ written by older versions: Fixtures are stored in an S3 bucket. CI uploads new fixtures on every release tag and runs weekly validation against all prior versions. 
+## Fixture Contract + +Fixtures are the unit of backward-compatibility. Each fixture is a named file +(e.g. `primitives.vortex`) whose contents are defined by a deterministic `build()` +method. The following rules apply: + +- **Immutable data.** Once a fixture's `build()` is defined, its output (columns, + values, nulls, ordering) must never change. Every version that includes that + fixture must produce byte-for-byte identical logical arrays. `compat-test` + validates this by rebuilding expected arrays from `build()` and comparing them + against what was read from the stored file. + +- **New capabilities get new files.** To test a new encoding, data type, or + structural pattern, add a new fixture with a new filename. Never modify an + existing fixture to cover new ground. + +- **Older versions have fewer fixtures.** Each version's `manifest.json` lists + which fixtures were generated for that version. `compat-test` only validates + the fixtures listed in the manifest — it skips any fixture that didn't exist + at that version. + +- **`versions.json`** is the top-level index listing every version that has + uploaded fixtures. `compat-test` iterates over all listed versions. + ## First-Time Setup: Bootstrap the Bucket After creating the S3 bucket (see [AWS Setup](#aws-setup-one-time) below), seed it @@ -279,6 +303,9 @@ stubbed and will be enabled once the stable-encodings RFC lands. ### Adding a new fixture +New encodings, data types, or structural patterns always get a **new fixture file**. +Never modify an existing fixture's `build()` output (see [Fixture Contract](#fixture-contract)). + 1. Create a struct implementing the `Fixture` trait in `src/fixtures/`: ```rust pub struct MyFixture; @@ -289,9 +316,8 @@ stubbed and will be enabled once the stable-encodings RFC lands. ``` 2. Register it in `all_fixtures()` in `src/fixtures/mod.rs`. 3. Run `compat-gen` locally to verify it produces a valid file. 
- -The `build()` method **must be deterministic** — `compat-test` calls it to produce the -expected arrays and compares against what was read from disk. +4. Upload fixtures for the current version — the new file will appear in that + version's `manifest.json`. Older versions are unaffected. ## Adapter Epochs diff --git a/vortex-test/compat-gen/scripts/update-versions-json.py b/vortex-test/compat-gen/scripts/update-versions-json.py new file mode 100755 index 00000000000..d12dc2cd27c --- /dev/null +++ b/vortex-test/compat-gen/scripts/update-versions-json.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright the Vortex contributors + +"""Append a version to versions.json if not already present, keeping sorted order.""" + +import json +import sys + + +def main(): + if len(sys.argv) != 3: + print(f"Usage: {sys.argv[0]} ", file=sys.stderr) + sys.exit(1) + + path, version = sys.argv[1], sys.argv[2] + + try: + with open(path) as f: + versions = json.load(f) + except FileNotFoundError: + versions = [] + + if version not in versions: + versions.append(version) + versions.sort(key=lambda x: list(map(int, x.split(".")))) + + with open(path, "w") as f: + json.dump(versions, f, indent=2) + f.write("\n") + + +if __name__ == "__main__": + main() From f899256d5047c9418dcd345795ae66f2ff3d4ff2 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Thu, 12 Mar 2026 14:47:47 +0000 Subject: [PATCH 10/13] wip back compat tester Signed-off-by: Joe Isaacs --- .github/workflows/compat-gen-upload.yml | 2 +- .github/workflows/compat-test-weekly.yml | 2 +- vortex-test/compat-gen/Cargo.toml | 6 ++-- vortex-test/compat-gen/README.md | 32 +++++++++---------- vortex-test/compat-gen/src/adapter.rs | 2 -- vortex-test/compat-gen/src/lib.rs | 7 ++++ vortex-test/compat-gen/src/main.rs | 19 ++++------- .../src/{test_main.rs => validate_main.rs} | 18 +++-------- 8 files changed, 39 insertions(+), 49 deletions(-) create mode 100644 
vortex-test/compat-gen/src/lib.rs rename vortex-test/compat-gen/src/{test_main.rs => validate_main.rs} (88%) diff --git a/.github/workflows/compat-gen-upload.yml b/.github/workflows/compat-gen-upload.yml index 1b0f8699c2c..055e0ccf3ff 100644 --- a/.github/workflows/compat-gen-upload.yml +++ b/.github/workflows/compat-gen-upload.yml @@ -25,7 +25,7 @@ jobs: - name: Generate fixtures run: | - cargo run -p vortex-compat --release --bin compat-gen -- \ + cargo run -p vortex-compat --release --bin gen -- \ --version "${{ inputs.version }}" --output /tmp/fixtures/ - name: Configure AWS credentials diff --git a/.github/workflows/compat-test-weekly.yml b/.github/workflows/compat-test-weekly.yml index fb05ef937dc..7766bffe7a4 100644 --- a/.github/workflows/compat-test-weekly.yml +++ b/.github/workflows/compat-test-weekly.yml @@ -20,5 +20,5 @@ jobs: - name: Run compat tests run: | - cargo run -p vortex-compat --release --bin compat-test -- \ + cargo run -p vortex-compat --release --bin validate -- \ --fixtures-url "$FIXTURES_URL" diff --git a/vortex-test/compat-gen/Cargo.toml b/vortex-test/compat-gen/Cargo.toml index 1d1c7731f35..c1e11d4fbb2 100644 --- a/vortex-test/compat-gen/Cargo.toml +++ b/vortex-test/compat-gen/Cargo.toml @@ -16,12 +16,12 @@ version = { workspace = true } workspace = true [[bin]] -name = "compat-gen" +name = "gen" path = "src/main.rs" [[bin]] -name = "compat-test" -path = "src/test_main.rs" +name = "validate" +path = "src/validate_main.rs" [dependencies] # Vortex crates diff --git a/vortex-test/compat-gen/README.md b/vortex-test/compat-gen/README.md index 7359b03e0f7..5fbb624a314 100644 --- a/vortex-test/compat-gen/README.md +++ b/vortex-test/compat-gen/README.md @@ -3,8 +3,8 @@ This crate provides two binaries that together ensure Vortex can always read files written by older versions: -- **`compat-gen`** — generates deterministic fixture files for a given Vortex version. 
-- **`compat-test`** — reads fixtures from every historical version and validates +- **`gen`** — generates deterministic fixture files for a given Vortex version. +- **`validate`** — reads fixtures from every historical version and validates they round-trip to the expected arrays. Fixtures are stored in an S3 bucket. CI uploads new fixtures on every release tag @@ -18,7 +18,7 @@ method. The following rules apply: - **Immutable data.** Once a fixture's `build()` is defined, its output (columns, values, nulls, ordering) must never change. Every version that includes that - fixture must produce byte-for-byte identical logical arrays. `compat-test` + fixture must produce byte-for-byte identical logical arrays. `validate` validates this by rebuilding expected arrays from `build()` and comparing them against what was read from the stored file. @@ -27,12 +27,12 @@ method. The following rules apply: existing fixture to cover new ground. - **Older versions have fewer fixtures.** Each version's `manifest.json` lists - which fixtures were generated for that version. `compat-test` only validates + which fixtures were generated for that version. `validate` only validates the fixtures listed in the manifest — it skips any fixture that didn't exist at that version. - **`versions.json`** is the top-level index listing every version that has - uploaded fixtures. `compat-test` iterates over all listed versions. + uploaded fixtures. `validate` iterates over all listed versions. ## First-Time Setup: Bootstrap the Bucket @@ -41,7 +41,7 @@ with the first fixture set: ```bash # 1. Generate fixtures for the current version -cargo run -p vortex-compat --release --bin compat-gen -- \ +cargo run -p vortex-compat --release --bin gen -- \ --version 0.62.0 --output /tmp/fixtures/ # 2. Upload to S3 @@ -54,7 +54,7 @@ AWS_PROFILE=vortex-ci aws s3 cp /tmp/versions.json \ s3://vortex-compat-fixtures/versions.json # 4. 
Verify the round-trip -AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin compat-test -- \ +AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin validate -- \ --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com ``` @@ -67,7 +67,7 @@ When a new Vortex version is tagged and you want to upload its fixtures manually VERSION=0.63.0 # 1. Generate fixtures -cargo run -p vortex-compat --release --bin compat-gen -- \ +cargo run -p vortex-compat --release --bin gen -- \ --version "$VERSION" --output /tmp/fixtures/ # 2. Upload to S3 under the new version prefix @@ -92,7 +92,7 @@ AWS_PROFILE=vortex-ci aws s3 cp /tmp/versions.json \ s3://vortex-compat-fixtures/versions.json # 4. Verify all versions (including the new one) -AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin compat-test -- \ +AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin validate -- \ --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com ``` @@ -105,7 +105,7 @@ already exists in the bucket, the upload overwrites the existing prefix: VERSION=0.62.0 # 1. Regenerate -cargo run -p vortex-compat --release --bin compat-gen -- \ +cargo run -p vortex-compat --release --bin gen -- \ --version "$VERSION" --output /tmp/fixtures/ # 2. Overwrite in S3 @@ -113,7 +113,7 @@ AWS_PROFILE=vortex-ci aws s3 cp /tmp/fixtures/ \ "s3://vortex-compat-fixtures/v${VERSION}/" --recursive # 3. 
Verify -AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin compat-test -- \ +AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin validate -- \ --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com ``` @@ -125,18 +125,18 @@ You can skip S3 entirely and work against local directories: ```bash # Generate into a versioned subdirectory -cargo run -p vortex-compat --release --bin compat-gen -- \ +cargo run -p vortex-compat --release --bin gen -- \ --version 0.62.0 --output /tmp/compat-root/v0.62.0/ # Validate all local versions -cargo run -p vortex-compat --release --bin compat-test -- \ +cargo run -p vortex-compat --release --bin validate -- \ --fixtures-dir /tmp/compat-root/ ``` If the bucket requires authenticated access, set your AWS profile: ```bash -AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin compat-test -- \ +AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin validate -- \ --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com ``` @@ -315,7 +315,7 @@ Never modify an existing fixture's `build()` output (see [Fixture Contract](#fix } ``` 2. Register it in `all_fixtures()` in `src/fixtures/mod.rs`. -3. Run `compat-gen` locally to verify it produces a valid file. +3. Run `gen` locally to verify it produces a valid file. 4. Upload fixtures for the current version — the new file will appear in that version's `manifest.json`. Older versions are unaffected. @@ -342,4 +342,4 @@ To generate fixtures for a version in Epoch A or B: 2. Cherry-pick the compat-gen crate: `git cherry-pick --no-commit ` 3. Swap `src/adapter.rs` to the appropriate epoch's implementation 4. Resolve any dependency mismatches in `Cargo.toml` -5. Run `compat-gen` and upload the resulting fixtures +5. 
Run `gen` and upload the resulting fixtures diff --git a/vortex-test/compat-gen/src/adapter.rs b/vortex-test/compat-gen/src/adapter.rs index 857cb6107e3..d3a92f2bfa2 100644 --- a/vortex-test/compat-gen/src/adapter.rs +++ b/vortex-test/compat-gen/src/adapter.rs @@ -26,7 +26,6 @@ fn runtime() -> VortexResult { } /// Write a sequence of array chunks as a `.vortex` file. -#[allow(dead_code)] pub fn write_file(path: &Path, chunks: Vec) -> VortexResult<()> { let dtype = chunks[0].dtype().clone(); let stream = ArrayStreamAdapter::new(dtype, stream::iter(chunks.into_iter().map(Ok))); @@ -42,7 +41,6 @@ pub fn write_file(path: &Path, chunks: Vec) -> VortexResult<()> { } /// Read a `.vortex` file from bytes, returning the arrays. -#[allow(dead_code)] pub fn read_file(bytes: ByteBuffer) -> VortexResult> { runtime()?.block_on(async { let session = VortexSession::default().with_tokio(); diff --git a/vortex-test/compat-gen/src/lib.rs b/vortex-test/compat-gen/src/lib.rs new file mode 100644 index 00000000000..6b758691198 --- /dev/null +++ b/vortex-test/compat-gen/src/lib.rs @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +pub mod adapter; +pub mod fixtures; +pub mod manifest; +pub mod validate; diff --git a/vortex-test/compat-gen/src/main.rs b/vortex-test/compat-gen/src/main.rs index 58648369c51..1f743e37032 100644 --- a/vortex-test/compat-gen/src/main.rs +++ b/vortex-test/compat-gen/src/main.rs @@ -1,23 +1,16 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -mod adapter; -mod fixtures; -mod manifest; - use std::path::PathBuf; use chrono::Utc; use clap::Parser; - -use crate::fixtures::all_fixtures; -use crate::manifest::Manifest; +use vortex_compat::fixtures::all_fixtures; +use vortex_compat::manifest::Manifest; +use vortex_error::VortexResult; #[derive(Parser)] -#[command( - name = "compat-gen", - about = "Generate Vortex backward-compat fixture files" -)] 
+#[command(name = "gen", about = "Generate Vortex backward-compat fixture files")] struct Cli { /// Version tag for this fixture set (e.g. "0.62.0"). #[arg(long)] @@ -28,7 +21,7 @@ struct Cli { output: PathBuf, } -fn main() -> vortex_error::VortexResult<()> { +fn main() -> VortexResult<()> { let cli = Cli::parse(); std::fs::create_dir_all(&cli.output) @@ -40,7 +33,7 @@ fn main() -> vortex_error::VortexResult<()> { for fixture in &fixtures { let chunks = fixture.build()?; let path = cli.output.join(fixture.name()); - adapter::write_file(&path, chunks)?; + vortex_compat::adapter::write_file(&path, chunks)?; fixture_names.push(fixture.name().to_string()); eprintln!(" wrote {}", fixture.name()); } diff --git a/vortex-test/compat-gen/src/test_main.rs b/vortex-test/compat-gen/src/validate_main.rs similarity index 88% rename from vortex-test/compat-gen/src/test_main.rs rename to vortex-test/compat-gen/src/validate_main.rs index e43669d8aad..245004a20fd 100644 --- a/vortex-test/compat-gen/src/test_main.rs +++ b/vortex-test/compat-gen/src/validate_main.rs @@ -1,24 +1,16 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -mod adapter; -mod fixtures; -mod manifest; -mod validate; - use std::path::PathBuf; use clap::Parser; +use vortex_compat::validate::FixtureSource; +use vortex_compat::validate::discover_versions; +use vortex_compat::validate::validate_all; use vortex_error::VortexResult; -use crate::validate::FixtureSource; -use crate::validate::discover_versions; - #[derive(Parser)] -#[command( - name = "compat-test", - about = "Validate Vortex backward-compat fixtures" -)] +#[command(name = "validate", about = "Validate Vortex backward-compat fixtures")] struct Cli { /// HTTPS base URL for the fixture bucket. /// e.g. 
@@ -61,7 +53,7 @@ fn main() -> VortexResult<()> { versions.join(", ") ); - let results = validate::validate_all(&source, &versions)?; + let results = validate_all(&source, &versions)?; let mut total_passed = 0; let mut total_failed = 0; From 536c0bcb9504a92b1a0906d179377c7ed6df2227 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Thu, 12 Mar 2026 14:58:40 +0000 Subject: [PATCH 11/13] wip back compat tester Signed-off-by: Joe Isaacs --- .github/workflows/compat-gen-upload.yml | 2 +- .github/workflows/compat-test-weekly.yml | 2 +- vortex-test/compat-gen/Cargo.toml | 4 +- vortex-test/compat-gen/README.md | 42 ++++++++++--------- .../compat-gen/src/fixtures/encodings.rs | 33 --------------- vortex-test/compat-gen/src/fixtures/mod.rs | 9 ---- vortex-test/compat-gen/src/main.rs | 5 ++- vortex-test/compat-gen/src/validate_main.rs | 10 +++-- 8 files changed, 37 insertions(+), 70 deletions(-) delete mode 100644 vortex-test/compat-gen/src/fixtures/encodings.rs diff --git a/.github/workflows/compat-gen-upload.yml b/.github/workflows/compat-gen-upload.yml index 055e0ccf3ff..1b0f8699c2c 100644 --- a/.github/workflows/compat-gen-upload.yml +++ b/.github/workflows/compat-gen-upload.yml @@ -25,7 +25,7 @@ jobs: - name: Generate fixtures run: | - cargo run -p vortex-compat --release --bin gen -- \ + cargo run -p vortex-compat --release --bin compat-gen -- \ --version "${{ inputs.version }}" --output /tmp/fixtures/ - name: Configure AWS credentials diff --git a/.github/workflows/compat-test-weekly.yml b/.github/workflows/compat-test-weekly.yml index 7766bffe7a4..1eaa22c15b8 100644 --- a/.github/workflows/compat-test-weekly.yml +++ b/.github/workflows/compat-test-weekly.yml @@ -20,5 +20,5 @@ jobs: - name: Run compat tests run: | - cargo run -p vortex-compat --release --bin validate -- \ + cargo run -p vortex-compat --release --bin compat-validate -- \ --fixtures-url "$FIXTURES_URL" diff --git a/vortex-test/compat-gen/Cargo.toml b/vortex-test/compat-gen/Cargo.toml index 
c1e11d4fbb2..66595ab6b4c 100644 --- a/vortex-test/compat-gen/Cargo.toml +++ b/vortex-test/compat-gen/Cargo.toml @@ -16,11 +16,11 @@ version = { workspace = true } workspace = true [[bin]] -name = "gen" +name = "compat-gen" path = "src/main.rs" [[bin]] -name = "validate" +name = "compat-validate" path = "src/validate_main.rs" [dependencies] diff --git a/vortex-test/compat-gen/README.md b/vortex-test/compat-gen/README.md index 5fbb624a314..e11a1675cde 100644 --- a/vortex-test/compat-gen/README.md +++ b/vortex-test/compat-gen/README.md @@ -3,8 +3,8 @@ This crate provides two binaries that together ensure Vortex can always read files written by older versions: -- **`gen`** — generates deterministic fixture files for a given Vortex version. -- **`validate`** — reads fixtures from every historical version and validates +- **`compat-gen`** — generates deterministic fixture files for a given Vortex version. +- **`compat-validate`** — reads fixtures from every historical version and validates they round-trip to the expected arrays. Fixtures are stored in an S3 bucket. CI uploads new fixtures on every release tag @@ -18,7 +18,7 @@ method. The following rules apply: - **Immutable data.** Once a fixture's `build()` is defined, its output (columns, values, nulls, ordering) must never change. Every version that includes that - fixture must produce byte-for-byte identical logical arrays. `validate` + fixture must produce byte-for-byte identical logical arrays. `compat-validate` validates this by rebuilding expected arrays from `build()` and comparing them against what was read from the stored file. @@ -27,12 +27,19 @@ method. The following rules apply: existing fixture to cover new ground. - **Older versions have fewer fixtures.** Each version's `manifest.json` lists - which fixtures were generated for that version. `validate` only validates + which fixtures were generated for that version. 
`compat-validate` only validates the fixtures listed in the manifest — it skips any fixture that didn't exist at that version. - **`versions.json`** is the top-level index listing every version that has - uploaded fixtures. `validate` iterates over all listed versions. + uploaded fixtures. `compat-validate` iterates over all listed versions. + +- **Watch for dependency drift.** `compat-validate` compares stored files against + `build()` output from the *current* code. If a dependency (e.g. `tpchgen`) + silently changes its output across versions, old fixtures will fail validation + even though the Vortex reader is fine. If you see unexpected failures across + all old versions for a specific fixture, check whether its `build()` deps + changed before blaming the reader. ## First-Time Setup: Bootstrap the Bucket @@ -41,7 +48,7 @@ with the first fixture set: ```bash # 1. Generate fixtures for the current version -cargo run -p vortex-compat --release --bin gen -- \ +cargo run -p vortex-compat --release --bin compat-gen -- \ --version 0.62.0 --output /tmp/fixtures/ # 2. Upload to S3 @@ -54,7 +61,7 @@ AWS_PROFILE=vortex-ci aws s3 cp /tmp/versions.json \ s3://vortex-compat-fixtures/versions.json # 4. Verify the round-trip -AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin validate -- \ +AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin compat-validate -- \ --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com ``` @@ -67,7 +74,7 @@ When a new Vortex version is tagged and you want to upload its fixtures manually VERSION=0.63.0 # 1. Generate fixtures -cargo run -p vortex-compat --release --bin gen -- \ +cargo run -p vortex-compat --release --bin compat-gen -- \ --version "$VERSION" --output /tmp/fixtures/ # 2. Upload to S3 under the new version prefix @@ -92,7 +99,7 @@ AWS_PROFILE=vortex-ci aws s3 cp /tmp/versions.json \ s3://vortex-compat-fixtures/versions.json # 4. 
Verify all versions (including the new one) -AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin validate -- \ +AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin compat-validate -- \ --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com ``` @@ -105,7 +112,7 @@ already exists in the bucket, the upload overwrites the existing prefix: VERSION=0.62.0 # 1. Regenerate -cargo run -p vortex-compat --release --bin gen -- \ +cargo run -p vortex-compat --release --bin compat-gen -- \ --version "$VERSION" --output /tmp/fixtures/ # 2. Overwrite in S3 @@ -113,7 +120,7 @@ AWS_PROFILE=vortex-ci aws s3 cp /tmp/fixtures/ \ "s3://vortex-compat-fixtures/v${VERSION}/" --recursive # 3. Verify -AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin validate -- \ +AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin compat-validate -- \ --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com ``` @@ -125,18 +132,18 @@ You can skip S3 entirely and work against local directories: ```bash # Generate into a versioned subdirectory -cargo run -p vortex-compat --release --bin gen -- \ +cargo run -p vortex-compat --release --bin compat-gen -- \ --version 0.62.0 --output /tmp/compat-root/v0.62.0/ # Validate all local versions -cargo run -p vortex-compat --release --bin validate -- \ +cargo run -p vortex-compat --release --bin compat-validate -- \ --fixtures-dir /tmp/compat-root/ ``` If the bucket requires authenticated access, set your AWS profile: ```bash -AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin validate -- \ +AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin compat-validate -- \ --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com ``` @@ -298,9 +305,6 @@ Runs **every Monday at 06:00 UTC** and on **manual dispatch**. 
| TPC-H Orders | `tpch_orders.vortex` | TPC-H orders table at scale factor 0.01 | | ClickBench Hits | `clickbench_hits_1k.vortex` | First 1000 rows of the ClickBench hits table | -Encoding-specific fixtures (Dict, RunEnd, Constant, Sparse, ALP, BitPacked, FSST) are -stubbed and will be enabled once the stable-encodings RFC lands. - ### Adding a new fixture New encodings, data types, or structural patterns always get a **new fixture file**. @@ -315,7 +319,7 @@ Never modify an existing fixture's `build()` output (see [Fixture Contract](#fix } ``` 2. Register it in `all_fixtures()` in `src/fixtures/mod.rs`. -3. Run `gen` locally to verify it produces a valid file. +3. Run `compat-gen` locally to verify it produces a valid file. 4. Upload fixtures for the current version — the new file will appear in that version's `manifest.json`. Older versions are unaffected. @@ -342,4 +346,4 @@ To generate fixtures for a version in Epoch A or B: 2. Cherry-pick the compat-gen crate: `git cherry-pick --no-commit ` 3. Swap `src/adapter.rs` to the appropriate epoch's implementation 4. Resolve any dependency mismatches in `Cargo.toml` -5. Run `gen` and upload the resulting fixtures +5. Run `compat-gen` and upload the resulting fixtures diff --git a/vortex-test/compat-gen/src/fixtures/encodings.rs b/vortex-test/compat-gen/src/fixtures/encodings.rs deleted file mode 100644 index aac1a19d7d7..00000000000 --- a/vortex-test/compat-gen/src/fixtures/encodings.rs +++ /dev/null @@ -1,33 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -#![allow(dead_code)] - -use vortex_array::ArrayRef; -use vortex_error::VortexResult; - -use super::Fixture; - -macro_rules! 
encoding_stub { - ($name:ident, $file:expr) => { - pub struct $name; - - impl Fixture for $name { - fn name(&self) -> &str { - $file - } - - fn build(&self) -> VortexResult> { - todo!(concat!("blocked on stable-encodings RFC — ", $file)) - } - } - }; -} - -encoding_stub!(DictEncodingFixture, "enc_dict.vortex"); -encoding_stub!(RunEndEncodingFixture, "enc_runend.vortex"); -encoding_stub!(ConstantEncodingFixture, "enc_constant.vortex"); -encoding_stub!(SparseEncodingFixture, "enc_sparse.vortex"); -encoding_stub!(AlpEncodingFixture, "enc_alp.vortex"); -encoding_stub!(BitPackedEncodingFixture, "enc_bitpacked.vortex"); -encoding_stub!(FsstEncodingFixture, "enc_fsst.vortex"); diff --git a/vortex-test/compat-gen/src/fixtures/mod.rs b/vortex-test/compat-gen/src/fixtures/mod.rs index 307ee334890..604ca52e6c8 100644 --- a/vortex-test/compat-gen/src/fixtures/mod.rs +++ b/vortex-test/compat-gen/src/fixtures/mod.rs @@ -2,7 +2,6 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors mod clickbench; -pub mod encodings; mod synthetic; mod tpch; @@ -33,13 +32,5 @@ pub fn all_fixtures() -> Vec> { Box::new(tpch::TpchLineitemFixture), Box::new(tpch::TpchOrdersFixture), Box::new(clickbench::ClickBenchHits1kFixture), - // Encoding stubs — uncomment as stable-encodings RFC lands: - // Box::new(encodings::DictEncodingFixture), - // Box::new(encodings::RunEndEncodingFixture), - // Box::new(encodings::ConstantEncodingFixture), - // Box::new(encodings::SparseEncodingFixture), - // Box::new(encodings::AlpEncodingFixture), - // Box::new(encodings::BitPackedEncodingFixture), - // Box::new(encodings::FsstEncodingFixture), ] } diff --git a/vortex-test/compat-gen/src/main.rs b/vortex-test/compat-gen/src/main.rs index 1f743e37032..5c1fcf45239 100644 --- a/vortex-test/compat-gen/src/main.rs +++ b/vortex-test/compat-gen/src/main.rs @@ -10,7 +10,10 @@ use vortex_compat::manifest::Manifest; use vortex_error::VortexResult; #[derive(Parser)] -#[command(name = "gen", about = "Generate Vortex 
backward-compat fixture files")] +#[command( + name = "compat-gen", + about = "Generate Vortex backward-compat fixture files" +)] struct Cli { /// Version tag for this fixture set (e.g. "0.62.0"). #[arg(long)] diff --git a/vortex-test/compat-gen/src/validate_main.rs b/vortex-test/compat-gen/src/validate_main.rs index 245004a20fd..dbaf35ba822 100644 --- a/vortex-test/compat-gen/src/validate_main.rs +++ b/vortex-test/compat-gen/src/validate_main.rs @@ -10,7 +10,10 @@ use vortex_compat::validate::validate_all; use vortex_error::VortexResult; #[derive(Parser)] -#[command(name = "validate", about = "Validate Vortex backward-compat fixtures")] +#[command( + name = "compat-validate", + about = "Validate Vortex backward-compat fixtures" +)] struct Cli { /// HTTPS base URL for the fixture bucket. /// e.g. @@ -34,8 +37,7 @@ fn main() -> VortexResult<()> { (Some(url), None) => FixtureSource::Url(url.clone()), (None, Some(dir)) => FixtureSource::Dir(dir.clone()), _ => { - eprintln!("error: specify exactly one of --fixtures-url or --fixtures-dir"); - std::process::exit(1); + vortex_error::vortex_bail!("specify exactly one of --fixtures-url or --fixtures-dir"); } }; @@ -85,7 +87,7 @@ fn main() -> VortexResult<()> { eprintln!("\nresult: {total_passed} passed, {total_failed} failed, {total_skipped} skipped"); if total_failed > 0 { - std::process::exit(1); + vortex_error::vortex_bail!("{total_failed} fixture(s) failed validation"); } Ok(()) From 92958a86b8f9d2db6ec71a44f204193fe5f33b33 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Thu, 12 Mar 2026 16:01:03 +0000 Subject: [PATCH 12/13] wip back compat tester Signed-off-by: Joe Isaacs --- .github/workflows/compat-gen-upload.yml | 28 +- vortex-test/compat-gen/README.md | 182 +++------- .../scripts/update-versions-json.py | 34 -- vortex-test/compat-gen/scripts/upload.py | 321 ++++++++++++++++++ vortex-test/compat-gen/src/main.rs | 11 +- vortex-test/compat-gen/src/manifest.rs | 11 +- vortex-test/compat-gen/src/validate.rs | 17 +- 7 
files changed, 403 insertions(+), 201 deletions(-) delete mode 100755 vortex-test/compat-gen/scripts/update-versions-json.py create mode 100755 vortex-test/compat-gen/scripts/upload.py diff --git a/.github/workflows/compat-gen-upload.yml b/.github/workflows/compat-gen-upload.yml index 1b0f8699c2c..de7cc486188 100644 --- a/.github/workflows/compat-gen-upload.yml +++ b/.github/workflows/compat-gen-upload.yml @@ -7,9 +7,6 @@ on: description: "Version to generate fixtures for (e.g. 0.62.0)" required: true -env: - S3_BUCKET: vortex-compat-fixtures - jobs: upload-fixtures: runs-on: ubuntu-latest @@ -23,26 +20,13 @@ jobs: - uses: Swatinem/rust-cache@v2 - - name: Generate fixtures - run: | - cargo run -p vortex-compat --release --bin compat-gen -- \ - --version "${{ inputs.version }}" --output /tmp/fixtures/ - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v4 + uses: aws-actions/configure-aws-credentials@v5 with: - role-to-assume: ${{ secrets.COMPAT_FIXTURES_ROLE_ARN }} + role-to-assume: arn:aws:iam::245040174862:role/GitHubBenchmarkRole aws-region: us-east-1 - - name: Upload to S3 - run: | - aws s3 cp /tmp/fixtures/ \ - "s3://${S3_BUCKET}/v${{ inputs.version }}/" --recursive - - - name: Update versions.json - run: | - aws s3 cp "s3://${S3_BUCKET}/versions.json" /tmp/versions.json 2>/dev/null \ - || echo '[]' > /tmp/versions.json - python3 vortex-test/compat-gen/scripts/update-versions-json.py \ - /tmp/versions.json "${{ inputs.version }}" - aws s3 cp /tmp/versions.json "s3://${S3_BUCKET}/versions.json" + - name: Generate and upload fixtures + run: > + python3 vortex-test/compat-gen/scripts/upload.py + --version "${{ inputs.version }}" diff --git a/vortex-test/compat-gen/README.md b/vortex-test/compat-gen/README.md index e11a1675cde..6473a866ee3 100644 --- a/vortex-test/compat-gen/README.md +++ b/vortex-test/compat-gen/README.md @@ -26,10 +26,11 @@ method. The following rules apply: structural pattern, add a new fixture with a new filename. 
Never modify an existing fixture to cover new ground. -- **Older versions have fewer fixtures.** Each version's `manifest.json` lists - which fixtures were generated for that version. `compat-validate` only validates - the fixtures listed in the manifest — it skips any fixture that didn't exist - at that version. +- **Additive-only fixture list.** The fixture list only ever grows; fixtures are + never removed. The upload script (`scripts/upload.py`) enforces this by checking + that every fixture in the previous version's manifest still exists in the + generated output. Each fixture's `since` field in the manifest records the first + version that introduced it. - **`versions.json`** is the top-level index listing every version that has uploaded fixtures. `compat-validate` iterates over all listed versions. @@ -47,84 +48,43 @@ After creating the S3 bucket (see [AWS Setup](#aws-setup-one-time) below), seed with the first fixture set: ```bash -# 1. Generate fixtures for the current version -cargo run -p vortex-compat --release --bin compat-gen -- \ - --version 0.62.0 --output /tmp/fixtures/ - -# 2. Upload to S3 -AWS_PROFILE=vortex-ci aws s3 cp /tmp/fixtures/ \ - s3://vortex-compat-fixtures/v0.62.0/ --recursive +# Generate + upload (first version, no previous manifest to merge) +python3 vortex-test/compat-gen/scripts/upload.py --version 0.62.0 -# 3. Create the initial versions.json -echo '["0.62.0"]' > /tmp/versions.json -AWS_PROFILE=vortex-ci aws s3 cp /tmp/versions.json \ - s3://vortex-compat-fixtures/versions.json - -# 4. 
Verify the round-trip +# Verify the round-trip AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin compat-validate -- \ --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com ``` ## Uploading Fixtures for a New Version -When a new Vortex version is tagged and you want to upload its fixtures manually -(CI does this automatically on tag push): +Use the upload script, which handles building, manifest merging, and S3 upload: ```bash -VERSION=0.63.0 +# Full upload +python3 vortex-test/compat-gen/scripts/upload.py --version 0.63.0 -# 1. Generate fixtures -cargo run -p vortex-compat --release --bin compat-gen -- \ - --version "$VERSION" --output /tmp/fixtures/ - -# 2. Upload to S3 under the new version prefix -AWS_PROFILE=vortex-ci aws s3 cp /tmp/fixtures/ \ - "s3://vortex-compat-fixtures/v${VERSION}/" --recursive - -# 3. Append the version to versions.json -AWS_PROFILE=vortex-ci aws s3 cp \ - s3://vortex-compat-fixtures/versions.json /tmp/versions.json -python3 -c " -import json, sys -with open('/tmp/versions.json') as f: - versions = json.load(f) -v = sys.argv[1] -if v not in versions: - versions.append(v) - versions.sort(key=lambda x: list(map(int, x.split('.')))) -with open('/tmp/versions.json', 'w') as f: - json.dump(versions, f, indent=2) -" "$VERSION" -AWS_PROFILE=vortex-ci aws s3 cp /tmp/versions.json \ - s3://vortex-compat-fixtures/versions.json - -# 4. 
Verify all versions (including the new one) -AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin compat-validate -- \ +# Dry run (generate + merge manifest, skip S3) +python3 vortex-test/compat-gen/scripts/upload.py --version 0.63.0 --dry-run + +# Skip the cargo build (if you already have fixtures generated) +python3 vortex-test/compat-gen/scripts/upload.py \ + --version 0.63.0 --output /tmp/fixtures/ --skip-build + +# Verify all versions +cargo run -p vortex-compat --release --bin compat-validate -- \ --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com ``` ## Re-uploading Fixtures for an Existing Version -If a fixture was added or changed and you need to regenerate for a version that -already exists in the bucket, the upload overwrites the existing prefix: +The upload script will overwrite the existing prefix in S3: ```bash -VERSION=0.62.0 - -# 1. Regenerate -cargo run -p vortex-compat --release --bin compat-gen -- \ - --version "$VERSION" --output /tmp/fixtures/ - -# 2. Overwrite in S3 -AWS_PROFILE=vortex-ci aws s3 cp /tmp/fixtures/ \ - "s3://vortex-compat-fixtures/v${VERSION}/" --recursive - -# 3. Verify -AWS_PROFILE=vortex-ci cargo run -p vortex-compat --release --bin compat-validate -- \ - --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com +python3 vortex-test/compat-gen/scripts/upload.py --version 0.62.0 ``` -No need to update `versions.json` — the version is already listed. +No need to update `versions.json` — the script handles it idempotently. ## Local-Only Workflow @@ -192,57 +152,15 @@ aws s3api put-bucket-policy \ }' ``` -### 3. Create an IAM OIDC provider for GitHub Actions +### 3. Grant the benchmark role access to the compat bucket -Skip this step if the account already has a GitHub OIDC provider configured. 
- -```bash -aws iam create-open-id-connect-provider \ - --url https://token.actions.githubusercontent.com \ - --client-id-list sts.amazonaws.com \ - --thumbprint-list 6938fd4d98bab03faadb97b34396831e3780aea1 -``` - -### 4. Create the IAM role for CI - -Create the trust policy file (`trust-policy.json`): - -```json -{ - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Principal": { - "Federated": "arn:aws:iam::245040174862:oidc-provider/token.actions.githubusercontent.com" - }, - "Action": "sts:AssumeRoleWithWebIdentity", - "Condition": { - "StringEquals": { - "token.actions.githubusercontent.com:aud": "sts.amazonaws.com" - }, - "StringLike": { - "token.actions.githubusercontent.com:sub": "repo:spiraldb/vortex:ref:refs/tags/*" - } - } - } - ] -} -``` - -Create the role: - -```bash -aws iam create-role \ - --role-name GitHubCompatFixturesRole \ - --assume-role-policy-document file://trust-policy.json -``` - -Attach an inline permission policy: +The CI workflow reuses the existing `GitHubBenchmarkRole` +(`arn:aws:iam::245040174862:role/GitHubBenchmarkRole`). +Add an inline policy granting it S3 access to the compat fixtures bucket: ```bash aws iam put-role-policy \ - --role-name GitHubCompatFixturesRole \ + --role-name GitHubBenchmarkRole \ --policy-name CompatFixturesS3Access \ --policy-document '{ "Version": "2012-10-17", @@ -263,13 +181,6 @@ aws iam put-role-policy \ }' ``` -### 5. Store the role ARN as a GitHub secret - -```bash -gh secret set COMPAT_FIXTURES_ROLE_ARN \ - --body "arn:aws:iam::245040174862:role/GitHubCompatFixturesRole" -``` - ## CI Workflows ### Fixture upload (`.github/workflows/compat-gen-upload.yml`) @@ -278,10 +189,12 @@ Triggered via **manual dispatch** with a required `version` input (e.g. `0.62.0` Will be updated to also trigger on release tag pushes once the workflow is proven. 1. Checks out the current branch -2. Runs `compat-gen --version --output /tmp/fixtures/` -3. 
Assumes the `GitHubCompatFixturesRole` via OIDC -4. Uploads fixtures to `s3://vortex-compat-fixtures/v/` -5. Appends the version to `versions.json` +2. Runs `scripts/upload.py --version ` which: + - Builds and runs `compat-gen` to generate fixtures + - Fetches the previous version's manifest and merges `since` values + - Enforces additive-only (no fixtures removed) + - Uploads fixtures to `s3://vortex-compat-fixtures/v/` + - Updates `versions.json` with ETag-based optimistic locking ### Weekly compat test (`.github/workflows/compat-test-weekly.yml`) @@ -293,17 +206,17 @@ Runs **every Monday at 06:00 UTC** and on **manual dispatch**. ## Fixture Suite -| Fixture | File | Description | -|---------|------|-------------| -| Primitives | `primitives.vortex` | All numeric types (u8–u64, i32, i64, f32, f64) with min/mid/max values | -| Strings | `strings.vortex` | Variable-length strings including empty, ASCII, Unicode, and emoji | -| Booleans | `booleans.vortex` | Boolean array with mixed true/false values | -| Nullable | `nullable.vortex` | Nullable int and string columns with interleaved nulls | -| Nested Struct | `struct_nested.vortex` | Two-level nested struct (inner struct within outer struct) | -| Chunked | `chunked.vortex` | Multi-chunk file: 3 chunks of 1000 rows each | -| TPC-H Lineitem | `tpch_lineitem.vortex` | TPC-H lineitem table at scale factor 0.01 | -| TPC-H Orders | `tpch_orders.vortex` | TPC-H orders table at scale factor 0.01 | -| ClickBench Hits | `clickbench_hits_1k.vortex` | First 1000 rows of the ClickBench hits table | +| Fixture | File | Since | Description | +|---------|------|-------|-------------| +| Primitives | `primitives.vortex` | 0.62.0 | All numeric types (u8–u64, i32, i64, f32, f64) with min/mid/max values | +| Strings | `strings.vortex` | 0.62.0 | Variable-length strings including empty, ASCII, Unicode, and emoji | +| Booleans | `booleans.vortex` | 0.62.0 | Boolean array with mixed true/false values | +| Nullable | `nullable.vortex` | 
0.62.0 | Nullable int and string columns with interleaved nulls | +| Nested Struct | `struct_nested.vortex` | 0.62.0 | Two-level nested struct (inner struct within outer struct) | +| Chunked | `chunked.vortex` | 0.62.0 | Multi-chunk file: 3 chunks of 1000 rows each | +| TPC-H Lineitem | `tpch_lineitem.vortex` | 0.62.0 | TPC-H lineitem table at scale factor 0.01 | +| TPC-H Orders | `tpch_orders.vortex` | 0.62.0 | TPC-H orders table at scale factor 0.01 | +| ClickBench Hits | `clickbench_hits_1k.vortex` | 0.62.0 | First 1000 rows of the ClickBench hits table | ### Adding a new fixture @@ -320,8 +233,9 @@ Never modify an existing fixture's `build()` output (see [Fixture Contract](#fix ``` 2. Register it in `all_fixtures()` in `src/fixtures/mod.rs`. 3. Run `compat-gen` locally to verify it produces a valid file. -4. Upload fixtures for the current version — the new file will appear in that - version's `manifest.json`. Older versions are unaffected. +4. Upload fixtures for the current version — the upload script merges the manifest + so the new fixture gets `since` set to the current version while existing + fixtures keep their original `since` values. 
## Adapter Epochs diff --git a/vortex-test/compat-gen/scripts/update-versions-json.py b/vortex-test/compat-gen/scripts/update-versions-json.py deleted file mode 100755 index d12dc2cd27c..00000000000 --- a/vortex-test/compat-gen/scripts/update-versions-json.py +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright the Vortex contributors - -"""Append a version to versions.json if not already present, keeping sorted order.""" - -import json -import sys - - -def main(): - if len(sys.argv) != 3: - print(f"Usage: {sys.argv[0]} ", file=sys.stderr) - sys.exit(1) - - path, version = sys.argv[1], sys.argv[2] - - try: - with open(path) as f: - versions = json.load(f) - except FileNotFoundError: - versions = [] - - if version not in versions: - versions.append(version) - versions.sort(key=lambda x: list(map(int, x.split(".")))) - - with open(path, "w") as f: - json.dump(versions, f, indent=2) - f.write("\n") - - -if __name__ == "__main__": - main() diff --git a/vortex-test/compat-gen/scripts/upload.py b/vortex-test/compat-gen/scripts/upload.py new file mode 100755 index 00000000000..7ef452d8ef0 --- /dev/null +++ b/vortex-test/compat-gen/scripts/upload.py @@ -0,0 +1,321 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright the Vortex contributors + +"""Upload Vortex backward-compat fixtures to S3. + +Wraps the full upload lifecycle: + 1. Build + run compat-gen to produce fixture files and a naive manifest + 2. Fetch the previous version's manifest from S3 (via public HTTP) + 3. Merge `since` values: keep old `since` for existing fixtures, current + version for new ones + 4. Enforce additive-only: every fixture in the previous manifest must exist + in the generated output + 5. Upload the output directory to S3 + 6. Update versions.json with ETag-based optimistic locking + +Requires only Python 3 stdlib + `aws` CLI on PATH. 
+""" + +import argparse +import json +import os +import subprocess +import sys +import tempfile +import time +import urllib.error +import urllib.request + +S3_BUCKET = "vortex-compat-fixtures" +FIXTURES_URL = "https://vortex-compat-fixtures.s3.amazonaws.com" + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def log(msg: str) -> None: + print(msg, file=sys.stderr) + + +def run(cmd: list[str], *, check: bool = True, **kwargs) -> subprocess.CompletedProcess: + log(f" $ {' '.join(cmd)}") + return subprocess.run(cmd, check=check, **kwargs) + + +def http_get(url: str) -> bytes | None: + """Fetch *url* over HTTPS. Returns None on 404 or 403, raises on other errors.""" + try: + with urllib.request.urlopen(url) as resp: + return resp.read() + except urllib.error.HTTPError as exc: + if exc.code == 404 or exc.code == 403: + return None + raise + + +def version_sort_key(v: str) -> list[int]: + return list(map(int, v.split("."))) + + +# --------------------------------------------------------------------------- +# S3 helpers (reuse head_etag / put_object pattern from scripts/s3-upload.py) +# --------------------------------------------------------------------------- + + +def head_etag(bucket: str, key: str) -> str | None: + """Fetch the current ETag for an S3 object, or None if the lookup fails or returns no ETag.""" + result = subprocess.run( + [ + "aws", "s3api", "head-object", + "--bucket", bucket, + "--key", key, + "--query", "ETag", + "--output", "text", + ], + capture_output=True, + text=True, + ) + if result.returncode != 0: + return None + etag = result.stdout.strip() + if not etag or etag == "null": + return None + return etag + + +def put_object(bucket: str, key: str, body: str, if_match: str | None) -> bool: + """Upload a single object with optional ETag precondition.""" + cmd = [ + "aws", "s3api", "put-object", + "--bucket", bucket, + "--key", key, + "--body", body, + ] + if
if_match: + cmd.extend(["--if-match", if_match]) + result = subprocess.run(cmd, capture_output=True) + return result.returncode == 0 + + +def upload_versions_json(local_path: str, max_retries: int = 5) -> None: + """Upload versions.json with ETag-based optimistic locking + retry.""" + key = "versions.json" + for attempt in range(1, max_retries + 1): + etag = head_etag(S3_BUCKET, key) + if put_object(S3_BUCKET, key, local_path, etag): + log(" versions.json uploaded.") + return + + if attempt == max_retries: + break + + delay = min(2 ** attempt, 30) + log(f" versions.json upload failed (attempt {attempt}/{max_retries}), " + f"retrying in {delay}s...") + time.sleep(delay) + + log(f"ERROR: versions.json upload failed after {max_retries} attempts") + sys.exit(1) + + +# --------------------------------------------------------------------------- +# Core logic +# --------------------------------------------------------------------------- + + +def fetch_versions() -> list[str]: + """Fetch the current versions.json from S3 (public HTTP).""" + data = http_get(f"{FIXTURES_URL}/versions.json") + if data is None: + return [] + return json.loads(data) + + +def fetch_previous_manifest(versions: list[str], current_version: str) -> dict | None: + """Fetch the manifest.json for the latest version before *current_version*.""" + candidates = [v for v in versions if v != current_version] + if not candidates: + return None + candidates.sort(key=version_sort_key) + latest = candidates[-1] + log(f" previous version: {latest}") + data = http_get(f"{FIXTURES_URL}/v{latest}/manifest.json") + if data is None: + return None + return json.loads(data) + + +def normalize_manifest_fixtures(manifest: dict) -> list[dict]: + """Handle old manifest format where fixtures was a list of strings.""" + entries = manifest.get("fixtures", []) + normalized = [] + for entry in entries: + if isinstance(entry, str): + # Old format: just a filename string — no `since` info + normalized.append({"name": entry, 
"since": "unknown"}) + else: + normalized.append(entry) + return normalized + + +def merge_manifest( + generated_manifest_path: str, + previous_manifest: dict | None, + current_version: str, +) -> None: + """Merge `since` values from the previous manifest into the generated one. + + Also enforces the additive-only rule: every fixture in the previous manifest + must exist in the generated output. + """ + with open(generated_manifest_path) as f: + generated = json.load(f) + + if previous_manifest is None: + # First upload — nothing to merge. + return + + prev_fixtures = normalize_manifest_fixtures(previous_manifest) + prev_by_name = {e["name"]: e for e in prev_fixtures} + gen_by_name = {e["name"]: e for e in generated["fixtures"]} + + # Additive-only check: every previous fixture must still exist. + missing = sorted(set(prev_by_name) - set(gen_by_name)) + if missing: + log(f"ERROR: fixtures removed since previous version: {missing}") + log("Fixtures must never be removed — only added.") + sys.exit(1) + + # Merge: keep old `since` for existing fixtures, current version for new. 
+ for entry in generated["fixtures"]: + name = entry["name"] + if name in prev_by_name: + entry["since"] = prev_by_name[name]["since"] + else: + entry["since"] = current_version + + with open(generated_manifest_path, "w") as f: + json.dump(generated, f, indent=2) + f.write("\n") + + log(f" merged manifest: {len(prev_by_name)} existing, " + f"{len(gen_by_name) - len(prev_by_name)} new fixtures") + + +def build_fixtures(version: str, output_dir: str) -> None: + """Run cargo to build and execute compat-gen.""" + run([ + "cargo", "run", "-p", "vortex-compat", "--release", "--bin", "compat-gen", + "--", "--version", version, "--output", output_dir, + ]) + + +def upload_fixtures(version: str, output_dir: str) -> None: + """Upload the output directory to S3.""" + run([ + "aws", "s3", "cp", output_dir, + f"s3://{S3_BUCKET}/v{version}/", + "--recursive", + ]) + + +def update_versions(version: str, tmp_dir: str) -> None: + """Append version to versions.json and upload with optimistic locking.""" + versions = fetch_versions() + + if version not in versions: + versions.append(version) + versions.sort(key=version_sort_key) + + local_path = os.path.join(tmp_dir, "versions.json") + with open(local_path, "w") as f: + json.dump(versions, f, indent=2) + f.write("\n") + + upload_versions_json(local_path) + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Build, generate, and upload Vortex backward-compat fixtures.", + ) + parser.add_argument( + "--version", required=True, + help='Version tag for this fixture set (e.g. 
"0.62.0").', + ) + parser.add_argument( + "--output", + help="Output directory for generated fixtures (default: temp dir).", + ) + parser.add_argument( + "--skip-build", action="store_true", + help="Skip cargo build + compat-gen run (assumes --output already populated).", + ) + parser.add_argument( + "--dry-run", action="store_true", + help="Generate and merge manifest but skip S3 upload.", + ) + args = parser.parse_args() + + # Resolve output directory. + if args.output: + output_dir = args.output + os.makedirs(output_dir, exist_ok=True) + owns_tmp = False + else: + tmp = tempfile.mkdtemp(prefix="compat-gen-") + output_dir = os.path.join(tmp, "fixtures") + os.makedirs(output_dir) + owns_tmp = True + + try: + # Step 1: Build + generate fixtures. + if not args.skip_build: + log(f"[1/4] Generating fixtures for v{args.version}...") + build_fixtures(args.version, output_dir) + else: + log(f"[1/4] Skipping build (--skip-build), using {output_dir}") + + # Step 2: Fetch previous manifest and merge `since` values. + log("[2/4] Fetching previous manifest...") + versions = fetch_versions() + prev_manifest = fetch_previous_manifest(versions, args.version) + manifest_path = os.path.join(output_dir, "manifest.json") + merge_manifest(manifest_path, prev_manifest, args.version) + + if args.dry_run: + log("[3/4] Dry run — skipping S3 upload.") + log("[4/4] Dry run — skipping versions.json update.") + log(f"\nGenerated fixtures in: {output_dir}") + with open(manifest_path) as f: + log(f"Manifest:\n{f.read()}") + return + + # Step 3: Upload fixtures to S3. + log(f"[3/4] Uploading fixtures to s3://{S3_BUCKET}/v{args.version}/...") + upload_fixtures(args.version, output_dir) + + # Step 4: Update versions.json. + log("[4/4] Updating versions.json...") + # Use the parent of output_dir for the temp versions.json file. 
+ tmp_dir = os.path.dirname(output_dir) if owns_tmp else tempfile.mkdtemp() + update_versions(args.version, tmp_dir) + + log(f"\nDone: fixtures for v{args.version} uploaded.") + finally: + # Clean up temp dir if we created one. + if owns_tmp and not args.dry_run: + import shutil + shutil.rmtree(os.path.dirname(output_dir), ignore_errors=True) + + +if __name__ == "__main__": + main() diff --git a/vortex-test/compat-gen/src/main.rs b/vortex-test/compat-gen/src/main.rs index 5c1fcf45239..7f6bdb2e4ac 100644 --- a/vortex-test/compat-gen/src/main.rs +++ b/vortex-test/compat-gen/src/main.rs @@ -6,6 +6,7 @@ use std::path::PathBuf; use chrono::Utc; use clap::Parser; use vortex_compat::fixtures::all_fixtures; +use vortex_compat::manifest::FixtureEntry; use vortex_compat::manifest::Manifest; use vortex_error::VortexResult; @@ -31,20 +32,24 @@ fn main() -> VortexResult<()> { .map_err(|e| vortex_error::vortex_err!("failed to create output dir: {e}"))?; let fixtures = all_fixtures(); - let mut fixture_names = Vec::with_capacity(fixtures.len()); + let mut entries = Vec::with_capacity(fixtures.len()); for fixture in &fixtures { let chunks = fixture.build()?; let path = cli.output.join(fixture.name()); vortex_compat::adapter::write_file(&path, chunks)?; - fixture_names.push(fixture.name().to_string()); + + entries.push(FixtureEntry { + name: fixture.name().to_string(), + since: cli.version.clone(), + }); eprintln!(" wrote {}", fixture.name()); } let manifest = Manifest { version: cli.version.clone(), generated_at: Utc::now(), - fixtures: fixture_names, + fixtures: entries, }; let manifest_path = cli.output.join("manifest.json"); let manifest_json = serde_json::to_string_pretty(&manifest) diff --git a/vortex-test/compat-gen/src/manifest.rs b/vortex-test/compat-gen/src/manifest.rs index 51a8aa6dd33..2c72ebce70d 100644 --- a/vortex-test/compat-gen/src/manifest.rs +++ b/vortex-test/compat-gen/src/manifest.rs @@ -11,5 +11,14 @@ use serde::Serialize; pub struct Manifest { pub version: 
String, pub generated_at: DateTime<Utc>, - pub fixtures: Vec<String>, + pub fixtures: Vec<FixtureEntry>, +} + +/// One entry in the manifest's fixture list. +#[derive(Debug, Serialize, Deserialize)] +pub struct FixtureEntry { + /// Filename, e.g. "primitives.vortex". + pub name: String, + /// First version that introduced this fixture, e.g. "0.62.0". + pub since: String, } diff --git a/vortex-test/compat-gen/src/validate.rs b/vortex-test/compat-gen/src/validate.rs index 8f1cdbcbc6b..cc79523042a 100644 --- a/vortex-test/compat-gen/src/validate.rs +++ b/vortex-test/compat-gen/src/validate.rs @@ -52,20 +52,23 @@ fn validate_version( let mut skipped = 0; let mut failed = Vec::new(); - for fixture_name in &manifest.fixtures { - let Some(fixture) = fixture_map.get(fixture_name.as_str()) else { - eprintln!(" warn: unknown fixture {fixture_name} in v{version}, skipping"); + for entry in &manifest.fixtures { + let Some(fixture) = fixture_map.get(entry.name.as_str()) else { + eprintln!( + " warn: unknown fixture {} in v{version}, skipping", + entry.name + ); skipped += 1; continue; }; - eprintln!(" checking {fixture_name} from v{version}..."); - let bytes = source.fetch_fixture(version, fixture_name)?; + eprintln!(" checking {} from v{version}...", entry.name); + let bytes = source.fetch_fixture(version, &entry.name)?; match validate_one(bytes, *fixture) { Ok(()) => passed += 1, Err(e) => { - eprintln!(" FAIL: {fixture_name} from v{version}: {e}"); - failed.push((fixture_name.clone(), e.to_string())); + eprintln!(" FAIL: {} from v{version}: {e}", entry.name); + failed.push((entry.name.clone(), e.to_string())); } } } From e82e9cf852d263e4f20d8c6450e873c82402e41d Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Thu, 12 Mar 2026 16:13:12 +0000 Subject: [PATCH 13/13] wip back compat tester Signed-off-by: Joe Isaacs --- vortex-test/compat-gen/scripts/upload.py | 82 ++++++++++++++++-------- 1 file changed, 56 insertions(+), 26 deletions(-) diff --git a/vortex-test/compat-gen/scripts/upload.py 
b/vortex-test/compat-gen/scripts/upload.py index 7ef452d8ef0..63b496158ee 100755 --- a/vortex-test/compat-gen/scripts/upload.py +++ b/vortex-test/compat-gen/scripts/upload.py @@ -69,11 +69,17 @@ def head_etag(bucket: str, key: str) -> str | None: """Fetch the current ETag for an S3 object, or None if missing.""" result = subprocess.run( [ - "aws", "s3api", "head-object", - "--bucket", bucket, - "--key", key, - "--query", "ETag", - "--output", "text", + "aws", + "s3api", + "head-object", + "--bucket", + bucket, + "--key", + key, + "--query", + "ETag", + "--output", + "text", ], capture_output=True, text=True, @@ -89,10 +95,15 @@ def head_etag(bucket: str, key: str) -> str | None: def put_object(bucket: str, key: str, body: str, if_match: str | None) -> bool: """Upload a single object with optional ETag precondition.""" cmd = [ - "aws", "s3api", "put-object", - "--bucket", bucket, - "--key", key, - "--body", body, + "aws", + "s3api", + "put-object", + "--bucket", + bucket, + "--key", + key, + "--body", + body, ] if if_match: cmd.extend(["--if-match", if_match]) @@ -112,9 +123,8 @@ def upload_versions_json(local_path: str, max_retries: int = 5) -> None: if attempt == max_retries: break - delay = min(2 ** attempt, 30) - log(f" versions.json upload failed (attempt {attempt}/{max_retries}), " - f"retrying in {delay}s...") + delay = min(2**attempt, 30) + log(f" versions.json upload failed (attempt {attempt}/{max_retries}), retrying in {delay}s...") time.sleep(delay) log(f"ERROR: versions.json upload failed after {max_retries} attempts") @@ -201,25 +211,41 @@ def merge_manifest( json.dump(generated, f, indent=2) f.write("\n") - log(f" merged manifest: {len(prev_by_name)} existing, " - f"{len(gen_by_name) - len(prev_by_name)} new fixtures") + log(f" merged manifest: {len(prev_by_name)} existing, {len(gen_by_name) - len(prev_by_name)} new fixtures") def build_fixtures(version: str, output_dir: str) -> None: """Run cargo to build and execute compat-gen.""" - run([ - "cargo", 
"run", "-p", "vortex-compat", "--release", "--bin", "compat-gen", - "--", "--version", version, "--output", output_dir, - ]) + run( + [ + "cargo", + "run", + "-p", + "vortex-compat", + "--release", + "--bin", + "compat-gen", + "--", + "--version", + version, + "--output", + output_dir, + ] + ) def upload_fixtures(version: str, output_dir: str) -> None: """Upload the output directory to S3.""" - run([ - "aws", "s3", "cp", output_dir, - f"s3://{S3_BUCKET}/v{version}/", - "--recursive", - ]) + run( + [ + "aws", + "s3", + "cp", + output_dir, + f"s3://{S3_BUCKET}/v{version}/", + "--recursive", + ] + ) def update_versions(version: str, tmp_dir: str) -> None: @@ -248,7 +274,8 @@ def main() -> None: description="Build, generate, and upload Vortex backward-compat fixtures.", ) parser.add_argument( - "--version", required=True, + "--version", + required=True, help='Version tag for this fixture set (e.g. "0.62.0").', ) parser.add_argument( @@ -256,11 +283,13 @@ def main() -> None: help="Output directory for generated fixtures (default: temp dir).", ) parser.add_argument( - "--skip-build", action="store_true", + "--skip-build", + action="store_true", help="Skip cargo build + compat-gen run (assumes --output already populated).", ) parser.add_argument( - "--dry-run", action="store_true", + "--dry-run", + action="store_true", help="Generate and merge manifest but skip S3 upload.", ) args = parser.parse_args() @@ -314,6 +343,7 @@ def main() -> None: # Clean up temp dir if we created one. if owns_tmp and not args.dry_run: import shutil + shutil.rmtree(os.path.dirname(output_dir), ignore_errors=True)