From f9cf64fab23be315b93a8b2d21ff391ab084b48e Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 11 Mar 2026 15:26:55 +0000 Subject: [PATCH 1/6] feat: add compat-gen crate with Epoch A adapter (v0.36.0) Standalone crate for generating backward-compat fixture .vortex files. Uses Epoch A adapter targeting the v0.36.0 write API: - VortexWriteOptions::default() (no session) - .write(sink, stream).await returns the sink Fixtures: primitives, strings, booleans, nullable, struct_nested, chunked, tpch_lineitem, tpch_orders. Signed-off-by: Joe Isaacs Co-Authored-By: Claude Opus 4.6 --- vortex-test/compat-gen/Cargo.toml | 30 ++++ vortex-test/compat-gen/PLAN.md | 104 +++++++++++ vortex-test/compat-gen/src/adapter.rs | 31 ++++ vortex-test/compat-gen/src/fixtures/mod.rs | 30 ++++ .../compat-gen/src/fixtures/synthetic.rs | 169 ++++++++++++++++++ vortex-test/compat-gen/src/fixtures/tpch.rs | 45 +++++ vortex-test/compat-gen/src/main.rs | 56 ++++++ vortex-test/compat-gen/src/manifest.rs | 10 ++ 8 files changed, 475 insertions(+) create mode 100644 vortex-test/compat-gen/Cargo.toml create mode 100644 vortex-test/compat-gen/PLAN.md create mode 100644 vortex-test/compat-gen/src/adapter.rs create mode 100644 vortex-test/compat-gen/src/fixtures/mod.rs create mode 100644 vortex-test/compat-gen/src/fixtures/synthetic.rs create mode 100644 vortex-test/compat-gen/src/fixtures/tpch.rs create mode 100644 vortex-test/compat-gen/src/main.rs create mode 100644 vortex-test/compat-gen/src/manifest.rs diff --git a/vortex-test/compat-gen/Cargo.toml b/vortex-test/compat-gen/Cargo.toml new file mode 100644 index 00000000000..5f7f6fe8c1d --- /dev/null +++ b/vortex-test/compat-gen/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "vortex-compat" +version = "0.1.0" +edition = "2021" + +[[bin]] +name = "compat-gen" +path = "src/main.rs" + +[dependencies] +# Vortex crates (path deps — resolve to whatever version is checked out) +vortex = { path = "../../vortex", features = ["files", "tokio"] } +vortex-array 
= { path = "../../vortex-array" } +vortex-buffer = { path = "../../vortex-buffer" } +vortex-error = { path = "../../vortex-error" } + +# TPC-H generation +tpchgen = "2" +tpchgen-arrow = "2" +arrow-array = "57" + +# Async runtime +tokio = { version = "1", features = ["full"] } +futures = "0.3" + +# CLI + serialization +clap = { version = "4", features = ["derive"] } +serde = { version = "1", features = ["derive"] } +serde_json = "1" +chrono = { version = "0.4", features = ["serde"] } diff --git a/vortex-test/compat-gen/PLAN.md b/vortex-test/compat-gen/PLAN.md new file mode 100644 index 00000000000..672c23fdd8c --- /dev/null +++ b/vortex-test/compat-gen/PLAN.md @@ -0,0 +1,104 @@ +# Vortex File Backward Compatibility Testing — Implementation Plan + +RFC: https://github.com/vortex-data/rfcs/pull/23 + +## Overview + +A standalone crate (`vortex-test/compat-gen/`) that generates deterministic `.vortex` fixture files +for backward compatibility testing. Not a workspace member — uses path deps to workspace crates. + +## API Epochs + +The Vortex file write/read API has 3 distinct epochs. The adapter layer (`adapter.rs`) is the only +file that changes when cherry-picking to old release branches. 
+ +| Epoch | Versions | Write API | Read (in-memory) | Session | +|-------|----------------|---------------------------------------------------------|------------------------------------------------------|---------| +| **A** | 0.36.0 | `VortexWriteOptions::default().write(sink, stream) → W` | `VortexOpenOptions::in_memory().open(buf).await?` | None | +| **B** | 0.45.0–0.52.0 | `VortexWriteOptions::default().write(sink, stream) → W` | `VortexOpenOptions::in_memory().open(buf)?` (sync) | Exists, not wired | +| **C** | 0.58.0–HEAD | `session.write_options().write(sink, stream) → WriteSummary` | `session.open_options().open_buffer(buf)?` (sync) | Central | + +### Key Breaking Changes + +- **A→B**: In-memory `open()` changed from async to sync +- **B→C**: + - `VortexWriteOptions` lost `Default`, now constructed from `VortexSession` + - `write()` return type: `W` (sink) → `WriteSummary` + - `VortexOpenOptions` lost the `FileType` generic parameter + - `in_memory().open()` → `open_options().open_buffer()` + - Scan: `into_array_iter()` → `into_array_stream()` (async restored) + +## Array Construction API Stability + +Array construction is stable across ALL versions — fixture builders need NO adaptation: + +| API | Status | +|-----|--------| +| `StructArray::try_new(field_names, fields, len, validity)` | Stable 0.36.0–HEAD | +| `PrimitiveArray::new(buffer![...], validity)` | Stable 0.36.0–HEAD | +| `buffer![1, 2, 3].into_array()` | Stable 0.36.0–HEAD | +| `VarBinArray::from(vec!["a", "b"])` | Stable 0.36.0–HEAD | +| `BoolArray::from_iter([true, false])` | Stable 0.36.0–HEAD | +| `ArrayRef::from_arrow(record_batch, false)` | Stable 0.36.0–HEAD | +| `ChunkedArray::try_new(chunks, dtype)` | Stable 0.36.0–HEAD | + +## Fixture Suite + +### Trait + +```rust +pub trait Fixture: Send + Sync { + fn name(&self) -> &str; + fn build(&self) -> Vec; +} +``` + +Returns `Vec` to support chunked fixtures naturally. 
+ +### Synthetic Fixtures + +| File | Schema | Purpose | +|------|--------|---------| +| `primitives.vortex` | `Struct{u8, u16, u32, u64, i32, i64, f32, f64}` | Primitive round-trip | +| `strings.vortex` | `Struct{Utf8}` | String encoding | +| `booleans.vortex` | `Struct{Bool}` | Bool round-trip | +| `nullable.vortex` | `Struct{Nullable, Nullable}` | Null handling | +| `struct_nested.vortex` | `Struct{Struct{i32, Utf8}, f64}` | Nested types | +| `chunked.vortex` | Chunked `Struct{u32}` (3 x 1000 rows) | Multi-chunk files | + +### Realistic Fixtures + +| File | Source | Rows | +|------|--------|------| +| `tpch_lineitem.vortex` | TPC-H SF 0.01 | ~60K | +| `tpch_orders.vortex` | TPC-H SF 0.01 | ~15K | + +## Adapter Layer + +Only `adapter.rs` changes per epoch (~15 lines). See `src/adapter.rs` for the current (Epoch C) +implementation. The git history shows all 3 epoch variants. + +## What Changes Per Version When Cherry-Picking + +| Component | Changes? | +|-----------|----------| +| Fixture trait + registry | No | +| Fixture builders (synthetic) | No | +| Fixture builders (TPC-H) | No | +| `adapter.rs` | Yes — ~15 lines, 3 variants | +| `main.rs`, `manifest.rs` | No | +| `Cargo.toml` | No (path deps resolve to local version) | + +## Usage + +```bash +# Generate fixtures for the current version +cargo run --manifest-path vortex-test/compat-gen/Cargo.toml \ + --bin compat-gen -- --version 0.62.0 --output /tmp/fixtures/ + +# Outputs: +# /tmp/fixtures/manifest.json +# /tmp/fixtures/primitives.vortex +# /tmp/fixtures/strings.vortex +# ... 
+``` diff --git a/vortex-test/compat-gen/src/adapter.rs b/vortex-test/compat-gen/src/adapter.rs new file mode 100644 index 00000000000..eafc87a1ea5 --- /dev/null +++ b/vortex-test/compat-gen/src/adapter.rs @@ -0,0 +1,31 @@ +// Epoch A adapter — for Vortex v0.36.0 +// +// API at this version: +// - VortexWriteOptions::default() (no session) +// - .write(sink, stream).await returns VortexResult (the sink back) +// - ArrayStream must be Unpin + +use std::path::Path; + +use futures::stream; +use tokio::runtime::Runtime; +use vortex::file::VortexWriteOptions; +use vortex_array::stream::ArrayStreamAdapter; +use vortex_array::ArrayRef; +use vortex_error::VortexResult; + +/// Write a sequence of array chunks as a `.vortex` file. +pub fn write_file(path: &Path, chunks: Vec) -> VortexResult<()> { + let dtype = chunks[0].dtype().clone(); + let stream = ArrayStreamAdapter::new(dtype, stream::iter(chunks.into_iter().map(Ok))); + + let rt = Runtime::new().expect("failed to create tokio runtime"); + rt.block_on(async { + let file = tokio::fs::File::create(path).await.map_err(|e| { + vortex_error::vortex_err!("failed to create {}: {e}", path.display()) + })?; + // At 0.36.0, write() returns VortexResult — we discard the sink. + let _sink = VortexWriteOptions::default().write(file, stream).await?; + Ok(()) + }) +} diff --git a/vortex-test/compat-gen/src/fixtures/mod.rs b/vortex-test/compat-gen/src/fixtures/mod.rs new file mode 100644 index 00000000000..b09662bec89 --- /dev/null +++ b/vortex-test/compat-gen/src/fixtures/mod.rs @@ -0,0 +1,30 @@ +mod synthetic; +mod tpch; + +use vortex_array::ArrayRef; + +/// A deterministic fixture that produces the same arrays every time. +pub trait Fixture: Send + Sync { + /// The filename for this fixture, e.g. "primitives.vortex". + fn name(&self) -> &str; + + /// Build the expected arrays. Must be deterministic. + /// + /// Returns a `Vec` to support chunked fixtures (multiple chunks). + /// Single-array fixtures return a one-element vec. 
+ fn build(&self) -> Vec; +} + +/// All registered fixtures. +pub fn all_fixtures() -> Vec> { + vec![ + Box::new(synthetic::PrimitivesFixture), + Box::new(synthetic::StringsFixture), + Box::new(synthetic::BooleansFixture), + Box::new(synthetic::NullableFixture), + Box::new(synthetic::StructNestedFixture), + Box::new(synthetic::ChunkedFixture), + Box::new(tpch::TpchLineitemFixture), + Box::new(tpch::TpchOrdersFixture), + ] +} diff --git a/vortex-test/compat-gen/src/fixtures/synthetic.rs b/vortex-test/compat-gen/src/fixtures/synthetic.rs new file mode 100644 index 00000000000..d29c1c5d097 --- /dev/null +++ b/vortex-test/compat-gen/src/fixtures/synthetic.rs @@ -0,0 +1,169 @@ +use vortex_array::arrays::{BoolArray, ChunkedArray, PrimitiveArray, StructArray, VarBinArray}; +use vortex_array::dtype::field_names::FieldNames; +use vortex_array::dtype::{DType, Nullability, PType}; +use vortex_array::validity::Validity; +use vortex_array::{ArrayRef, IntoArray}; +use vortex_buffer::buffer; + +use super::Fixture; + +pub struct PrimitivesFixture; + +impl Fixture for PrimitivesFixture { + fn name(&self) -> &str { + "primitives.vortex" + } + + fn build(&self) -> Vec { + let arr = StructArray::try_new( + FieldNames::from(["u8", "u16", "u32", "u64", "i32", "i64", "f32", "f64"]), + vec![ + PrimitiveArray::new(buffer![0u8, 128, 255], Validity::NonNullable).into_array(), + PrimitiveArray::new(buffer![0u16, 32768, 65535], Validity::NonNullable).into_array(), + PrimitiveArray::new(buffer![0u32, 2_147_483_648, 4_294_967_295], Validity::NonNullable).into_array(), + PrimitiveArray::new(buffer![0u64, 9_223_372_036_854_775_808, u64::MAX], Validity::NonNullable).into_array(), + PrimitiveArray::new(buffer![i32::MIN, 0i32, i32::MAX], Validity::NonNullable).into_array(), + PrimitiveArray::new(buffer![i64::MIN, 0i64, i64::MAX], Validity::NonNullable).into_array(), + PrimitiveArray::new(buffer![f32::MIN, 0.0f32, f32::MAX], Validity::NonNullable).into_array(), + PrimitiveArray::new(buffer![f64::MIN, 
0.0f64, f64::MAX], Validity::NonNullable).into_array(), + ], + 3, + Validity::NonNullable, + ) + .expect("failed to build primitives fixture"); + vec![arr.into_array()] + } +} + +pub struct StringsFixture; + +impl Fixture for StringsFixture { + fn name(&self) -> &str { + "strings.vortex" + } + + fn build(&self) -> Vec { + let strings = VarBinArray::from(vec!["", "hello", "こんにちは", "\u{1f980}"]); + let arr = StructArray::try_new( + FieldNames::from(["text"]), + vec![strings.into_array()], + 4, + Validity::NonNullable, + ) + .expect("failed to build strings fixture"); + vec![arr.into_array()] + } +} + +pub struct BooleansFixture; + +impl Fixture for BooleansFixture { + fn name(&self) -> &str { + "booleans.vortex" + } + + fn build(&self) -> Vec { + let bools = BoolArray::from_iter([true, false, true, true, false]); + let arr = StructArray::try_new( + FieldNames::from(["flag"]), + vec![bools.into_array()], + 5, + Validity::NonNullable, + ) + .expect("failed to build booleans fixture"); + vec![arr.into_array()] + } +} + +pub struct NullableFixture; + +impl Fixture for NullableFixture { + fn name(&self) -> &str { + "nullable.vortex" + } + + fn build(&self) -> Vec { + let nullable_ints = PrimitiveArray::from_option_iter([ + Some(1i32), + None, + Some(42), + None, + Some(-7), + ]); + let nullable_strings = VarBinArray::from(vec![ + Some("hello"), + None, + Some("world"), + Some(""), + None, + ]); + let arr = StructArray::try_new( + FieldNames::from(["int_col", "str_col"]), + vec![nullable_ints.into_array(), nullable_strings.into_array()], + 5, + Validity::NonNullable, + ) + .expect("failed to build nullable fixture"); + vec![arr.into_array()] + } +} + +pub struct StructNestedFixture; + +impl Fixture for StructNestedFixture { + fn name(&self) -> &str { + "struct_nested.vortex" + } + + fn build(&self) -> Vec { + let inner = StructArray::try_new( + FieldNames::from(["a", "b"]), + vec![ + PrimitiveArray::new(buffer![10i32, 20, 30], Validity::NonNullable).into_array(), + 
VarBinArray::from(vec!["x", "y", "z"]).into_array(), + ], + 3, + Validity::NonNullable, + ) + .expect("failed to build inner struct"); + + let arr = StructArray::try_new( + FieldNames::from(["inner", "value"]), + vec![ + inner.into_array(), + PrimitiveArray::new(buffer![1.1f64, 2.2, 3.3], Validity::NonNullable).into_array(), + ], + 3, + Validity::NonNullable, + ) + .expect("failed to build struct_nested fixture"); + vec![arr.into_array()] + } +} + +pub struct ChunkedFixture; + +impl Fixture for ChunkedFixture { + fn name(&self) -> &str { + "chunked.vortex" + } + + fn build(&self) -> Vec { + // 3 chunks of 1000 rows each. Values are deterministic: chunk_idx * 1000 + row_idx. + (0u32..3) + .map(|chunk_idx| { + let values: Vec = (0u32..1000).map(|i| chunk_idx * 1000 + i).collect(); + let primitives = + PrimitiveArray::new(vortex_buffer::Buffer::from(values), Validity::NonNullable); + StructArray::try_new( + FieldNames::from(["id"]), + vec![primitives.into_array()], + 1000, + Validity::NonNullable, + ) + .expect("failed to build chunk") + .into_array() + }) + .collect() + } +} diff --git a/vortex-test/compat-gen/src/fixtures/tpch.rs b/vortex-test/compat-gen/src/fixtures/tpch.rs new file mode 100644 index 00000000000..7e341f7e85c --- /dev/null +++ b/vortex-test/compat-gen/src/fixtures/tpch.rs @@ -0,0 +1,45 @@ +use arrow_array::RecordBatch; +use tpchgen::generators::{LineItemGenerator, OrderGenerator}; +use tpchgen_arrow::RecordBatchIterator; +use vortex_array::arrow::FromArrowArray; +use vortex_array::ArrayRef; + +use super::Fixture; + +const SCALE_FACTOR: f64 = 0.01; + +fn collect_batches_as_vortex(iter: impl RecordBatchIterator) -> Vec { + let batches: Vec = iter.collect(); + batches + .into_iter() + .map(|batch| ArrayRef::from_arrow(batch, false).expect("arrow conversion failed")) + .collect() +} + +pub struct TpchLineitemFixture; + +impl Fixture for TpchLineitemFixture { + fn name(&self) -> &str { + "tpch_lineitem.vortex" + } + + fn build(&self) -> Vec { + let gen = 
LineItemGenerator::new(SCALE_FACTOR, 1, 1); + let arrow_iter = tpchgen_arrow::LineItemArrow::new(gen).with_batch_size(65_536); + collect_batches_as_vortex(arrow_iter) + } +} + +pub struct TpchOrdersFixture; + +impl Fixture for TpchOrdersFixture { + fn name(&self) -> &str { + "tpch_orders.vortex" + } + + fn build(&self) -> Vec { + let gen = OrderGenerator::new(SCALE_FACTOR, 1, 1); + let arrow_iter = tpchgen_arrow::OrderArrow::new(gen).with_batch_size(65_536); + collect_batches_as_vortex(arrow_iter) + } +} diff --git a/vortex-test/compat-gen/src/main.rs b/vortex-test/compat-gen/src/main.rs new file mode 100644 index 00000000000..3777fc64d12 --- /dev/null +++ b/vortex-test/compat-gen/src/main.rs @@ -0,0 +1,56 @@ +mod adapter; +mod fixtures; +mod manifest; + +use std::path::PathBuf; + +use chrono::Utc; +use clap::Parser; + +use crate::fixtures::all_fixtures; +use crate::manifest::Manifest; + +#[derive(Parser)] +#[command(name = "compat-gen", about = "Generate Vortex backward-compat fixture files")] +struct Cli { + /// Version tag for this fixture set (e.g. "0.62.0"). + #[arg(long)] + version: String, + + /// Output directory for generated fixture files. 
+ #[arg(long)] + output: PathBuf, +} + +fn main() -> vortex_error::VortexResult<()> { + let cli = Cli::parse(); + + std::fs::create_dir_all(&cli.output) + .map_err(|e| vortex_error::vortex_err!("failed to create output dir: {e}"))?; + + let fixtures = all_fixtures(); + let mut fixture_names = Vec::with_capacity(fixtures.len()); + + for fixture in &fixtures { + let chunks = fixture.build(); + let path = cli.output.join(fixture.name()); + adapter::write_file(&path, chunks)?; + fixture_names.push(fixture.name().to_string()); + eprintln!(" wrote {}", fixture.name()); + } + + let manifest = Manifest { + version: cli.version.clone(), + generated_at: Utc::now(), + fixtures: fixture_names, + }; + let manifest_path = cli.output.join("manifest.json"); + let manifest_json = serde_json::to_string_pretty(&manifest) + .map_err(|e| vortex_error::vortex_err!("failed to serialize manifest: {e}"))?; + std::fs::write(&manifest_path, manifest_json) + .map_err(|e| vortex_error::vortex_err!("failed to write manifest: {e}"))?; + eprintln!(" wrote manifest.json"); + + eprintln!("done: {} fixtures for v{}", fixtures.len(), cli.version); + Ok(()) +} diff --git a/vortex-test/compat-gen/src/manifest.rs b/vortex-test/compat-gen/src/manifest.rs new file mode 100644 index 00000000000..6a438edc1b2 --- /dev/null +++ b/vortex-test/compat-gen/src/manifest.rs @@ -0,0 +1,10 @@ +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; + +/// Manifest listing all fixtures generated for a given version. 
+#[derive(Debug, Serialize, Deserialize)] +pub struct Manifest { + pub version: String, + pub generated_at: DateTime, + pub fixtures: Vec, +} From 569d38c45ebadc198199ff47b99d78b649f42f39 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 11 Mar 2026 15:27:27 +0000 Subject: [PATCH 2/6] =?UTF-8?q?feat:=20compat-gen=20Epoch=20B=20adapter=20?= =?UTF-8?q?(v0.45.0=E2=80=93v0.52.0)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Write API is the same as Epoch A but with stricter stream bounds (Send + 'static). Also has write_blocking() for sync usage. Signed-off-by: Joe Isaacs Co-Authored-By: Claude Opus 4.6 --- vortex-test/compat-gen/src/adapter.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/vortex-test/compat-gen/src/adapter.rs b/vortex-test/compat-gen/src/adapter.rs index eafc87a1ea5..23ff2e3e71a 100644 --- a/vortex-test/compat-gen/src/adapter.rs +++ b/vortex-test/compat-gen/src/adapter.rs @@ -1,9 +1,10 @@ -// Epoch A adapter — for Vortex v0.36.0 +// Epoch B adapter — for Vortex v0.45.0 through v0.52.0 // -// API at this version: -// - VortexWriteOptions::default() (no session) -// - .write(sink, stream).await returns VortexResult (the sink back) -// - ArrayStream must be Unpin +// API changes from Epoch A: +// - VortexWriteOptions::default() still works (no session) +// - .write(sink, stream).await still returns VortexResult +// - Stream now requires Send + 'static (not just Unpin) +// - Also has .write_blocking(sink, stream) -> VortexResult use std::path::Path; @@ -24,7 +25,8 @@ pub fn write_file(path: &Path, chunks: Vec) -> VortexResult<()> { let file = tokio::fs::File::create(path).await.map_err(|e| { vortex_error::vortex_err!("failed to create {}: {e}", path.display()) })?; - // At 0.36.0, write() returns VortexResult — we discard the sink. + // At 0.45.0–0.52.0: same as Epoch A, write() returns VortexResult. + // Stream bound changed to `S: ArrayStream + Unpin + Send + 'static`. 
let _sink = VortexWriteOptions::default().write(file, stream).await?; Ok(()) }) From a993c6683b92454dda5e8120a0c2107083465f9b Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 11 Mar 2026 15:27:59 +0000 Subject: [PATCH 3/6] feat: compat-gen Epoch C adapter (v0.58.0+/HEAD) Session-based API: VortexSession::default() + session.write_options(). write() now returns WriteSummary and takes &mut sink. This is the adapter that compiles against the current codebase. Signed-off-by: Joe Isaacs Co-Authored-By: Claude Opus 4.6 --- vortex-test/compat-gen/src/adapter.rs | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/vortex-test/compat-gen/src/adapter.rs b/vortex-test/compat-gen/src/adapter.rs index 23ff2e3e71a..8e844396e30 100644 --- a/vortex-test/compat-gen/src/adapter.rs +++ b/vortex-test/compat-gen/src/adapter.rs @@ -1,16 +1,17 @@ -// Epoch B adapter — for Vortex v0.45.0 through v0.52.0 +// Epoch C adapter — for Vortex v0.58.0 through HEAD // -// API changes from Epoch A: -// - VortexWriteOptions::default() still works (no session) -// - .write(sink, stream).await still returns VortexResult -// - Stream now requires Send + 'static (not just Unpin) -// - Also has .write_blocking(sink, stream) -> VortexResult +// API changes from Epoch B: +// - VortexWriteOptions no longer implements Default +// - Must construct via VortexSession: session.write_options() +// - .write(&mut sink, stream).await returns VortexResult +// - WriteOptionsSessionExt trait provides session.write_options() use std::path::Path; use futures::stream; use tokio::runtime::Runtime; -use vortex::file::VortexWriteOptions; +use vortex::file::WriteOptionsSessionExt; +use vortex::VortexSession; use vortex_array::stream::ArrayStreamAdapter; use vortex_array::ArrayRef; use vortex_error::VortexResult; @@ -20,14 +21,17 @@ pub fn write_file(path: &Path, chunks: Vec) -> VortexResult<()> { let dtype = chunks[0].dtype().clone(); let stream = ArrayStreamAdapter::new(dtype, 
stream::iter(chunks.into_iter().map(Ok))); + let session = VortexSession::default(); let rt = Runtime::new().expect("failed to create tokio runtime"); rt.block_on(async { - let file = tokio::fs::File::create(path).await.map_err(|e| { + let mut file = tokio::fs::File::create(path).await.map_err(|e| { vortex_error::vortex_err!("failed to create {}: {e}", path.display()) })?; - // At 0.45.0–0.52.0: same as Epoch A, write() returns VortexResult. - // Stream bound changed to `S: ArrayStream + Unpin + Send + 'static`. - let _sink = VortexWriteOptions::default().write(file, stream).await?; + // At 0.58.0+: write() returns WriteSummary, takes &mut sink. + let _summary = session + .write_options() + .write(&mut file, stream) + .await?; Ok(()) }) } From e9959fe27bb87c78d34e21de66319237cc6c1698 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 11 Mar 2026 15:47:46 +0000 Subject: [PATCH 4/6] docs: update compat-gen plan with full remaining work Adds: compat-test binary design, ClickBench fixture, per-encoding stubs, validation strategy (ChunkedArray + assert_arrays_eq!), reqwest-based HTTPS fetching, CI workflow specs, and complete code size / shared-vs-branch-specific breakdown. Signed-off-by: Joe Isaacs Co-Authored-By: Claude Opus 4.6 --- vortex-test/compat-gen/PLAN.md | 274 ++++++++++++++++++++++++++++++--- 1 file changed, 249 insertions(+), 25 deletions(-) diff --git a/vortex-test/compat-gen/PLAN.md b/vortex-test/compat-gen/PLAN.md index 672c23fdd8c..ed8195d256c 100644 --- a/vortex-test/compat-gen/PLAN.md +++ b/vortex-test/compat-gen/PLAN.md @@ -5,22 +5,26 @@ RFC: https://github.com/vortex-data/rfcs/pull/23 ## Overview A standalone crate (`vortex-test/compat-gen/`) that generates deterministic `.vortex` fixture files -for backward compatibility testing. Not a workspace member — uses path deps to workspace crates. +and validates them across versions. Not a workspace member — uses path deps to workspace crates. 
+ +Two binaries: +- **`compat-gen`**: Build fixture arrays → write `.vortex` files + `manifest.json` +- **`compat-test`**: Fetch fixtures from S3 (plain HTTPS) → read → compare via `assert_arrays_eq!` ## API Epochs The Vortex file write/read API has 3 distinct epochs. The adapter layer (`adapter.rs`) is the only file that changes when cherry-picking to old release branches. -| Epoch | Versions | Write API | Read (in-memory) | Session | -|-------|----------------|---------------------------------------------------------|------------------------------------------------------|---------| -| **A** | 0.36.0 | `VortexWriteOptions::default().write(sink, stream) → W` | `VortexOpenOptions::in_memory().open(buf).await?` | None | -| **B** | 0.45.0–0.52.0 | `VortexWriteOptions::default().write(sink, stream) → W` | `VortexOpenOptions::in_memory().open(buf)?` (sync) | Exists, not wired | -| **C** | 0.58.0–HEAD | `session.write_options().write(sink, stream) → WriteSummary` | `session.open_options().open_buffer(buf)?` (sync) | Central | +| Epoch | Versions | Write API | Read (in-memory) | Scan output | Session | +|-------|----------------|---------------------------------------------------------|------------------------------------------------------|--------------------------|---------| +| **A** | 0.36.0 | `VortexWriteOptions::default().write(sink, stream) → W` | `VortexOpenOptions::in_memory().open(buf).await?` | `into_array_stream()` async | None | +| **B** | 0.45.0–0.52.0 | `VortexWriteOptions::default().write(sink, stream) → W` | `VortexOpenOptions::in_memory().open(buf)?` (sync) | `into_array_iter()` sync | Exists, not wired | +| **C** | 0.58.0–HEAD | `session.write_options().write(sink, stream) → WriteSummary` | `session.open_options().open_buffer(buf)?` (sync) | `into_array_stream()` async | Central | ### Key Breaking Changes -- **A→B**: In-memory `open()` changed from async to sync +- **A→B**: In-memory `open()` changed from async to sync; scan switched to sync 
`into_array_iter()` - **B→C**: - `VortexWriteOptions` lost `Default`, now constructed from `VortexSession` - `write()` return type: `W` (sink) → `WriteSummary` @@ -42,6 +46,26 @@ Array construction is stable across ALL versions — fixture builders need NO ad | `ArrayRef::from_arrow(record_batch, false)` | Stable 0.36.0–HEAD | | `ChunkedArray::try_new(chunks, dtype)` | Stable 0.36.0–HEAD | +## Crate Layout + +``` +vortex-test/compat-gen/ + Cargo.toml # standalone, path deps to workspace + PLAN.md + src/ + main.rs # compat-gen CLI: --version, --output + test_main.rs # compat-test CLI: --fixtures-url + adapter.rs # write_file() + read_file() — ONLY branch-specific file + manifest.rs # Manifest serde struct + validate.rs # fetch from HTTPS + assert_arrays_eq! loop + fixtures/ + mod.rs # Fixture trait + all_fixtures() registry + synthetic.rs # 6 synthetic fixtures + tpch.rs # 2 TPC-H fixtures (lineitem, orders) + clickbench.rs # ClickBench hits 1k fixture + encodings.rs # per-encoding fixture stubs (todo!()) +``` + ## Fixture Suite ### Trait @@ -53,9 +77,9 @@ pub trait Fixture: Send + Sync { } ``` -Returns `Vec` to support chunked fixtures naturally. +Returns `Vec<ArrayRef>` to support chunked fixtures. Single-array fixtures return a one-element vec. -### Synthetic Fixtures +### Synthetic Fixtures (implemented) | File | Schema | Purpose | |------|--------|---------| @@ -66,28 +90,212 @@ Returns `Vec` to support chunked fixtures naturally. 
| `struct_nested.vortex` | `Struct{Struct{i32, Utf8}, f64}` | Nested types | | `chunked.vortex` | Chunked `Struct{u32}` (3 x 1000 rows) | Multi-chunk files | -### Realistic Fixtures +### Realistic Fixtures (implemented) + +| File | Source | Rows | Purpose | +|------|--------|------|---------| +| `tpch_lineitem.vortex` | TPC-H SF 0.01 via `tpchgen-arrow` | ~60K | Numeric + string schema | +| `tpch_orders.vortex` | TPC-H SF 0.01 via `tpchgen-arrow` | ~15K | Date + decimal types | +| `clickbench_hits_1k.vortex` | First 1000 rows of ClickBench `hits` parquet (pinned URL) | 1000 | Wide table (105 cols) | + +### Per-Encoding Fixture Stubs (todo) + +One fixture per stable encoding to exercise encoding-specific read paths. These are stubbed with +`todo!()` until the "stable encodings" RFC defines what's frozen. -| File | Source | Rows | -|------|--------|------| -| `tpch_lineitem.vortex` | TPC-H SF 0.01 | ~60K | -| `tpch_orders.vortex` | TPC-H SF 0.01 | ~15K | +| File | Encoding | Stub? | +|------|----------|-------| +| `enc_dict.vortex` | DictArray | `todo!()` | +| `enc_runend.vortex` | RunEndArray | `todo!()` | +| `enc_constant.vortex` | ConstantArray | `todo!()` | +| `enc_sparse.vortex` | SparseArray | `todo!()` | +| `enc_alp.vortex` | ALPArray | `todo!()` | +| `enc_bitpacked.vortex` | BitPackedArray | `todo!()` | +| `enc_fsst.vortex` | FSSTArray | `todo!()` | ## Adapter Layer -Only `adapter.rs` changes per epoch (~15 lines). See `src/adapter.rs` for the current (Epoch C) -implementation. The git history shows all 3 epoch variants. +Only `adapter.rs` changes per epoch. 
Contains two functions: + +```rust +pub fn write_file(path: &Path, chunks: Vec<ArrayRef>) -> VortexResult<()>; +pub fn read_file(bytes: ByteBuffer) -> VortexResult<Vec<ArrayRef>>; +``` + +### Epoch A — v0.36.0 + +```rust +pub fn write_file(path: &Path, chunks: Vec<ArrayRef>) -> VortexResult<()> { + let dtype = chunks[0].dtype().clone(); + let stream = ArrayStreamAdapter::new(dtype, stream::iter(chunks.into_iter().map(Ok))); + let rt = Runtime::new().expect("tokio runtime"); + rt.block_on(async { + let file = tokio::fs::File::create(path).await?; + let _sink = VortexWriteOptions::default().write(file, stream).await?; + Ok(()) + }) +} + +pub fn read_file(bytes: ByteBuffer) -> VortexResult<Vec<ArrayRef>> { + let rt = Runtime::new().expect("tokio runtime"); + rt.block_on(async { + let file = VortexOpenOptions::in_memory().open(bytes).await?; // async + let arr = file.scan()?.into_array_stream()?.read_all().await?; + Ok(vec![arr]) + }) +} +``` + +### Epoch B — v0.45.0–v0.52.0 + +```rust +pub fn write_file(path: &Path, chunks: Vec<ArrayRef>) -> VortexResult<()> { + // identical to Epoch A — same VortexWriteOptions::default() API + // ... 
+} + +pub fn read_file(bytes: ByteBuffer) -> VortexResult<Vec<ArrayRef>> { + let file = VortexOpenOptions::in_memory().open(bytes)?; // sync now + let arr = file.scan()?.into_array_iter()?.read_all()?; // sync + Ok(vec![arr]) +} +``` + +### Epoch C — v0.58.0+/HEAD + +```rust +pub fn write_file(path: &Path, chunks: Vec<ArrayRef>) -> VortexResult<()> { + let session = VortexSession::default(); + let dtype = chunks[0].dtype().clone(); + let stream = ArrayStreamAdapter::new(dtype, stream::iter(chunks.into_iter().map(Ok))); + let rt = Runtime::new().expect("tokio runtime"); + rt.block_on(async { + let mut file = tokio::fs::File::create(path).await?; + let _summary = session.write_options().write(&mut file, stream).await?; + Ok(()) + }) +} + +pub fn read_file(bytes: ByteBuffer) -> VortexResult<Vec<ArrayRef>> { + let session = VortexSession::default(); + let file = session.open_options().open_buffer(bytes)?; + let rt = Runtime::new().expect("tokio runtime"); + rt.block_on(async { + let arr = file.scan()?.into_array_stream()?.read_all().await?; + Ok(vec![arr]) + }) +} +``` + +## Validation Strategy + +Comparison uses `assert_arrays_eq!` with `ChunkedArray` wrapping: + +```rust +fn validate(actual: Vec<ArrayRef>, expected: Vec<ArrayRef>) -> VortexResult<()> { + let actual_dtype = actual[0].dtype().clone(); + let expected_dtype = expected[0].dtype().clone(); + let actual_chunked = ChunkedArray::try_new(actual, actual_dtype)?; + let expected_chunked = ChunkedArray::try_new(expected, expected_dtype)?; + assert_arrays_eq!(actual_chunked, expected_chunked); + Ok(()) +} +``` + +The writer may re-chunk across versions, but `assert_arrays_eq!` compares element-by-element +so chunk boundaries don't matter. + +## Fixture Fetching + +Fixtures are stored in a public S3 bucket accessible via plain HTTPS. `compat-test` uses +`reqwest` (blocking) to fetch — no AWS SDK needed. 
+ +``` +https://vortex-compat-fixtures.s3.amazonaws.com/v{VERSION}/manifest.json +https://vortex-compat-fixtures.s3.amazonaws.com/v{VERSION}/{fixture}.vortex +``` + +Version discovery: `compat-test` takes a `--versions` flag listing which versions to test, +or discovers them from a top-level `versions.json` in the bucket. + +## CI Workflows + +### `compat-gen-upload.yml` — on tag push or manual dispatch + +```yaml +on: + push: + tags: ["[0-9]+.[0-9]+.[0-9]+"] + workflow_dispatch: + inputs: + tag: { description: "Git tag", required: true } + +jobs: + upload-fixtures: + runs-on: ubuntu-latest + permissions: + id-token: write + steps: + - uses: actions/checkout@v4 + with: { ref: "${{ inputs.tag || github.ref_name }}" } + - uses: dtolnay/rust-toolchain@stable + - run: | + VERSION=${{ inputs.tag || github.ref_name }} + cargo run --manifest-path vortex-test/compat-gen/Cargo.toml \ + --bin compat-gen -- --version "$VERSION" --output /tmp/fixtures/ + - run: | + VERSION=${{ inputs.tag || github.ref_name }} + aws s3 cp /tmp/fixtures/ s3://vortex-compat-fixtures/v${VERSION}/ --recursive +``` + +### `compat-test-weekly.yml` — weekly + manual + +```yaml +on: + schedule: + - cron: "0 6 * * 1" + workflow_dispatch: {} + +jobs: + compat-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + - run: | + cargo run --manifest-path vortex-test/compat-gen/Cargo.toml \ + --bin compat-test -- \ + --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com +``` + +## Code Size Summary + +| Component | ~Lines | Shared across branches? 
| +|-----------|--------|------------------------| +| `Cargo.toml` | 35 | Yes | +| `src/main.rs` (compat-gen CLI) | 56 | Yes | +| `src/test_main.rs` (compat-test CLI) | 40 | Yes | +| `src/adapter.rs` (write + read) | 55 | **No — 3 epoch variants** | +| `src/manifest.rs` | 10 | Yes | +| `src/validate.rs` (fetch + compare) | 60 | Yes | +| `src/fixtures/mod.rs` (trait + registry) | 40 | Yes | +| `src/fixtures/synthetic.rs` (6 fixtures) | 170 | Yes | +| `src/fixtures/tpch.rs` (2 fixtures) | 45 | Yes | +| `src/fixtures/clickbench.rs` (1 fixture) | 50 | Yes | +| `src/fixtures/encodings.rs` (stubs) | 60 | Yes | +| CI workflows (2 YAML files) | 80 | Yes | +| **Total** | **~700** | **~645 shared (92%), ~55 branch-specific (8%)** | ## What Changes Per Version When Cherry-Picking | Component | Changes? | |-----------|----------| | Fixture trait + registry | No | -| Fixture builders (synthetic) | No | -| Fixture builders (TPC-H) | No | -| `adapter.rs` | Yes — ~15 lines, 3 variants | -| `main.rs`, `manifest.rs` | No | +| All fixture builders | No | +| `adapter.rs` | **Yes — ~55 lines, 3 variants** | +| `main.rs`, `test_main.rs`, `manifest.rs`, `validate.rs` | No | | `Cargo.toml` | No (path deps resolve to local version) | +| CI workflows | No | ## Usage @@ -96,9 +304,25 @@ implementation. The git history shows all 3 epoch variants. cargo run --manifest-path vortex-test/compat-gen/Cargo.toml \ --bin compat-gen -- --version 0.62.0 --output /tmp/fixtures/ -# Outputs: -# /tmp/fixtures/manifest.json -# /tmp/fixtures/primitives.vortex -# /tmp/fixtures/strings.vortex -# ... 
+# Validate fixtures from S3 against the current reader +cargo run --manifest-path vortex-test/compat-gen/Cargo.toml \ + --bin compat-test -- \ + --fixtures-url https://vortex-compat-fixtures.s3.amazonaws.com + +# Validate from a local directory (for development) +cargo run --manifest-path vortex-test/compat-gen/Cargo.toml \ + --bin compat-test -- \ + --fixtures-dir /tmp/fixtures/ ``` + +## Implementation Order + +1. ~~Fixture trait + synthetic builders~~ ✅ +2. ~~TPC-H fixtures~~ ✅ +3. ~~compat-gen binary (main.rs)~~ ✅ +4. ~~Adapter write path (3 epochs)~~ ✅ +5. ClickBench fixture +6. Per-encoding fixture stubs +7. Adapter read path (3 epochs) +8. compat-test binary (test_main.rs + validate.rs) +9. CI workflows From ec562cc33e15eb0d79c1b84e8beb59564189c3f8 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 11 Mar 2026 16:02:47 +0000 Subject: [PATCH 5/6] feat: add compat-test binary, ClickBench fixture, encoding stubs, CI workflows - adapter.rs: add read_file() for Epoch C (session-based open_buffer + async scan) - fixtures/clickbench.rs: download pinned ClickBench hits_0.parquet, take 1k rows - fixtures/encodings.rs: todo!() stubs for Dict, RunEnd, Constant, Sparse, ALP, BitPacked, FSST - test_main.rs: CLI with --fixtures-url (HTTPS) or --fixtures-dir (local) - validate.rs: fetch manifest + fixtures, compare via ChunkedArray + assert_arrays_eq! 
- compat-gen-upload.yml: generate + upload to S3 on tag push, updates versions.json - compat-test-weekly.yml: weekly validation of all versions against HEAD reader - Cargo.toml: add reqwest, parquet deps + compat-test binary target Signed-off-by: Joe Isaacs Co-Authored-By: Claude Opus 4.6 --- .github/workflows/compat-gen-upload.yml | 69 ++++++++ .github/workflows/compat-test-weekly.yml | 27 +++ vortex-test/compat-gen/Cargo.toml | 10 ++ vortex-test/compat-gen/src/adapter.rs | 34 ++-- .../compat-gen/src/fixtures/clickbench.rs | 43 +++++ .../compat-gen/src/fixtures/encodings.rs | 30 ++++ vortex-test/compat-gen/src/fixtures/mod.rs | 11 ++ vortex-test/compat-gen/src/test_main.rs | 88 ++++++++++ vortex-test/compat-gen/src/validate.rs | 166 ++++++++++++++++++ 9 files changed, 468 insertions(+), 10 deletions(-) create mode 100644 .github/workflows/compat-gen-upload.yml create mode 100644 .github/workflows/compat-test-weekly.yml create mode 100644 vortex-test/compat-gen/src/fixtures/clickbench.rs create mode 100644 vortex-test/compat-gen/src/fixtures/encodings.rs create mode 100644 vortex-test/compat-gen/src/test_main.rs create mode 100644 vortex-test/compat-gen/src/validate.rs diff --git a/.github/workflows/compat-gen-upload.yml b/.github/workflows/compat-gen-upload.yml new file mode 100644 index 00000000000..7317e611978 --- /dev/null +++ b/.github/workflows/compat-gen-upload.yml @@ -0,0 +1,69 @@ +name: Compat Fixture Upload + +on: + push: + tags: ["[0-9]+.[0-9]+.[0-9]+"] + workflow_dispatch: + inputs: + tag: + description: "Git tag to generate fixtures for (e.g. 
0.62.0)" + required: true + +env: + S3_BUCKET: vortex-compat-fixtures + +jobs: + upload-fixtures: + runs-on: ubuntu-latest + permissions: + id-token: write + contents: read + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.inputs.tag || github.ref_name }} + + - uses: dtolnay/rust-toolchain@stable + + - uses: Swatinem/rust-cache@v2 + with: + workspaces: vortex-test/compat-gen + + - name: Generate fixtures + run: | + VERSION=${{ github.event.inputs.tag || github.ref_name }} + cargo run --manifest-path vortex-test/compat-gen/Cargo.toml \ + --release --bin compat-gen -- \ + --version "$VERSION" --output /tmp/fixtures/ + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ secrets.COMPAT_FIXTURES_ROLE_ARN }} + aws-region: us-east-1 + + - name: Upload to S3 + run: | + VERSION=${{ github.event.inputs.tag || github.ref_name }} + aws s3 cp /tmp/fixtures/ \ + "s3://${S3_BUCKET}/v${VERSION}/" --recursive + + - name: Update versions.json + run: | + VERSION=${{ github.event.inputs.tag || github.ref_name }} + # Fetch existing versions.json or start with empty array + aws s3 cp "s3://${S3_BUCKET}/versions.json" /tmp/versions.json 2>/dev/null \ + || echo '[]' > /tmp/versions.json + # Append new version if not already present, sort + python3 -c " + import json, sys + with open('/tmp/versions.json') as f: + versions = json.load(f) + v = sys.argv[1] + if v not in versions: + versions.append(v) + versions.sort(key=lambda x: list(map(int, x.split('.')))) + with open('/tmp/versions.json', 'w') as f: + json.dump(versions, f, indent=2) + " "$VERSION" + aws s3 cp /tmp/versions.json "s3://${S3_BUCKET}/versions.json" diff --git a/.github/workflows/compat-test-weekly.yml b/.github/workflows/compat-test-weekly.yml new file mode 100644 index 00000000000..3ed04482601 --- /dev/null +++ b/.github/workflows/compat-test-weekly.yml @@ -0,0 +1,27 @@ +name: Compat Test + +on: + schedule: + - cron: "0 6 * * 1" # Monday 
6am UTC + workflow_dispatch: {} + +env: + FIXTURES_URL: https://vortex-compat-fixtures.s3.amazonaws.com + +jobs: + compat-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: dtolnay/rust-toolchain@stable + + - uses: Swatinem/rust-cache@v2 + with: + workspaces: vortex-test/compat-gen + + - name: Run compat tests + run: | + cargo run --manifest-path vortex-test/compat-gen/Cargo.toml \ + --release --bin compat-test -- \ + --fixtures-url "$FIXTURES_URL" diff --git a/vortex-test/compat-gen/Cargo.toml b/vortex-test/compat-gen/Cargo.toml index 5f7f6fe8c1d..259d5f1e146 100644 --- a/vortex-test/compat-gen/Cargo.toml +++ b/vortex-test/compat-gen/Cargo.toml @@ -7,6 +7,10 @@ edition = "2021" name = "compat-gen" path = "src/main.rs" +[[bin]] +name = "compat-test" +path = "src/test_main.rs" + [dependencies] # Vortex crates (path deps — resolve to whatever version is checked out) vortex = { path = "../../vortex", features = ["files", "tokio"] } @@ -19,10 +23,16 @@ tpchgen = "2" tpchgen-arrow = "2" arrow-array = "57" +# ClickBench parquet reading +parquet = "57" + # Async runtime tokio = { version = "1", features = ["full"] } futures = "0.3" +# HTTP fetching (for ClickBench fixture + compat-test S3 downloads) +reqwest = { version = "0.12", features = ["blocking"] } + # CLI + serialization clap = { version = "4", features = ["derive"] } serde = { version = "1", features = ["derive"] } diff --git a/vortex-test/compat-gen/src/adapter.rs b/vortex-test/compat-gen/src/adapter.rs index 8e844396e30..a7e45514b1f 100644 --- a/vortex-test/compat-gen/src/adapter.rs +++ b/vortex-test/compat-gen/src/adapter.rs @@ -1,33 +1,33 @@ // Epoch C adapter — for Vortex v0.58.0 through HEAD // -// API changes from Epoch B: -// - VortexWriteOptions no longer implements Default -// - Must construct via VortexSession: session.write_options() -// - .write(&mut sink, stream).await returns VortexResult -// - WriteOptionsSessionExt trait provides session.write_options() +// Write: 
session.write_options(), returns WriteSummary, takes &mut sink +// Read: session.open_options().open_buffer(buf) (sync), into_array_stream() (async) use std::path::Path; use futures::stream; use tokio::runtime::Runtime; -use vortex::file::WriteOptionsSessionExt; +use vortex::file::{OpenOptionsSessionExt, WriteOptionsSessionExt}; use vortex::VortexSession; -use vortex_array::stream::ArrayStreamAdapter; +use vortex_array::stream::{ArrayStreamAdapter, ArrayStreamExt}; use vortex_array::ArrayRef; +use vortex_buffer::ByteBuffer; use vortex_error::VortexResult; +fn runtime() -> Runtime { + Runtime::new().expect("failed to create tokio runtime") +} + /// Write a sequence of array chunks as a `.vortex` file. pub fn write_file(path: &Path, chunks: Vec) -> VortexResult<()> { let dtype = chunks[0].dtype().clone(); let stream = ArrayStreamAdapter::new(dtype, stream::iter(chunks.into_iter().map(Ok))); let session = VortexSession::default(); - let rt = Runtime::new().expect("failed to create tokio runtime"); - rt.block_on(async { + runtime().block_on(async { let mut file = tokio::fs::File::create(path).await.map_err(|e| { vortex_error::vortex_err!("failed to create {}: {e}", path.display()) })?; - // At 0.58.0+: write() returns WriteSummary, takes &mut sink. let _summary = session .write_options() .write(&mut file, stream) @@ -35,3 +35,17 @@ pub fn write_file(path: &Path, chunks: Vec) -> VortexResult<()> { Ok(()) }) } + +/// Read a `.vortex` file from bytes, returning the arrays. +pub fn read_file(bytes: ByteBuffer) -> VortexResult> { + let session = VortexSession::default(); + let file = session.open_options().open_buffer(bytes)?; + runtime().block_on(async { + let arr = file + .scan()? + .into_array_stream()? 
+ .read_all() + .await?; + Ok(vec![arr]) + }) +} diff --git a/vortex-test/compat-gen/src/fixtures/clickbench.rs b/vortex-test/compat-gen/src/fixtures/clickbench.rs new file mode 100644 index 00000000000..77051575d07 --- /dev/null +++ b/vortex-test/compat-gen/src/fixtures/clickbench.rs @@ -0,0 +1,43 @@ +use std::io::Cursor; + +use arrow_array::RecordBatch; +use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; +use vortex_array::arrow::FromArrowArray; +use vortex_array::ArrayRef; + +use super::Fixture; + +/// First partition of ClickBench hits, limited to 1000 rows. +const CLICKBENCH_URL: &str = + "https://pub-3ba949c0f0354ac18db1f0f14f0a2c52.r2.dev/clickbench/parquet_many/hits_0.parquet"; + +pub struct ClickBenchHits1kFixture; + +impl Fixture for ClickBenchHits1kFixture { + fn name(&self) -> &str { + "clickbench_hits_1k.vortex" + } + + fn build(&self) -> Vec { + let bytes = reqwest::blocking::get(CLICKBENCH_URL) + .expect("failed to download ClickBench parquet") + .bytes() + .expect("failed to read ClickBench response body"); + + let reader = ParquetRecordBatchReaderBuilder::try_new(bytes) + .expect("failed to open parquet") + .with_batch_size(1000) + .with_limit(1000) + .build() + .expect("failed to build parquet reader"); + + let batches: Vec = reader + .collect::, _>>() + .expect("failed to read parquet batches"); + + batches + .into_iter() + .map(|batch| ArrayRef::from_arrow(batch, false).expect("arrow conversion failed")) + .collect() + } +} diff --git a/vortex-test/compat-gen/src/fixtures/encodings.rs b/vortex-test/compat-gen/src/fixtures/encodings.rs new file mode 100644 index 00000000000..e76aba483a1 --- /dev/null +++ b/vortex-test/compat-gen/src/fixtures/encodings.rs @@ -0,0 +1,30 @@ +use vortex_array::ArrayRef; + +use super::Fixture; + +macro_rules! 
encoding_stub { + ($name:ident, $file:expr) => { + pub struct $name; + + impl Fixture for $name { + fn name(&self) -> &str { + $file + } + + fn build(&self) -> Vec { + todo!(concat!( + "blocked on stable-encodings RFC — ", + $file + )) + } + } + }; +} + +encoding_stub!(DictEncodingFixture, "enc_dict.vortex"); +encoding_stub!(RunEndEncodingFixture, "enc_runend.vortex"); +encoding_stub!(ConstantEncodingFixture, "enc_constant.vortex"); +encoding_stub!(SparseEncodingFixture, "enc_sparse.vortex"); +encoding_stub!(AlpEncodingFixture, "enc_alp.vortex"); +encoding_stub!(BitPackedEncodingFixture, "enc_bitpacked.vortex"); +encoding_stub!(FsstEncodingFixture, "enc_fsst.vortex"); diff --git a/vortex-test/compat-gen/src/fixtures/mod.rs b/vortex-test/compat-gen/src/fixtures/mod.rs index b09662bec89..b56d1e897b5 100644 --- a/vortex-test/compat-gen/src/fixtures/mod.rs +++ b/vortex-test/compat-gen/src/fixtures/mod.rs @@ -1,3 +1,5 @@ +mod clickbench; +pub mod encodings; mod synthetic; mod tpch; @@ -26,5 +28,14 @@ pub fn all_fixtures() -> Vec> { Box::new(synthetic::ChunkedFixture), Box::new(tpch::TpchLineitemFixture), Box::new(tpch::TpchOrdersFixture), + Box::new(clickbench::ClickBenchHits1kFixture), + // Encoding stubs — uncomment as stable-encodings RFC lands: + // Box::new(encodings::DictEncodingFixture), + // Box::new(encodings::RunEndEncodingFixture), + // Box::new(encodings::ConstantEncodingFixture), + // Box::new(encodings::SparseEncodingFixture), + // Box::new(encodings::AlpEncodingFixture), + // Box::new(encodings::BitPackedEncodingFixture), + // Box::new(encodings::FsstEncodingFixture), ] } diff --git a/vortex-test/compat-gen/src/test_main.rs b/vortex-test/compat-gen/src/test_main.rs new file mode 100644 index 00000000000..1eb6055b5b8 --- /dev/null +++ b/vortex-test/compat-gen/src/test_main.rs @@ -0,0 +1,88 @@ +mod adapter; +mod fixtures; +mod manifest; +mod validate; + +use std::path::PathBuf; + +use clap::Parser; +use vortex_error::VortexResult; + +use 
crate::validate::{discover_versions, FixtureSource}; + +#[derive(Parser)] +#[command(name = "compat-test", about = "Validate Vortex backward-compat fixtures")] +struct Cli { + /// HTTPS base URL for the fixture bucket. + /// e.g. https://vortex-compat-fixtures.s3.amazonaws.com + #[arg(long)] + fixtures_url: Option, + + /// Local directory containing fixture versions (for development). + #[arg(long)] + fixtures_dir: Option, + + /// Explicit list of versions to test (comma-separated). + /// If omitted, discovers versions from versions.json or directory listing. + #[arg(long, value_delimiter = ',')] + versions: Option>, +} + +fn main() -> VortexResult<()> { + let cli = Cli::parse(); + + let source = match (&cli.fixtures_url, &cli.fixtures_dir) { + (Some(url), None) => FixtureSource::Url(url.clone()), + (None, Some(dir)) => FixtureSource::Dir(dir.clone()), + _ => { + eprintln!("error: specify exactly one of --fixtures-url or --fixtures-dir"); + std::process::exit(1); + } + }; + + let versions = match cli.versions { + Some(v) => v, + None => { + eprintln!("discovering versions..."); + discover_versions(&source)? 
+ } + }; + + eprintln!("testing {} version(s): {}", versions.len(), versions.join(", ")); + + let results = validate::validate_all(&source, &versions)?; + + let mut total_passed = 0; + let mut total_failed = 0; + let mut total_skipped = 0; + + for r in &results { + total_passed += r.passed; + total_failed += r.failed.len(); + total_skipped += r.skipped; + if r.failed.is_empty() { + eprintln!(" v{}: {} passed, {} skipped", r.version, r.passed, r.skipped); + } else { + eprintln!( + " v{}: {} passed, {} FAILED, {} skipped", + r.version, + r.passed, + r.failed.len(), + r.skipped + ); + for (name, err) in &r.failed { + eprintln!(" FAIL {name}: {err}"); + } + } + } + + eprintln!( + "\nresult: {total_passed} passed, {total_failed} failed, {total_skipped} skipped" + ); + + if total_failed > 0 { + std::process::exit(1); + } + + Ok(()) +} diff --git a/vortex-test/compat-gen/src/validate.rs b/vortex-test/compat-gen/src/validate.rs new file mode 100644 index 00000000000..714bf2e9f2a --- /dev/null +++ b/vortex-test/compat-gen/src/validate.rs @@ -0,0 +1,166 @@ +use std::collections::HashMap; +use std::path::{Path, PathBuf}; + +use vortex_array::arrays::ChunkedArray; +use vortex_array::{assert_arrays_eq, ArrayRef, IntoArray}; +use vortex_buffer::ByteBuffer; +use vortex_error::{vortex_bail, vortex_err, VortexResult}; + +use crate::adapter; +use crate::fixtures::{all_fixtures, Fixture}; +use crate::manifest::Manifest; + +/// Result of validating one version's fixtures. +pub struct VersionResult { + pub version: String, + pub passed: usize, + pub skipped: usize, + pub failed: Vec<(String, String)>, +} + +/// Validate all versions' fixtures against the current reader. 
+///
+/// Returns one [`VersionResult`] per entry in `versions`, in the same order.
+///
+/// # Errors
+///
+/// Fails on the first version whose manifest or fixture files cannot be fetched.
+/// Data mismatches do not produce an error here; they are recorded in
+/// [`VersionResult::failed`].
+pub fn validate_all(
+    source: &FixtureSource,
+    versions: &[String],
+) -> VortexResult<Vec<VersionResult>> {
+    // Index the fixture definitions known to this build by file name so that the
+    // entries of each version's manifest can be resolved to an expected-data builder.
+    let fixtures = all_fixtures();
+    let fixture_map: HashMap<&str, &dyn Fixture> =
+        fixtures.iter().map(|f| (f.name(), f.as_ref())).collect();
+
+    versions
+        .iter()
+        .map(|version| validate_version(source, version, &fixture_map))
+        .collect()
+}
+
+/// Validate every fixture listed in the manifest of a single version.
+///
+/// Manifest entries with no matching fixture in `fixture_map` are counted as skipped.
+/// Fixtures that cannot be read back, or whose contents differ from the rebuilt
+/// expected data, are recorded in `failed` together with the error text.
+///
+/// # Errors
+///
+/// Fails if the manifest or a fixture file cannot be fetched from `source`.
+fn validate_version(
+    source: &FixtureSource,
+    version: &str,
+    fixture_map: &HashMap<&str, &dyn Fixture>,
+) -> VortexResult<VersionResult> {
+    let manifest = source.fetch_manifest(version)?;
+    let mut passed = 0;
+    let mut skipped = 0;
+    let mut failed = Vec::new();
+
+    for fixture_name in &manifest.fixtures {
+        let Some(fixture) = fixture_map.get(fixture_name.as_str()) else {
+            eprintln!(" warn: unknown fixture {fixture_name} in v{version}, skipping");
+            skipped += 1;
+            continue;
+        };
+
+        eprintln!(" checking {fixture_name} from v{version}...");
+        let bytes = source.fetch_fixture(version, fixture_name)?;
+        match validate_one(bytes, *fixture) {
+            Ok(()) => passed += 1,
+            Err(e) => {
+                eprintln!(" FAIL: {fixture_name} from v{version}: {e}");
+                failed.push((fixture_name.clone(), e.to_string()));
+            }
+        }
+    }
+
+    Ok(VersionResult {
+        version: version.to_string(),
+        passed,
+        skipped,
+        failed,
+    })
+}
+
+/// Read one fixture file and compare it with the data rebuilt by `fixture`.
+///
+/// Both sides are wrapped in a `ChunkedArray` before comparison so that different
+/// chunk boundaries between writer versions do not count as a difference.
+///
+/// # Errors
+///
+/// Returns an error if the file cannot be read, if either side has no chunks, or if
+/// the arrays differ.
+fn validate_one(bytes: ByteBuffer, fixture: &dyn Fixture) -> VortexResult<()> {
+    let actual = adapter::read_file(bytes)?;
+    let expected = fixture.build();
+
+    // The dtype is taken from the first chunk, so an empty chunk list must be
+    // reported as an error instead of panicking on `[0]`.
+    let (Some(actual_first), Some(expected_first)) = (actual.first(), expected.first()) else {
+        vortex_bail!(
+            "fixture {} has no chunks to compare (read {}, expected {})",
+            fixture.name(),
+            actual.len(),
+            expected.len()
+        );
+    };
+    let actual_dtype = actual_first.dtype().clone();
+    let expected_dtype = expected_first.dtype().clone();
+    let actual_arr = ChunkedArray::try_new(actual, actual_dtype)?.into_array();
+    let expected_arr = ChunkedArray::try_new(expected, expected_dtype)?.into_array();
+
+    // `assert_arrays_eq!` panics on mismatch. Convert that panic into an error so the
+    // caller can record the failure and continue with the remaining fixtures.
+    let outcome = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
+        assert_arrays_eq!(actual_arr, expected_arr);
+    }));
+    outcome.map_err(|payload| {
+        let reason = payload
+            .downcast_ref::<String>()
+            .map(String::as_str)
+            .or_else(|| payload.downcast_ref::<&str>().copied())
+            .unwrap_or("arrays differ");
+        vortex_err!("array mismatch: {reason}")
+    })
+}
+
+/// Source for fetching fixture files — either HTTPS or local directory.
+pub enum FixtureSource { + Url(String), + Dir(PathBuf), +} + +impl FixtureSource { + fn fetch_manifest(&self, version: &str) -> VortexResult { + let json = match self { + FixtureSource::Url(base) => { + let url = format!("{base}/v{version}/manifest.json"); + http_get_bytes(&url)? + } + FixtureSource::Dir(dir) => { + let path = dir.join(format!("v{version}")).join("manifest.json"); + std::fs::read(&path) + .map_err(|e| vortex_err!("failed to read {}: {e}", path.display()))? + } + }; + serde_json::from_slice(&json) + .map_err(|e| vortex_err!("failed to parse manifest for v{version}: {e}")) + } + + fn fetch_fixture(&self, version: &str, name: &str) -> VortexResult { + let bytes = match self { + FixtureSource::Url(base) => { + let url = format!("{base}/v{version}/{name}"); + http_get_bytes(&url)? + } + FixtureSource::Dir(dir) => { + let path = dir.join(format!("v{version}")).join(name); + std::fs::read(&path) + .map_err(|e| vortex_err!("failed to read {}: {e}", path.display()))? + } + }; + Ok(ByteBuffer::from(bytes)) + } +} + +/// Discover versions from a versions.json file, or from local directory listing. +pub fn discover_versions(source: &FixtureSource) -> VortexResult> { + match source { + FixtureSource::Url(base) => { + let url = format!("{base}/versions.json"); + let bytes = http_get_bytes(&url)?; + let versions: Vec = serde_json::from_slice(&bytes) + .map_err(|e| vortex_err!("failed to parse versions.json: {e}"))?; + Ok(versions) + } + FixtureSource::Dir(dir) => { + let mut versions = Vec::new(); + for entry in std::fs::read_dir(dir) + .map_err(|e| vortex_err!("failed to read dir {}: {e}", dir.display()))? 
+ { + let entry = + entry.map_err(|e| vortex_err!("failed to read dir entry: {e}"))?; + let name = entry.file_name(); + let name = name.to_string_lossy(); + if let Some(version) = name.strip_prefix('v') { + if entry.path().join("manifest.json").exists() { + versions.push(version.to_string()); + } + } + } + versions.sort(); + Ok(versions) + } + } +} + +fn http_get_bytes(url: &str) -> VortexResult> { + let response = reqwest::blocking::get(url) + .map_err(|e| vortex_err!("HTTP request failed for {url}: {e}"))?; + if !response.status().is_success() { + vortex_bail!("HTTP {} fetching {url}", response.status()); + } + response + .bytes() + .map(|b| b.to_vec()) + .map_err(|e| vortex_err!("failed to read response body from {url}: {e}")) +} From 0d52464340551cac6bd41502b1d056916e90fde4 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 11 Mar 2026 16:05:37 +0000 Subject: [PATCH 6/6] =?UTF-8?q?feat:=20swap=20adapter=20to=20Epoch=20B=20(?= =?UTF-8?q?v0.45.0=E2=80=93v0.52.0)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VortexWriteOptions::default() (same write as A), sync in-memory open, into_array_iter() (sync scan). 
Signed-off-by: Joe Isaacs Co-Authored-By: Claude Opus 4.6 --- vortex-test/compat-gen/src/adapter.rs | 40 ++++++++++++--------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/vortex-test/compat-gen/src/adapter.rs b/vortex-test/compat-gen/src/adapter.rs index a7e45514b1f..7f173e2b1da 100644 --- a/vortex-test/compat-gen/src/adapter.rs +++ b/vortex-test/compat-gen/src/adapter.rs @@ -1,15 +1,16 @@ -// Epoch C adapter — for Vortex v0.58.0 through HEAD +// Epoch B adapter — for Vortex v0.45.0 through v0.52.0 // -// Write: session.write_options(), returns WriteSummary, takes &mut sink -// Read: session.open_options().open_buffer(buf) (sync), into_array_stream() (async) +// Write: VortexWriteOptions::default(), returns sink W (same as Epoch A) +// Read: VortexOpenOptions::in_memory().open(buf) (NOW SYNC) +// Scan: into_array_iter() (sync iterator) use std::path::Path; use futures::stream; use tokio::runtime::Runtime; -use vortex::file::{OpenOptionsSessionExt, WriteOptionsSessionExt}; -use vortex::VortexSession; -use vortex_array::stream::{ArrayStreamAdapter, ArrayStreamExt}; +use vortex::file::{VortexOpenOptions, VortexWriteOptions}; +use vortex_array::iter::ArrayIteratorExt; +use vortex_array::stream::ArrayStreamAdapter; use vortex_array::ArrayRef; use vortex_buffer::ByteBuffer; use vortex_error::VortexResult; @@ -23,29 +24,24 @@ pub fn write_file(path: &Path, chunks: Vec) -> VortexResult<()> { let dtype = chunks[0].dtype().clone(); let stream = ArrayStreamAdapter::new(dtype, stream::iter(chunks.into_iter().map(Ok))); - let session = VortexSession::default(); runtime().block_on(async { - let mut file = tokio::fs::File::create(path).await.map_err(|e| { + let file = tokio::fs::File::create(path).await.map_err(|e| { vortex_error::vortex_err!("failed to create {}: {e}", path.display()) })?; - let _summary = session - .write_options() - .write(&mut file, stream) - .await?; + // At 0.45.0–0.52.0: same write API as Epoch A. 
+ let _sink = VortexWriteOptions::default().write(file, stream).await?; Ok(()) }) } /// Read a `.vortex` file from bytes, returning the arrays. pub fn read_file(bytes: ByteBuffer) -> VortexResult> { - let session = VortexSession::default(); - let file = session.open_options().open_buffer(bytes)?; - runtime().block_on(async { - let arr = file - .scan()? - .into_array_stream()? - .read_all() - .await?; - Ok(vec![arr]) - }) + // No async runtime needed — both open and scan are sync at this epoch. + let file = VortexOpenOptions::in_memory() + .open(bytes)?; // sync at 0.45.0+ + let arr = file + .scan()? + .into_array_iter()? // sync iterator (replaced into_array_stream) + .read_all()?; // sync read_all + Ok(vec![arr]) }