Skip to content

Commit d45d3ee

Browse files
committed
clean up
Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
1 parent c51561d commit d45d3ee

File tree

19 files changed

+362
-230
lines changed

19 files changed

+362
-230
lines changed

vortex-btrblocks/src/builder.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ fn default_excluded() -> HashSet<SchemeId> {
8080
/// # Examples
8181
///
8282
/// ```rust
83-
/// use vortex_btrblocks::{BtrBlocksCompressorBuilder, Scheme};
83+
/// use vortex_btrblocks::{BtrBlocksCompressorBuilder, Scheme, SchemeExt};
8484
/// use vortex_btrblocks::compressor::integer::DictScheme;
8585
///
8686
/// // Default compressor - all non-excluded schemes allowed.

vortex-btrblocks/src/canonical_compressor.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use crate::CascadingCompressor;
1919
/// # Examples
2020
///
2121
/// ```rust
22-
/// use vortex_btrblocks::{BtrBlocksCompressor, BtrBlocksCompressorBuilder, Scheme};
22+
/// use vortex_btrblocks::{BtrBlocksCompressor, BtrBlocksCompressorBuilder, Scheme, SchemeExt};
2323
/// use vortex_btrblocks::compressor::integer::DictScheme;
2424
///
2525
/// // Default compressor - all schemes allowed.

vortex-btrblocks/src/compressor/float/dictionary.rs

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,24 +19,30 @@ use super::FloatStats;
1919

2020
macro_rules! typed_encode {
2121
($stats:ident, $typed:ident, $validity:ident, $typ:ty) => {{
22-
let distinct = $typed.distinct.as_ref().vortex_expect(
22+
let distinct = $typed.distinct().vortex_expect(
2323
"this must be present since `DictScheme` declared that we need distinct values",
2424
);
2525

26-
let values: Buffer<$typ> = distinct.distinct_values.iter().map(|x| x.0).collect();
26+
let values: Buffer<$typ> = distinct.distinct_values().iter().map(|x| x.0).collect();
2727

2828
let max_code = values.len();
2929
let codes = if max_code <= u8::MAX as usize {
30-
let buf =
31-
<DictEncoder as Encode<$typ, u8>>::encode(&values, $stats.src.as_slice::<$typ>());
30+
let buf = <DictEncoder as Encode<$typ, u8>>::encode(
31+
&values,
32+
$stats.source().as_slice::<$typ>(),
33+
);
3234
PrimitiveArray::new(buf, $validity.clone()).into_array()
3335
} else if max_code <= u16::MAX as usize {
34-
let buf =
35-
<DictEncoder as Encode<$typ, u16>>::encode(&values, $stats.src.as_slice::<$typ>());
36+
let buf = <DictEncoder as Encode<$typ, u16>>::encode(
37+
&values,
38+
$stats.source().as_slice::<$typ>(),
39+
);
3640
PrimitiveArray::new(buf, $validity.clone()).into_array()
3741
} else {
38-
let buf =
39-
<DictEncoder as Encode<$typ, u32>>::encode(&values, $stats.src.as_slice::<$typ>());
42+
let buf = <DictEncoder as Encode<$typ, u32>>::encode(
43+
&values,
44+
$stats.source().as_slice::<$typ>(),
45+
);
4046
PrimitiveArray::new(buf, $validity.clone()).into_array()
4147
};
4248

@@ -53,8 +59,8 @@ macro_rules! typed_encode {
5359

5460
/// Compresses a floating-point array into a dictionary array according to attached stats.
5561
pub fn dictionary_encode(stats: &FloatStats) -> DictArray {
56-
let validity = stats.src.validity();
57-
match &stats.erased {
62+
let validity = stats.source().validity();
63+
match stats.erased() {
5864
ErasedStats::F16(typed) => typed_encode!(stats, typed, validity, f16),
5965
ErasedStats::F32(typed) => typed_encode!(stats, typed, validity, f32),
6066
ErasedStats::F64(typed) => typed_encode!(stats, typed, validity, f64),

vortex-btrblocks/src/compressor/float/mod.rs

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -106,15 +106,15 @@ impl rle::RLEConfig for FloatRLEConfig {
106106

107107
impl RLEStats for FloatStats {
108108
fn value_count(&self) -> u32 {
109-
self.value_count
109+
FloatStats::value_count(self)
110110
}
111111

112112
fn average_run_length(&self) -> u32 {
113-
self.average_run_length
113+
FloatStats::average_run_length(self)
114114
}
115115

116116
fn source(&self) -> &PrimitiveArray {
117-
&self.src
117+
FloatStats::source(self)
118118
}
119119
}
120120

@@ -178,13 +178,13 @@ impl Scheme for ConstantScheme {
178178

179179
let stats = data.float_stats();
180180

181-
if stats.null_count as usize == stats.src.len() || stats.value_count == 0 {
181+
if stats.null_count() as usize == stats.source().len() || stats.value_count() == 0 {
182182
return Ok(0.0);
183183
}
184184

185185
// Can only have 1 distinct value.
186186
if stats.distinct_count().is_some_and(|count| count == 1) {
187-
return Ok(stats.value_count as f64);
187+
return Ok(stats.value_count() as f64);
188188
}
189189

190190
Ok(0.0)
@@ -205,16 +205,19 @@ impl Scheme for ConstantScheme {
205205
match scalar_idx {
206206
Some(idx) => {
207207
let scalar = stats.source().scalar_at(idx)?;
208-
let const_arr = ConstantArray::new(scalar, stats.src.len()).into_array();
208+
let const_arr = ConstantArray::new(scalar, stats.source().len()).into_array();
209209
if !stats.source().all_valid()? {
210-
Ok(MaskedArray::try_new(const_arr, stats.src.validity().clone())?.into_array())
210+
Ok(
211+
MaskedArray::try_new(const_arr, stats.source().validity().clone())?
212+
.into_array(),
213+
)
211214
} else {
212215
Ok(const_arr)
213216
}
214217
}
215218
None => Ok(ConstantArray::new(
216-
Scalar::null(stats.src.dtype().clone()),
217-
stats.src.len(),
219+
Scalar::null(stats.source().dtype().clone()),
220+
stats.source().len(),
218221
)
219222
.into_array()),
220223
}
@@ -250,7 +253,7 @@ impl Scheme for ALPScheme {
250253
return Ok(0.0);
251254
}
252255

253-
estimate_compression_ratio_with_sampling(self, compressor, data, ctx, excludes)
256+
estimate_compression_ratio_with_sampling(self, compressor, data.array(), ctx, excludes)
254257
}
255258

256259
fn compress(
@@ -311,7 +314,7 @@ impl Scheme for ALPRDScheme {
311314
return Ok(0.0);
312315
}
313316

314-
estimate_compression_ratio_with_sampling(self, compressor, data, ctx, excludes)
317+
estimate_compression_ratio_with_sampling(self, compressor, data.array(), ctx, excludes)
315318
}
316319

317320
fn compress(
@@ -365,18 +368,24 @@ impl Scheme for DictScheme {
365368
) -> VortexResult<f64> {
366369
let stats = data.float_stats();
367370

368-
if stats.value_count == 0 {
371+
if stats.value_count() == 0 {
369372
return Ok(0.0);
370373
}
371374

372375
// If the array is high cardinality (>50% unique values), we do not want to compress as a
373376
// dictionary.
374377
if stats
375378
.distinct_count()
376-
.is_some_and(|count| count <= stats.value_count / 2)
379+
.is_some_and(|count| count <= stats.value_count() / 2)
377380
{
378381
// Take a sample and run compression on the sample to determine before/after size.
379-
return estimate_compression_ratio_with_sampling(self, compressor, data, ctx, excludes);
382+
return estimate_compression_ratio_with_sampling(
383+
self,
384+
compressor,
385+
data.array(),
386+
ctx,
387+
excludes,
388+
);
380389
}
381390

382391
Ok(0.0)
@@ -442,14 +451,14 @@ impl Scheme for NullDominated {
442451

443452
let stats = data.float_stats();
444453

445-
if stats.value_count == 0 {
454+
if stats.value_count() == 0 {
446455
// All nulls should use ConstantScheme.
447456
return Ok(0.0);
448457
}
449458

450459
// If more than 90% of the values are null, the array will compress well.
451-
if stats.null_count as f64 / stats.src.len() as f64 > 0.9 {
452-
return Ok(stats.src.len() as f64 / stats.value_count as f64);
460+
if stats.null_count() as f64 / stats.source().len() as f64 > 0.9 {
461+
return Ok(stats.source().len() as f64 / stats.value_count() as f64);
453462
}
454463

455464
// Otherwise we don't go this route.
@@ -468,7 +477,7 @@ impl Scheme for NullDominated {
468477
let stats = data.float_stats();
469478

470479
// We pass None as we only run this pathway for NULL-dominated float arrays.
471-
let sparse_encoded = SparseArray::encode(&stats.src.clone().into_array(), None)?;
480+
let sparse_encoded = SparseArray::encode(&stats.source().clone().into_array(), None)?;
472481

473482
if let Some(sparse) = sparse_encoded.as_opt::<Sparse>() {
474483
let indices = sparse.patches().indices().to_primitive().narrow()?;

vortex-btrblocks/src/compressor/integer/dictionary.rs

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,24 +18,30 @@ use super::IntegerStats;
1818

1919
macro_rules! typed_encode {
2020
($stats:ident, $typed:ident, $validity:ident, $typ:ty) => {{
21-
let distinct = $typed.distinct.as_ref().vortex_expect(
21+
let distinct = $typed.distinct().vortex_expect(
2222
"this must be present since `DictScheme` declared that we need distinct values",
2323
);
2424

25-
let values: Buffer<$typ> = distinct.distinct_values.keys().map(|x| x.0).collect();
25+
let values: Buffer<$typ> = distinct.distinct_values().keys().map(|x| x.0).collect();
2626

2727
let max_code = values.len();
2828
let codes = if max_code <= u8::MAX as usize {
29-
let buf =
30-
<DictEncoder as Encode<$typ, u8>>::encode(&values, $stats.src.as_slice::<$typ>());
29+
let buf = <DictEncoder as Encode<$typ, u8>>::encode(
30+
&values,
31+
$stats.source().as_slice::<$typ>(),
32+
);
3133
PrimitiveArray::new(buf, $validity.clone()).into_array()
3234
} else if max_code <= u16::MAX as usize {
33-
let buf =
34-
<DictEncoder as Encode<$typ, u16>>::encode(&values, $stats.src.as_slice::<$typ>());
35+
let buf = <DictEncoder as Encode<$typ, u16>>::encode(
36+
&values,
37+
$stats.source().as_slice::<$typ>(),
38+
);
3539
PrimitiveArray::new(buf, $validity.clone()).into_array()
3640
} else {
37-
let buf =
38-
<DictEncoder as Encode<$typ, u32>>::encode(&values, $stats.src.as_slice::<$typ>());
41+
let buf = <DictEncoder as Encode<$typ, u32>>::encode(
42+
&values,
43+
$stats.source().as_slice::<$typ>(),
44+
);
3945
PrimitiveArray::new(buf, $validity.clone()).into_array()
4046
};
4147

@@ -57,9 +63,9 @@ macro_rules! typed_encode {
5763
)]
5864
pub fn dictionary_encode(stats: &IntegerStats) -> DictArray {
5965
// We need to preserve the nullability somehow from the original
60-
let src_validity = stats.src.validity();
66+
let src_validity = stats.source().validity();
6167

62-
match &stats.erased {
68+
match stats.erased() {
6369
ErasedStats::U8(typed) => typed_encode!(stats, typed, src_validity, u8),
6470
ErasedStats::U16(typed) => typed_encode!(stats, typed, src_validity, u16),
6571
ErasedStats::U32(typed) => typed_encode!(stats, typed, src_validity, u32),

0 commit comments

Comments
 (0)