From eaa3ae4ae03ee3d891d0064b83b5a77bbb368fcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Thu, 19 Mar 2026 20:56:35 +0100 Subject: [PATCH] Avoid unnecessary buffer zero-fill in Snappy decompression Write directly into spare capacity instead of resize+zero-fill, eliminating unnecessary memset for the decompression output buffer. Co-Authored-By: Claude Opus 4.6 (1M context) --- parquet/src/compression.rs | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/parquet/src/compression.rs b/parquet/src/compression.rs index fe2fb59c5b8c..93e4f42904b9 100644 --- a/parquet/src/compression.rs +++ b/parquet/src/compression.rs @@ -201,7 +201,7 @@ mod snappy_codec { use snap::raw::{Decoder, Encoder, decompress_len, max_compress_len}; use crate::compression::Codec; - use crate::errors::Result; + use crate::errors::{ParquetError, Result}; /// Codec for Snappy compression format. pub struct SnappyCodec { @@ -231,10 +231,23 @@ mod snappy_codec { None => decompress_len(input_buf)?, }; let offset = output_buf.len(); - output_buf.resize(offset + len, 0); - self.decoder - .decompress(input_buf, &mut output_buf[offset..]) - .map_err(|e| e.into()) + output_buf.reserve(len); + // SAFETY: we pass the spare capacity to snappy which will write exactly + // `len` bytes on success. The `set_len` below is only reached when + // decompression succeeds. `MaybeUninit<u8>` has the same layout as `u8`. + let spare = output_buf.spare_capacity_mut(); + let spare_bytes = unsafe { + std::slice::from_raw_parts_mut(spare.as_mut_ptr().cast::<u8>(), spare.len()) + }; + let n = self + .decoder + .decompress(input_buf, &mut spare_bytes[..len]) + .map_err(|e| -> ParquetError { e.into() })?; + // SAFETY: snappy wrote exactly `n` bytes into the spare capacity + unsafe { + output_buf.set_len(offset + n); + } + Ok(n) } fn compress(&mut self, input_buf: &[u8], output_buf: &mut Vec<u8>) -> Result<()> {