From 689aaa034d8383c0ddfc9c92bff432c8803699b0 Mon Sep 17 00:00:00 2001
From: anesthetice <118751106+anesthetice@users.noreply.github.com>
Date: Wed, 7 Aug 2024 23:06:24 +0200
Subject: [PATCH 1/4] zstd comp + decomp functions

---
 crates/rnote-engine/Cargo.toml                |  1 +
 .../src/fileformats/rnoteformat/mod.rs        | 98 ++++++++++++++++++-
 2 files changed, 95 insertions(+), 4 deletions(-)

diff --git a/crates/rnote-engine/Cargo.toml b/crates/rnote-engine/Cargo.toml
index c8add5331c..f52e88a73c 100644
--- a/crates/rnote-engine/Cargo.toml
+++ b/crates/rnote-engine/Cargo.toml
@@ -55,6 +55,7 @@ tracing = { workspace = true }
 unicode-segmentation = { workspace = true }
 usvg = { workspace = true }
 xmlwriter = { workspace = true }
+zstd = { workspace = true, features = ["zstdmt"] }
 
 # the long-term plan is to remove the gtk4 dependency entirely after switching to another renderer.
 gtk4 = { workspace = true, optional = true }
diff --git a/crates/rnote-engine/src/fileformats/rnoteformat/mod.rs b/crates/rnote-engine/src/fileformats/rnoteformat/mod.rs
index 9826a02bc4..548bf94270 100644
--- a/crates/rnote-engine/src/fileformats/rnoteformat/mod.rs
+++ b/crates/rnote-engine/src/fileformats/rnoteformat/mod.rs
@@ -42,10 +42,10 @@ fn decompress_from_gzip(compressed: &[u8]) -> Result<Vec<u8>, anyhow::Error> {
             .len()
             .checked_sub(4)
             // only happens if the file has less than 4 bytes
-            .ok_or_else(|| {
-                anyhow::anyhow!("Invalid file")
-                    .context("Failed to get the size of the decompressed data")
-            })?;
+            .ok_or(
+                anyhow::anyhow!("Not a valid gzip-compressed file")
+                    .context("Failed to get the size of the decompressed data"),
+            )?;
         decompressed_size.copy_from_slice(&compressed[idx_start..]);
         // u32 -> usize to avoid issues on 32-bit architectures
         // also more reasonable since the uncompressed size is given by 4 bytes
@@ -57,6 +57,96 @@ fn decompress_from_gzip(compressed: &[u8]) -> Result<Vec<u8>, anyhow::Error> {
     Ok(bytes)
 }
 
+/// Decompress bytes with zstd, pre-allocating the output from the frame's declared content size.
+pub fn decompress_from_zstd(compressed: &[u8]) -> Result<Vec<u8>, anyhow::Error> {
+    // Optimization for the zstd format, less pretty than for gzip but this does shave off a bit of time
+    // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_header
+    let mut bytes: Vec<u8> = {
+        let frame_header_descriptor = compressed.get(4).ok_or(
+            anyhow::anyhow!("Not a valid zstd-compressed file")
+                .context("Failed to get the frame header descriptor of the file"),
+        )?;
+
+        let frame_content_size_flag = frame_header_descriptor >> 6;
+        let single_segment_flag = (frame_header_descriptor >> 5) & 1;
+        let did_field_size = {
+            let dictionary_id_flag = frame_header_descriptor & 0b11; // two lowest bits only
+            if dictionary_id_flag == 3 {
+                4
+            } else {
+                dictionary_id_flag
+            }
+        };
+        // frame content size (FCS) start index
+        let fcs_sidx = (6 + did_field_size - single_segment_flag) as usize;
+        // magic number: 4 bytes + window descriptor: 1 byte if single segment flag is not set + frame header descriptor: 1 byte + dict. field size: 0-4 bytes
+        // testing suggests that dicts. don't improve the compression ratio and worsen writing/reading speeds, therefore they won't be used
+        // thus this part could be simplified, but wouldn't strictly adhere to zstd standards
+
+        match frame_content_size_flag {
+            // not worth it to potentially pre-allocate a maximum of 255 bytes
+            0 => Vec::new(),
+            1 => {
+                let mut decompressed_size: [u8; 2] = [0; 2];
+                decompressed_size.copy_from_slice(
+                    compressed.get(fcs_sidx..fcs_sidx + 2).ok_or(
+                        anyhow::anyhow!("Not a valid zstd-compressed file").context(
+                            "Failed to get the uncompressed size of the data from two bytes",
+                        ),
+                    )?,
+                );
+                Vec::with_capacity(256 + usize::from(u16::from_le_bytes(decompressed_size)))
+            }
+            2 => {
+                let mut decompressed_size: [u8; 4] = [0; 4];
+                decompressed_size.copy_from_slice(
+                    compressed.get(fcs_sidx..fcs_sidx + 4).ok_or(
+                        anyhow::anyhow!("Not a valid zstd-compressed file").context(
+                            "Failed to get the uncompressed size of the data from four bytes",
+                        ),
+                    )?,
+                );
+                let declared = u32::from_le_bytes(decompressed_size);
+                let mut buf = Vec::new();
+                // best-effort: a bogus declared size must not panic or abort the load
+                let _ = buf.try_reserve_exact(usize::try_from(declared).unwrap_or(usize::MAX));
+                buf
+            }
+            // in practice this should not happen, as a rnote file being larger than 4 GiB is very unlikely
+            3 => {
+                let mut decompressed_size: [u8; 8] = [0; 8];
+                decompressed_size.copy_from_slice(compressed.get(fcs_sidx..fcs_sidx + 8).ok_or(
+                    anyhow::anyhow!("Not a valid zstd-compressed file").context(
+                        "Failed to get the uncompressed size of the data from eight bytes",
+                    ),
+                )?);
+                let declared = u64::from_le_bytes(decompressed_size);
+                let mut buf = Vec::new();
+                // best-effort: a bogus declared size must not panic or abort the load
+                let _ = buf.try_reserve_exact(usize::try_from(declared).unwrap_or(usize::MAX));
+                buf
+            }
+            // unreachable since our u8 is formed by only 2 bits
+            4.. => unreachable!(),
+        }
+    };
+    let mut decoder = zstd::Decoder::new(compressed)?;
+    decoder.read_to_end(&mut bytes)?;
+    Ok(bytes)
+}
+
+/// Compress bytes with zstd
+pub fn compress_to_zstd(to_compress: &[u8]) -> Result<Vec<u8>, anyhow::Error> {
+    let mut encoder = zstd::Encoder::new(Vec::<u8>::new(), 9)?;
+    encoder.set_pledged_src_size(Some(to_compress.len() as u64))?;
+    encoder.include_contentsize(true)?;
+    if let Ok(num_workers) = std::thread::available_parallelism() {
+        encoder.multithread(num_workers.get() as u32)?;
+    }
+    encoder.write_all(to_compress)?;
+    Ok(encoder.finish()?)
+}
+
 /// The rnote file wrapper.
 ///
 /// Used to extract and match the version up front, before deserializing the data.

From 1b64bfb0dd630d07f2938f008cf389e0a4982e3e Mon Sep 17 00:00:00 2001
From: anesthetice <118751106+anesthetice@users.noreply.github.com>
Date: Wed, 7 Aug 2024 23:19:42 +0200
Subject: [PATCH 2/4] deps that I forgot to commit

---
 Cargo.lock | 29 +++++++++++++++++++++++++++++
 Cargo.toml |  1 +
 2 files changed, 30 insertions(+)

diff --git a/Cargo.lock b/Cargo.lock
index b38a33f7f1..1c4329a54d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3404,6 +3404,7 @@ dependencies = [
  "unicode-segmentation",
  "usvg",
  "xmlwriter",
+ "zstd",
 ]
 
 [[package]]
@@ -4877,6 +4878,34 @@ version = "1.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde"
 
+[[package]]
+name = "zstd"
+version = "0.13.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9"
+dependencies = [
+ "zstd-safe",
+]
+
+[[package]]
+name = "zstd-safe"
+version = "7.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059"
+dependencies = [
+ "zstd-sys",
+]
+
+[[package]]
+name = "zstd-sys"
+version = "2.0.13+zstd.1.5.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa"
+dependencies = [
+ "cc",
+ "pkg-config",
+]
+
 [[package]]
 name = "zune-core"
 version = "0.4.12"
diff --git a/Cargo.toml b/Cargo.toml
index 6a6aa16a7c..2dd154d581 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -86,6 +86,7 @@ winresource = "0.1.17"
 xmlwriter = "0.1.0"
 # Enabling feature > v20_9 causes linker errors on mingw
 poppler-rs = { version = "0.23.0", features = ["v20_9"] }
+zstd = { version = "0.13", features = ["zstdmt"] }
 
 [patch.crates-io]
 # once a new piet (current v0.6.2) is released with updated cairo and kurbo deps, this can be removed.

From 11fb351078ac576923910d514463c98b0c332ba5 Mon Sep 17 00:00:00 2001
From: anesthetice <118751106+anesthetice@users.noreply.github.com>
Date: Thu, 8 Aug 2024 15:30:48 +0200
Subject: [PATCH 3/4] simpler than I thought + backwards compatible

---
 .../src/fileformats/rnoteformat/mod.rs        | 21 +++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/crates/rnote-engine/src/fileformats/rnoteformat/mod.rs b/crates/rnote-engine/src/fileformats/rnoteformat/mod.rs
index 548bf94270..d73a339a71 100644
--- a/crates/rnote-engine/src/fileformats/rnoteformat/mod.rs
+++ b/crates/rnote-engine/src/fileformats/rnoteformat/mod.rs
@@ -170,9 +170,22 @@ impl RnoteFile {
 
 impl FileFormatLoader for RnoteFile {
     fn load_from_bytes(bytes: &[u8]) -> anyhow::Result<Self> {
-        let wrapper = serde_json::from_slice::<RnotefileWrapper>(
-            &decompress_from_gzip(bytes).context("decompressing bytes failed.")?,
-        )
+        let wrapper = serde_json::from_slice::<RnotefileWrapper>(&{
+            // zstd magic number
+            if bytes.starts_with(&[0x28, 0xb5, 0x2f, 0xfd]) {
+                tracing::info!("using zstd to decompress");
+                decompress_from_zstd(bytes)?
+            }
+            // gzip ID1 and ID2
+            else if bytes.starts_with(&[0x1f, 0x8b]) {
+                tracing::info!("using gzip to decompress");
+                decompress_from_gzip(bytes)?
+            } else {
+                Err(anyhow::anyhow!(
+                    "Unknown compression format, expected zstd or gzip"
+                ))?
+            }
+        })
         .context("deserializing RnotefileWrapper from bytes failed.")?;
 
         // Conversions for older file format versions happen here
@@ -224,7 +237,7 @@ impl FileFormatSaver for RnoteFile {
             version: semver::Version::parse(Self::SEMVER).unwrap(),
             data: ijson::to_value(self).context("converting RnoteFile to JSON value failed.")?,
         };
-        let compressed = compress_to_gzip(
+        let compressed = compress_to_zstd(
             &serde_json::to_vec(&wrapper).context("Serializing RnoteFileWrapper failed.")?,
         )
         .context("compressing bytes failed.")?;

From a5ae6b946ffabb32e2b49acceba1ae13794e1944 Mon Sep 17 00:00:00 2001
From: anesthetice <118751106+anesthetice@users.noreply.github.com>
Date: Thu, 8 Aug 2024 18:49:30 +0200
Subject: [PATCH 4/4] removed tracing code and compress_to_gzip

---
 crates/rnote-engine/src/fileformats/rnoteformat/mod.rs | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/crates/rnote-engine/src/fileformats/rnoteformat/mod.rs b/crates/rnote-engine/src/fileformats/rnoteformat/mod.rs
index d73a339a71..9338d032d5 100644
--- a/crates/rnote-engine/src/fileformats/rnoteformat/mod.rs
+++ b/crates/rnote-engine/src/fileformats/rnoteformat/mod.rs
@@ -21,13 +21,6 @@ use anyhow::Context;
 use serde::{Deserialize, Serialize};
 use std::io::{Read, Write};
 
-/// Compress bytes with gzip.
-fn compress_to_gzip(to_compress: &[u8]) -> Result<Vec<u8>, anyhow::Error> {
-    let mut encoder = flate2::write::GzEncoder::new(Vec::<u8>::new(), flate2::Compression::new(5));
-    encoder.write_all(to_compress)?;
-    Ok(encoder.finish()?)
-}
-
 /// Decompress from gzip.
 fn decompress_from_gzip(compressed: &[u8]) -> Result<Vec<u8>, anyhow::Error> {
     // Optimization for the gzip format, defined by RFC 1952
@@ -95,6 +88,7 @@ pub fn decompress_from_zstd(compressed: &[u8]) -> Result<Vec<u8>, anyhow::Error>
                         ),
                     )?,
                 );
+                // 256 offset
                 Vec::with_capacity(256 + usize::from(u16::from_le_bytes(decompressed_size)))
             }
             2 => {
@@ -173,12 +167,10 @@ impl FileFormatLoader for RnoteFile {
         let wrapper = serde_json::from_slice::<RnotefileWrapper>(&{
             // zstd magic number
             if bytes.starts_with(&[0x28, 0xb5, 0x2f, 0xfd]) {
-                tracing::info!("using zstd to decompress");
                 decompress_from_zstd(bytes)?
             }
             // gzip ID1 and ID2
             else if bytes.starts_with(&[0x1f, 0x8b]) {
-                tracing::info!("using gzip to decompress");
                 decompress_from_gzip(bytes)?
             } else {
                 Err(anyhow::anyhow!(