Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ winresource = "0.1.17"
xmlwriter = "0.1.0"
# Enabling feature > v20_9 causes linker errors on mingw
poppler-rs = { version = "0.23.0", features = ["v20_9"] }
zstd = { version = "0.13", features = ["zstdmt"] }

[patch.crates-io]
# once a new piet (current v0.6.2) is released with updated cairo and kurbo deps, this can be removed.
Expand Down
1 change: 1 addition & 0 deletions crates/rnote-engine/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ tracing = { workspace = true }
unicode-segmentation = { workspace = true }
usvg = { workspace = true }
xmlwriter = { workspace = true }
zstd = { workspace = true, features = ["zstdmt"] }
# the long-term plan is to remove the gtk4 dependency entirely after switching to another renderer.
gtk4 = { workspace = true, optional = true }

Expand Down
125 changes: 110 additions & 15 deletions crates/rnote-engine/src/fileformats/rnoteformat/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,6 @@ use anyhow::Context;
use serde::{Deserialize, Serialize};
use std::io::{Read, Write};

/// Compress bytes with gzip.
fn compress_to_gzip(to_compress: &[u8]) -> Result<Vec<u8>, anyhow::Error> {
let mut encoder = flate2::write::GzEncoder::new(Vec::<u8>::new(), flate2::Compression::new(5));
encoder.write_all(to_compress)?;
Ok(encoder.finish()?)
}

/// Decompress from gzip.
fn decompress_from_gzip(compressed: &[u8]) -> Result<Vec<u8>, anyhow::Error> {
// Optimization for the gzip format, defined by RFC 1952
Expand All @@ -42,10 +35,10 @@ fn decompress_from_gzip(compressed: &[u8]) -> Result<Vec<u8>, anyhow::Error> {
.len()
.checked_sub(4)
// only happens if the file has less than 4 bytes
.ok_or_else(|| {
anyhow::anyhow!("Invalid file")
.context("Failed to get the size of the decompressed data")
})?;
.ok_or(
anyhow::anyhow!("Not a valid gzip-compressed file")
.context("Failed to get the size of the decompressed data"),
)?;
decompressed_size.copy_from_slice(&compressed[idx_start..]);
// u32 -> usize to avoid issues on 32-bit architectures
// also more reasonable since the uncompressed size is given by 4 bytes
Expand All @@ -57,6 +50,97 @@ fn decompress_from_gzip(compressed: &[u8]) -> Result<Vec<u8>, anyhow::Error> {
Ok(bytes)
}

/// Decompress bytes with zstd.
///
/// Reads the frame content size out of the zstd frame header up front so the
/// output buffer can be pre-allocated, then streams the payload through
/// [`zstd::Decoder`].
pub fn decompress_from_zstd(compressed: &[u8]) -> Result<Vec<u8>, anyhow::Error> {
    // Optimization for the zstd format, less pretty than for gzip but this does shave off a bit of time
    // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_header
    let mut bytes: Vec<u8> = {
        // byte 4 (right after the 4-byte magic number) is the frame header descriptor
        let frame_header_descriptor = compressed.get(4).ok_or(
            anyhow::anyhow!("Not a valid zstd-compressed file")
                .context("Failed to get the frame header descriptor of the file"),
        )?;

        // bits 7-6: selects how many bytes encode the frame content size (see match below)
        let frame_content_size_flag = frame_header_descriptor >> 6;
        // bit 5: when set, the window descriptor byte is omitted
        let single_segment_flag = (frame_header_descriptor >> 5) & 1;
        let did_field_size = {
            // bits 1-0: dictionary ID flag. The mask must be the low two bits
            // (0b11), NOT decimal 11 (0b1011) which would also pick up a
            // reserved bit and misreport the field size.
            let dictionary_id_flag = frame_header_descriptor & 0b11;
            // per the spec, flag value 3 means a 4-byte field; 0/1/2 map to 0/1/2 bytes
            if dictionary_id_flag == 3 {
                4
            } else {
                dictionary_id_flag
            }
        };
        // frame content size start index:
        // magic number: 4 bytes + frame header descriptor: 1 byte
        // + window descriptor: 1 byte unless the single segment flag is set
        // + dictionary ID field: 0-4 bytes
        // testing suggests that dicts. don't improve the compression ratio and worsen
        // writing/reading speeds, therefore they won't be used; this part could be
        // simplified, but then it wouldn't strictly adhere to zstd standards
        let fcs_sidx = (6 + did_field_size - single_segment_flag) as usize;

        match frame_content_size_flag {
            // size field absent (or a single byte when the single segment flag is set):
            // not worth it to potentially pre-allocate a maximum of 255 bytes
            0 => Vec::new(),
            1 => {
                let mut decompressed_size: [u8; 2] = [0; 2];
                decompressed_size.copy_from_slice(
                    compressed.get(fcs_sidx..fcs_sidx + 2).ok_or(
                        anyhow::anyhow!("Not a valid zstd-compressed file").context(
                            "Failed to get the uncompressed size of the data from two bytes",
                        ),
                    )?,
                );
                // the 2-byte format stores (size - 256); widen to usize *before*
                // re-adding the offset, since the real size can reach 65791 which
                // overflows u16 (panic in debug builds, wrap-around in release)
                Vec::with_capacity(usize::from(u16::from_le_bytes(decompressed_size)) + 256)
            }
            2 => {
                let mut decompressed_size: [u8; 4] = [0; 4];
                decompressed_size.copy_from_slice(
                    compressed.get(fcs_sidx..fcs_sidx + 4).ok_or(
                        anyhow::anyhow!("Not a valid zstd-compressed file").context(
                            "Failed to get the uncompressed size of the data from four bytes",
                        ),
                    )?,
                );
                Vec::with_capacity(
                    u32::from_le_bytes(decompressed_size)
                        .try_into()
                        .unwrap_or(usize::MAX),
                )
            }
            // in practice this should not happen, as a rnote file being larger than 4 GiB is very unlikely
            3 => {
                let mut decompressed_size: [u8; 8] = [0; 8];
                decompressed_size.copy_from_slice(compressed.get(fcs_sidx..fcs_sidx + 8).ok_or(
                    anyhow::anyhow!("Not a valid zstd-compressed file").context(
                        "Failed to get the uncompressed size of the data from eight bytes",
                    ),
                )?);
                Vec::with_capacity(
                    u64::from_le_bytes(decompressed_size)
                        .try_into()
                        .unwrap_or(usize::MAX),
                )
            }
            // unreachable since the flag is formed by only 2 bits
            4.. => unreachable!(),
        }
    };
    let mut decoder = zstd::Decoder::new(compressed)?;
    decoder.read_to_end(&mut bytes)?;
    Ok(bytes)
}

/// Compress bytes with zstd.
///
/// Pledges the input size so the frame content size is embedded in the frame
/// header (which lets the decompressor pre-allocate), and enables
/// multithreaded compression when the available parallelism can be queried.
pub fn compress_to_zstd(to_compress: &[u8]) -> Result<Vec<u8>, anyhow::Error> {
    let sink = Vec::<u8>::new();
    let mut encoder = zstd::Encoder::new(sink, 9)?;
    // declare the uncompressed size up front and ask for it to be written
    // into the frame header
    encoder.set_pledged_src_size(Some(to_compress.len() as u64))?;
    encoder.include_contentsize(true)?;
    // best effort: stay single-threaded when the core count cannot be determined
    match std::thread::available_parallelism() {
        Ok(workers) => encoder.multithread(workers.get() as u32)?,
        Err(_) => {}
    }
    encoder.write_all(to_compress)?;
    let compressed = encoder.finish()?;
    Ok(compressed)
}

/// The rnote file wrapper.
///
/// Used to extract and match the version up front, before deserializing the data.
Expand All @@ -80,9 +164,20 @@ impl RnoteFile {

impl FileFormatLoader for RnoteFile {
fn load_from_bytes(bytes: &[u8]) -> anyhow::Result<Self> {
let wrapper = serde_json::from_slice::<RnotefileWrapper>(
&decompress_from_gzip(bytes).context("decompressing bytes failed.")?,
)
let wrapper = serde_json::from_slice::<RnotefileWrapper>(&{
// zstd magic number
if bytes.starts_with(&[0x28, 0xb5, 0x2f, 0xfd]) {
decompress_from_zstd(bytes)?
}
// gzip ID1 and ID2
else if bytes.starts_with(&[0x1f, 0x8b]) {
decompress_from_gzip(bytes)?
} else {
Err(anyhow::anyhow!(
"Unknown compression format, expected zstd or gzip"
))?
}
})
.context("deserializing RnotefileWrapper from bytes failed.")?;

// Conversions for older file format versions happen here
Expand Down Expand Up @@ -134,7 +229,7 @@ impl FileFormatSaver for RnoteFile {
version: semver::Version::parse(Self::SEMVER).unwrap(),
data: ijson::to_value(self).context("converting RnoteFile to JSON value failed.")?,
};
let compressed = compress_to_gzip(
let compressed = compress_to_zstd(
&serde_json::to_vec(&wrapper).context("Serializing RnoteFileWrapper failed.")?,
)
.context("compressing bytes failed.")?;
Expand Down