Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ winresource = "0.1.17"
xmlwriter = "0.1.0"
# Enabling feature > v20_9 causes linker errors on mingw
poppler-rs = { version = "0.23.0", features = ["v20_9"] }
zstd = { version = "0.13", features = ["zstdmt"] }

[patch.crates-io]
# once a new piet (current v0.6.2) is released with updated cairo and kurbo deps, this can be removed.
Expand Down
1 change: 1 addition & 0 deletions crates/rnote-engine/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ tracing = { workspace = true }
unicode-segmentation = { workspace = true }
usvg = { workspace = true }
xmlwriter = { workspace = true }
zstd = { workspace = true, features = ["zstdmt"] }
# the long-term plan is to remove the gtk4 dependency entirely after switching to another renderer.
gtk4 = { workspace = true, optional = true }

Expand Down
125 changes: 110 additions & 15 deletions crates/rnote-engine/src/fileformats/rnoteformat/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,6 @@ use anyhow::Context;
use serde::{Deserialize, Serialize};
use std::io::{Read, Write};

/// Compress bytes with gzip.
fn compress_to_gzip(to_compress: &[u8]) -> Result<Vec<u8>, anyhow::Error> {
let mut encoder = flate2::write::GzEncoder::new(Vec::<u8>::new(), flate2::Compression::new(5));
encoder.write_all(to_compress)?;
Ok(encoder.finish()?)
}

/// Decompress from gzip.
fn decompress_from_gzip(compressed: &[u8]) -> Result<Vec<u8>, anyhow::Error> {
// Optimization for the gzip format, defined by RFC 1952
Expand All @@ -42,10 +35,10 @@ fn decompress_from_gzip(compressed: &[u8]) -> Result<Vec<u8>, anyhow::Error> {
.len()
.checked_sub(4)
// only happens if the file has less than 4 bytes
.ok_or_else(|| {
anyhow::anyhow!("Invalid file")
.context("Failed to get the size of the decompressed data")
})?;
.ok_or(
anyhow::anyhow!("Not a valid gzip-compressed file")
.context("Failed to get the size of the decompressed data"),
)?;
decompressed_size.copy_from_slice(&compressed[idx_start..]);
// u32 -> usize to avoid issues on 32-bit architectures
// also more reasonable since the uncompressed size is given by 4 bytes
Expand All @@ -57,6 +50,97 @@ fn decompress_from_gzip(compressed: &[u8]) -> Result<Vec<u8>, anyhow::Error> {
Ok(bytes)
}

/// Decompress bytes with zstd.
///
/// Reads the frame content size out of the zstd frame header up front so the
/// output buffer can be pre-allocated, then streams the payload through
/// [`zstd::Decoder`].
pub fn decompress_from_zstd(compressed: &[u8]) -> Result<Vec<u8>, anyhow::Error> {
    // Optimization for the zstd format, less pretty than for gzip but this does shave off a bit of time
    // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_header
    let mut bytes: Vec<u8> = {
        // byte 4 (right after the 4-byte magic number) is the frame header descriptor
        let frame_header_descriptor = compressed.get(4).ok_or(
            anyhow::anyhow!("Not a valid zstd-compressed file")
                .context("Failed to get the frame header descriptor of the file"),
        )?;

        // bits 7-6: selects how many bytes encode the frame content size (see match below)
        let frame_content_size_flag = frame_header_descriptor >> 6;
        // bit 5: when set, the window descriptor byte is omitted
        let single_segment_flag = (frame_header_descriptor >> 5) & 1;
        let did_field_size = {
            // bits 1-0: dictionary ID flag. The mask must be the low two bits
            // (0b11), NOT decimal 11 (0b1011) which would also pick up a
            // reserved bit and misreport the field size.
            let dictionary_id_flag = frame_header_descriptor & 0b11;
            // per the spec, flag value 3 means a 4-byte field; 0/1/2 map to 0/1/2 bytes
            if dictionary_id_flag == 3 {
                4
            } else {
                dictionary_id_flag
            }
        };
        // frame content size start index:
        // magic number: 4 bytes + frame header descriptor: 1 byte
        // + window descriptor: 1 byte unless the single segment flag is set
        // + dictionary ID field: 0-4 bytes
        // testing suggests that dicts. don't improve the compression ratio and worsen
        // writing/reading speeds, therefore they won't be used; this part could be
        // simplified, but then it wouldn't strictly adhere to zstd standards
        let fcs_sidx = (6 + did_field_size - single_segment_flag) as usize;

        match frame_content_size_flag {
            // size field absent (or a single byte when the single segment flag is set):
            // not worth it to potentially pre-allocate a maximum of 255 bytes
            0 => Vec::new(),
            1 => {
                let mut decompressed_size: [u8; 2] = [0; 2];
                decompressed_size.copy_from_slice(
                    compressed.get(fcs_sidx..fcs_sidx + 2).ok_or(
                        anyhow::anyhow!("Not a valid zstd-compressed file").context(
                            "Failed to get the uncompressed size of the data from two bytes",
                        ),
                    )?,
                );
                // the 2-byte format stores (size - 256); widen to usize *before*
                // re-adding the offset, since the real size can reach 65791 which
                // overflows u16 (panic in debug builds, wrap-around in release)
                Vec::with_capacity(usize::from(u16::from_le_bytes(decompressed_size)) + 256)
            }
            2 => {
                let mut decompressed_size: [u8; 4] = [0; 4];
                decompressed_size.copy_from_slice(
                    compressed.get(fcs_sidx..fcs_sidx + 4).ok_or(
                        anyhow::anyhow!("Not a valid zstd-compressed file").context(
                            "Failed to get the uncompressed size of the data from four bytes",
                        ),
                    )?,
                );
                Vec::with_capacity(
                    u32::from_le_bytes(decompressed_size)
                        .try_into()
                        .unwrap_or(usize::MAX),
                )
            }
            // in practice this should not happen, as a rnote file being larger than 4 GiB is very unlikely
            3 => {
                let mut decompressed_size: [u8; 8] = [0; 8];
                decompressed_size.copy_from_slice(compressed.get(fcs_sidx..fcs_sidx + 8).ok_or(
                    anyhow::anyhow!("Not a valid zstd-compressed file").context(
                        "Failed to get the uncompressed size of the data from eight bytes",
                    ),
                )?);
                Vec::with_capacity(
                    u64::from_le_bytes(decompressed_size)
                        .try_into()
                        .unwrap_or(usize::MAX),
                )
            }
            // unreachable since the flag is formed by only 2 bits
            4.. => unreachable!(),
        }
    };
    let mut decoder = zstd::Decoder::new(compressed)?;
    decoder.read_to_end(&mut bytes)?;
    Ok(bytes)
}

/// Compress bytes with zstd.
///
/// Pledges the input size so the frame content size is embedded in the frame
/// header (which lets the decompressor pre-allocate), and enables
/// multithreaded compression when the available parallelism can be queried.
pub fn compress_to_zstd(to_compress: &[u8]) -> Result<Vec<u8>, anyhow::Error> {
    let sink = Vec::<u8>::new();
    let mut encoder = zstd::Encoder::new(sink, 9)?;
    // declare the uncompressed size up front and ask for it to be written
    // into the frame header
    encoder.set_pledged_src_size(Some(to_compress.len() as u64))?;
    encoder.include_contentsize(true)?;
    // best effort: stay single-threaded when the core count cannot be determined
    match std::thread::available_parallelism() {
        Ok(workers) => encoder.multithread(workers.get() as u32)?,
        Err(_) => {}
    }
    encoder.write_all(to_compress)?;
    let compressed = encoder.finish()?;
    Ok(compressed)
}

/// The rnote file wrapper.
///
/// Used to extract and match the version up front, before deserializing the data.
Expand All @@ -80,9 +164,20 @@ impl RnoteFile {

impl FileFormatLoader for RnoteFile {
fn load_from_bytes(bytes: &[u8]) -> anyhow::Result<Self> {
let wrapper = serde_json::from_slice::<RnotefileWrapper>(
&decompress_from_gzip(bytes).context("decompressing bytes failed.")?,
)
let wrapper = serde_json::from_slice::<RnotefileWrapper>(&{
// zstd magic number
if bytes.starts_with(&[0x28, 0xb5, 0x2f, 0xfd]) {
decompress_from_zstd(bytes)?
}
// gzip ID1 and ID2
else if bytes.starts_with(&[0x1f, 0x8b]) {
decompress_from_gzip(bytes)?
} else {
Err(anyhow::anyhow!(
"Unknown compression format, expected zstd or gzip"
))?
}
})
.context("deserializing RnotefileWrapper from bytes failed.")?;

// Conversions for older file format versions happen here
Expand Down Expand Up @@ -134,7 +229,7 @@ impl FileFormatSaver for RnoteFile {
version: semver::Version::parse(Self::SEMVER).unwrap(),
data: ijson::to_value(self).context("converting RnoteFile to JSON value failed.")?,
};
let compressed = compress_to_gzip(
let compressed = compress_to_zstd(
&serde_json::to_vec(&wrapper).context("Serializing RnoteFileWrapper failed.")?,
)
.context("compressing bytes failed.")?;
Expand Down