From 20436da7c57f976d3e1c7bed556a0ed3d44f8a7d Mon Sep 17 00:00:00 2001 From: hellobertrand <5901952+hellobertrand@users.noreply.github.com> Date: Sat, 28 Feb 2026 11:09:25 +0100 Subject: [PATCH 1/3] Adopts dual-scale encoding for chunk size Implements a "Dual Scale" mechanism for the chunk size code in the file header. This uses the most significant bit to select between a 4 KB or 64 KB multiplier, significantly expanding the range of supported block sizes up to approximately 8 MB. Explicitly treats a base value of zero (bits 0-6) as 1, so it maps to the smallest block of the selected scale (4 KB on the fine scale, 64 KB on the large scale), improving control over small blocks. This enhancement improves flexibility for various use cases and data scales. Updates documentation to reflect the new encoding scheme and its interpretation. --- docs/FORMAT.md | 9 ++++++--- docs/WHITEPAPER.md | 9 ++++++--- src/lib/zxc_common.c | 20 ++++++++++++++++---- 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/docs/FORMAT.md b/docs/FORMAT.md index e00257ce..0a0a1b4d 100644 --- a/docs/FORMAT.md +++ b/docs/FORMAT.md @@ -54,8 +54,11 @@ Offset Size Field - **Magic Word** (`u32`): `0x9CB02EF5`. - **Format Version** (`u8`): currently `5`. - **Chunk Size Code** (`u8`): - - `0` means default legacy value = 64 units. - - otherwise actual chunk size = `code * 4096` bytes. + - Uses a "Dual Scale" flag on the MSB (Bit 7). + - Bits 0..6 encode the base value $V$ (1 to 127). If $0$, it defaults to $1$ (4 KB). + - If Bit 7 is `0`, multiplier is 4 KB ($Multi = 4096$). + - If Bit 7 is `1`, multiplier is 64 KB ($Multi = 65536$). + - Actual block size is computed as `V * Multi` bytes. - **Flags** (`u8`): - Bit 7 (`0x80`): `HAS_CHECKSUM`. - Bits 0..3: checksum algorithm id (`0` = RapidHash-based folding). @@ -430,7 +433,7 @@ F5 2E B0 9C | 05 | 40 | 80 | 00 00 00 00 00 00 00 | 26 2E - `F5 2E B0 9C` → magic word (LE) = `0x9CB02EF5`. - `05` → format version 5. -- `40` → chunk-size code 64 (`64 * 4096 = 262144` bytes, i.e. 256 KiB). 
+- `40` → chunk-size code 64 (Bit 7=0, V=64) (`64 * 4096 = 262144` bytes, i.e. 256 KiB). - `80` → checksum enabled (`HAS_CHECKSUM=1`, algo id 0). - next 7 bytes are reserved zeros. - `26 2E` → header CRC16. diff --git a/docs/WHITEPAPER.md b/docs/WHITEPAPER.md index f546f99a..f913987f 100644 --- a/docs/WHITEPAPER.md +++ b/docs/WHITEPAPER.md @@ -103,9 +103,12 @@ The file begins with a **16-byte** header that identifies the format and specifi * **Magic Word (4 bytes)**: `0x9 0xCB 0x02E 0xF5`. * **Version (1 byte)**: Current version is `5`. -* **Chunk Size Code (1 byte)**: Defines the processing block size: - - `0` = Default mode (256 KB, for backward compatibility) - - `N` = Chunk size is `N × 4096` bytes (e.g., `62` = 248 KB) +* **Chunk Size Code (1 byte)**: Defines the processing block size using a "Dual Scale" flag: + - **Bit 7 (MSB)**: Multiplier scale. `0` = 4 KB multiplier, `1` = 64 KB multiplier. + - **Bits 0-6**: Base value `V` (from 1 to 127). `0` defaults to `1` (minimum block size is 4 KB). + - *Example 1 (Fine)*: `16` (MSB=0, V=16) → 16 × 4 KB = 64 KB. + - *Example 2 (Large)*: `132` (MSB=1, V=4) → 4 × 64 KB = 256 KB. + - *Example 3 (Max)*: `255` (MSB=1, V=127) → 127 × 64 KB = ~8.1 MB. * **Flags (1 byte)**: Global configuration flags. - **Bit 7 (MSB)**: `HAS_CHECKSUM`. If `1`, checksums are enabled for the stream. Every block will carry a trailing 4-byte checksum, and the footer will contain a global checksum. If `0`, no checksums are present. - **Bits 4-6**: Reserved. 
diff --git a/src/lib/zxc_common.c b/src/lib/zxc_common.c index ed1ab1f1..8cff88db 100644 --- a/src/lib/zxc_common.c +++ b/src/lib/zxc_common.c @@ -181,10 +181,16 @@ int zxc_write_file_header(uint8_t* RESTRICT dst, const size_t dst_capacity, zxc_store_le32(dst, ZXC_MAGIC_WORD); dst[4] = ZXC_FILE_FORMAT_VERSION; - dst[5] = (uint8_t)(ZXC_BLOCK_SIZE / ZXC_BLOCK_UNIT); + + // Dual-scale chunk size encoding + const uint32_t units = (uint32_t)(ZXC_BLOCK_SIZE / ZXC_BLOCK_UNIT); + const uint32_t is_large = (units > 127); + dst[5] = (uint8_t)((is_large << 7) | (units >> (is_large << 2))); + + // Flags are at offset 6 dst[6] = has_checksum ? (ZXC_FILE_FLAG_HAS_CHECKSUM | ZXC_CHECKSUM_RAPIDHASH) : 0; - // Bytes 7-13: Reserved (must be 0) + // Bytes 7-13: Reserved (must be 0, 7 bytes) ZXC_MEMSET(dst + 7, 0, 7); // Bytes 14-15: CRC (16-bit) @@ -221,9 +227,15 @@ int zxc_read_file_header(const uint8_t* RESTRICT src, const size_t src_size, if (UNLIKELY(zxc_le16(src + 14) != zxc_hash16(temp))) return ZXC_ERROR_BAD_HEADER; if (out_block_size) { - const size_t units = src[5] ? src[5] : 64; // Default to 64 block units (256KB) - *out_block_size = units * ZXC_BLOCK_UNIT; + // Read Dual-Scale Chunk Size Code + const uint8_t code = src[5]; + const size_t scale = (code & 0x80) ? (16 * ZXC_BLOCK_UNIT) : ZXC_BLOCK_UNIT; + size_t value = code & 0x7F; + if (UNLIKELY(value == 0)) value = 1; + + *out_block_size = value * scale; } + // Flags are at offset 6 if (out_has_checksum) *out_has_checksum = (src[6] & ZXC_FILE_FLAG_HAS_CHECKSUM) ? 1 : 0; return ZXC_OK; From dfef9a0bf3bff58c9acaf458071e5a5a0b9ff021 Mon Sep 17 00:00:00 2001 From: hellobertrand <5901952+hellobertrand@users.noreply.github.com> Date: Sat, 28 Feb 2026 15:00:27 +0100 Subject: [PATCH 2/3] Clarifies chunk size code details Refines the documentation for the chunk size code, making it more concise and easier to understand. 
Replaces verbose calculation examples with direct summaries of the effective block size ranges for both fine and large scales in the whitepaper. Also standardizes variable notation in the format documentation. --- docs/FORMAT.md | 6 +++--- docs/WHITEPAPER.md | 5 ++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/docs/FORMAT.md b/docs/FORMAT.md index 0a0a1b4d..70933301 100644 --- a/docs/FORMAT.md +++ b/docs/FORMAT.md @@ -55,9 +55,9 @@ Offset Size Field - **Format Version** (`u8`): currently `5`. - **Chunk Size Code** (`u8`): - Uses a "Dual Scale" flag on the MSB (Bit 7). - - Bits 0..6 encode the base value $V$ (1 to 127). If $0$, it defaults to $1$ (4 KB). - - If Bit 7 is `0`, multiplier is 4 KB ($Multi = 4096$). - - If Bit 7 is `1`, multiplier is 64 KB ($Multi = 65536$). + - Bits 0..6 encode the base value `V` (1 to 127). If `0`, it defaults to `1` (4 KB). + - If Bit 7 is `0`, multiplier is 4 KB (`Multi = 4096`). + - If Bit 7 is `1`, multiplier is 64 KB (`Multi = 65536`). - Actual block size is computed as `V * Multi` bytes. - **Flags** (`u8`): - Bit 7 (`0x80`): `HAS_CHECKSUM`. diff --git a/docs/WHITEPAPER.md b/docs/WHITEPAPER.md index f913987f..4bab8619 100644 --- a/docs/WHITEPAPER.md +++ b/docs/WHITEPAPER.md @@ -106,9 +106,8 @@ The file begins with a **16-byte** header that identifies the format and specifi * **Chunk Size Code (1 byte)**: Defines the processing block size using a "Dual Scale" flag: - **Bit 7 (MSB)**: Multiplier scale. `0` = 4 KB multiplier, `1` = 64 KB multiplier. - **Bits 0-6**: Base value `V` (from 1 to 127). `0` defaults to `1` (minimum block size is 4 KB). - - *Example 1 (Fine)*: `16` (MSB=0, V=16) → 16 × 4 KB = 64 KB. - - *Example 2 (Large)*: `132` (MSB=1, V=4) → 4 × 64 KB = 256 KB. - - *Example 3 (Max)*: `255` (MSB=1, V=127) → 127 × 64 KB = ~8.1 MB. + - **MSB=0 (Fine Scale)**: Range from `4 KB` (V=1) to `508 KB` (V=127). + - **MSB=1 (Large Scale)**: Range from `64 KB` (V=1) to `8128 KB` (V=127). 
* **Flags (1 byte)**: Global configuration flags. - **Bit 7 (MSB)**: `HAS_CHECKSUM`. If `1`, checksums are enabled for the stream. Every block will carry a trailing 4-byte checksum, and the footer will contain a global checksum. If `0`, no checksums are present. - **Bits 4-6**: Reserved. From 7cf6597e9ce6cf35f38ac5b016f5b8c5c1c23e93 Mon Sep 17 00:00:00 2001 From: hellobertrand <5901952+hellobertrand@users.noreply.github.com> Date: Sat, 28 Feb 2026 15:27:07 +0100 Subject: [PATCH 3/3] Fix cppcheck: dual-scale chunk size encoding --- src/lib/zxc_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib/zxc_common.c b/src/lib/zxc_common.c index 8cff88db..2df3e052 100644 --- a/src/lib/zxc_common.c +++ b/src/lib/zxc_common.c @@ -183,9 +183,9 @@ int zxc_write_file_header(uint8_t* RESTRICT dst, const size_t dst_capacity, dst[4] = ZXC_FILE_FORMAT_VERSION; // Dual-scale chunk size encoding + // Large scale multiplier is 64 KB, fine scale is 4 KB (ratio: 64 / 4 = 16) const uint32_t units = (uint32_t)(ZXC_BLOCK_SIZE / ZXC_BLOCK_UNIT); - const uint32_t is_large = (units > 127); - dst[5] = (uint8_t)((is_large << 7) | (units >> (is_large << 2))); + dst[5] = units <= 127 ? (uint8_t)units : (uint8_t)((units / 16) | 0x80); // Flags are at offset 6 dst[6] = has_checksum ? (ZXC_FILE_FLAG_HAS_CHECKSUM | ZXC_CHECKSUM_RAPIDHASH) : 0;