From f32b3b30b7770c2c28d090a790264a11f18e057d Mon Sep 17 00:00:00 2001 From: Liam Gray Date: Tue, 5 Aug 2025 14:18:39 +0100 Subject: [PATCH 1/6] Medium string hashing inline(never) instead of cold, medium length performance improvement, url -4% --- rapidhash/src/inner/rapid_const.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/rapidhash/src/inner/rapid_const.rs b/rapidhash/src/inner/rapid_const.rs index 4a065c6..bf0adc7 100644 --- a/rapidhash/src/inner/rapid_const.rs +++ b/rapidhash/src/inner/rapid_const.rs @@ -75,9 +75,8 @@ pub(super) const fn rapidhash_core(mut seed: u64, secrets: &[u64; 7], data: &[u8]) -> u64 { let mut a = 0; let mut b = 0; From 8ed39b4bf9141a4e20faecf7f43baa59572b55aa Mon Sep 17 00:00:00 2001 From: Liam Gray Date: Tue, 5 Aug 2025 16:54:14 +0100 Subject: [PATCH 2/6] Improve small string hashing further --- rapidhash/src/inner/rapid_const.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/rapidhash/src/inner/rapid_const.rs b/rapidhash/src/inner/rapid_const.rs index bf0adc7..c7bfa94 100644 --- a/rapidhash/src/inner/rapid_const.rs +++ b/rapidhash/src/inner/rapid_const.rs @@ -45,22 +45,22 @@ pub(super) const fn rapidhash_core= 4 { - if data.len() >= 8 { - a ^= read_u64(data, 0); - b ^= read_u64(data, data.len() - 8); - } else { - a ^= read_u32(data, 0) as u64; - b ^= read_u32(data, data.len() - 4) as u64; - } + + if data.len() >= 8 { + a = read_u64(data, 0); + b = read_u64(data, data.len() - 8); + } else if data.len() >= 4 { + a = read_u32(data, 0) as u64; + b = read_u32(data, data.len() - 4) as u64; } else if !data.is_empty() { - a ^= ((data[0] as u64) << 45) | data[data.len() - 1] as u64; - b ^= data[data.len() >> 1] as u64; + a = ((data[0] as u64) << 45) | data[data.len() - 1] as u64; + b = data[data.len() >> 1] as u64; } seed = seed.wrapping_add(data.len() as u64); rapidhash_finish::(a, b , seed, secrets) } else { + // rapidhash_core_16_288::(seed, secrets, data) if data.len() <= 288 { // This can cause other code to not be inlined, and slow everything down. So at the cost of // marginally slower (-10%) 16..288 hashing, From 22f052dcff56598c6f3f1e96cae7875634513e44 Mon Sep 17 00:00:00 2001 From: Liam Gray Date: Wed, 6 Aug 2025 01:06:37 +0100 Subject: [PATCH 3/6] Reduce small input size codepath further --- rapidhash-bench-wasm/Cargo.toml | 2 +- rapidhash/src/inner/rapid_const.rs | 23 +++++++++++------------ 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/rapidhash-bench-wasm/Cargo.toml b/rapidhash-bench-wasm/Cargo.toml index 333096e..ba69729 100644 --- a/rapidhash-bench-wasm/Cargo.toml +++ b/rapidhash-bench-wasm/Cargo.toml @@ -11,7 +11,7 @@ name = "wasm" harness = false [dependencies] -rapidhash = { path = "../rapidhash" } +rapidhash = { path = "../rapidhash", default-features = false } foldhash = "0.1.5" fxhash = "0.2.1" diff --git a/rapidhash/src/inner/rapid_const.rs b/rapidhash/src/inner/rapid_const.rs index c7bfa94..653d9ee 100644 --- a/rapidhash/src/inner/rapid_const.rs +++ b/rapidhash/src/inner/rapid_const.rs @@ -60,21 +60,16 @@ pub(super) const fn rapidhash_core(a, b , seed, secrets) } else { - // rapidhash_core_16_288::(seed, secrets, data) - if data.len() <= 288 { - // This can cause other code to not be inlined, and slow everything down. 
So at the cost of - // marginally slower (-10%) 16..288 hashing, - // NOT COMPACT: len is 16..=288 - rapidhash_core_16_288::(seed, secrets, data) - } else { - // len is >288, on a cold path to avoid inlining as this doesn't impact large strings, but - // can otherwise prevent - rapidhash_core_cold::(seed, secrets, data) - } + rapidhash_core_16_288::(seed, secrets, data) } } -// allow rustc to inline this, but it should prefer inlining the .hash and .finish +// Never inline this; keep the small string path as small as possible to improve the inlining +// chances of the write_length_prefix and finish functions. If those two don't get inlined, the +// overall performance can be 5x worse when hashing a single string under 100 bytes. <=288 inputs +// pay the cost of one extra if, and >288 inputs pay one more function call, but this is negligible +// in comparison to the overall hashing cost. +#[cold] #[inline(never)] #[must_use] const fn rapidhash_core_16_288(mut seed: u64, secrets: &[u64; 7], data: &[u8]) -> u64 { @@ -83,6 +78,10 @@ const fn rapidhash_core_16_288 48 { + if slice.len() > 288 { + return rapidhash_core_cold::(seed, secrets, data); + } + // most CPUs appear to benefit from this unrolled loop let mut see1 = seed; let mut see2 = seed; From cd1e0be8ea3e7c097b07815331c39dfa52703de5 Mon Sep 17 00:00:00 2001 From: Liam Gray Date: Wed, 6 Aug 2025 01:55:52 +0100 Subject: [PATCH 4/6] Add foldhash optimisations acknowledgement --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 986392b..7880539 100644 --- a/README.md +++ b/README.md @@ -281,6 +281,6 @@ This project is licensed under both the MIT and Apache-2.0 licenses. You are fre With thanks to [Nicolas De Carli](https://github.com/Nicoshev) for the original [rapidhash](https://github.com/Nicoshev/rapidhash) C++ implementation, which is licensed under the [MIT License](https://github.com/Nicoshev/rapidhash/blob/master/LICENSE). -With thanks to [Orson Peters](https://github.com/orlp) for his work on [foldhash](https://github.com/orlp/foldhash), which inspired much of the integer hashing optimisations in this crate. +With thanks to [Orson Peters](https://github.com/orlp) for his work on [foldhash](https://github.com/orlp/foldhash), which inspired much of the integer hashing optimisations in this crate. Some of the RapidHasher string hashing [optimisations](https://github.com/orlp/foldhash/pull/35) have made their way back into foldhash as a thank-you, and both hashers are now very similar in performance and quality. The Rust community wins!
With thanks to [Justin Bradford](https://github.com/jabr) for letting us use the rapidhash crate name 🍻 From 38b0436abab3a3fe9cf9d3bc43b28ec16e1bff12 Mon Sep 17 00:00:00 2001 From: Liam Gray Date: Sat, 9 Aug 2025 13:58:12 +0100 Subject: [PATCH 5/6] Commented lines for high granularity benchmarking on short inputs --- rapidhash-bench/benches/bench/basic.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rapidhash-bench/benches/bench/basic.rs b/rapidhash-bench/benches/bench/basic.rs index 1515823..5a039d0 100644 --- a/rapidhash-bench/benches/bench/basic.rs +++ b/rapidhash-bench/benches/bench/basic.rs @@ -69,7 +69,8 @@ where fn bench_group(c: &mut Criterion, group_name: &str) { let mut group = c.benchmark_group(group_name.to_string()); let sizes = [2usize, 8, 16, 25, 50, 64, 80, 160, 256, 350, 1024, 4096, 65536, 1024 * 1024 * 500]; - for &size in &sizes { + // let sizes = 0usize..68; // for micro short input testing + for size in sizes { profile_bytes::(size, &mut group); } profile_int::("u8", &mut group); @@ -113,7 +114,7 @@ fn profile_bytes_raw u64>( fn bench_group_raw u64>(c: &mut Criterion, group_name: &str, hash: &H) { let mut group = c.benchmark_group(group_name.to_string()); let sizes = [2usize, 8, 16, 25, 50, 64, 80, 160, 256, 350, 1024, 4096, 65536, 1024 * 1024 * 500]; - for &size in &sizes { + for size in sizes { profile_bytes_raw(hash, size, &mut group); } } From b297584fdbe10c6e0bf955192e03cda0f1050524 Mon Sep 17 00:00:00 2001 From: Liam Gray Date: Sat, 9 Aug 2025 14:30:28 +0100 Subject: [PATCH 6/6] Use non-portable read_u64 helper for RapidHasher --- rapidhash/src/inner/{mix.rs => mix_np.rs} | 0 rapidhash/src/inner/mod.rs | 7 +- rapidhash/src/inner/rapid_const.rs | 110 +++++++++--------- rapidhash/src/inner/rapid_hasher.rs | 2 +- rapidhash/src/inner/read_np.rs | 134 ++++++++++++++++++++++ rapidhash/src/inner/seeding.rs | 2 +- 6 files changed, 196 insertions(+), 59 deletions(-) rename rapidhash/src/inner/{mix.rs => mix_np.rs} (100%) create mode 100644 rapidhash/src/inner/read_np.rs diff --git a/rapidhash/src/inner/mix.rs b/rapidhash/src/inner/mix_np.rs similarity index 100% rename from rapidhash/src/inner/mix.rs rename to rapidhash/src/inner/mix_np.rs diff --git a/rapidhash/src/inner/mod.rs b/rapidhash/src/inner/mod.rs index d9f7461..b78f07b 100644 --- a/rapidhash/src/inner/mod.rs +++ b/rapidhash/src/inner/mod.rs @@ -33,8 +33,9 @@ mod rapid_const; mod rapid_hasher; mod state; pub(crate) mod seeding; -mod mix; +mod mix_np; mod seed; +mod read_np; #[doc(inline)] pub use rapid_hasher::*; @@ -53,7 +54,7 @@ mod tests { use std::hash::{BuildHasher, Hash, Hasher}; use std::collections::BTreeSet; use rand::Rng; - use crate::inner::mix::rapid_mix_np; + use crate::inner::mix_np::rapid_mix_np; use super::seed::{DEFAULT_RAPID_SECRETS, DEFAULT_SEED}; use super::rapid_const::{rapidhash_rs, rapidhash_rs_seeded}; @@ -66,6 +67,7 @@ mod tests { } /// `#[derive(Hash)]` writes a length prefix first, check understanding. + #[cfg(target_endian = "little")] #[test] fn derive_hash_works() { let object = Object { bytes: b"hello world".to_vec() }; @@ -186,6 +188,7 @@ mod tests { } /// Compare to the C rapidhash implementation to ensure we match perfectly. 
+ #[cfg(target_endian = "little")] #[test] fn compare_to_c() { use rand::Rng; diff --git a/rapidhash/src/inner/rapid_const.rs b/rapidhash/src/inner/rapid_const.rs index 653d9ee..a8a1b17 100644 --- a/rapidhash/src/inner/rapid_const.rs +++ b/rapidhash/src/inner/rapid_const.rs @@ -1,5 +1,5 @@ -use crate::util::read::{read_u32, read_u64}; -use super::mix::{rapid_mix_np, rapid_mum_np}; +use super::mix_np::{rapid_mix_np, rapid_mum_np}; +use super::read_np::{read_u32_np, read_u64_np}; #[cfg(test)] use super::{DEFAULT_RAPID_SECRETS, RapidSecrets}; @@ -47,11 +47,11 @@ pub(super) const fn rapidhash_core= 8 { - a = read_u64(data, 0); - b = read_u64(data, data.len() - 8); + a = read_u64_np(data, 0); + b = read_u64_np(data, data.len() - 8); } else if data.len() >= 4 { - a = read_u32(data, 0) as u64; - b = read_u32(data, data.len() - 4) as u64; + a = read_u32_np(data, 0) as u64; + b = read_u32_np(data, data.len() - 4) as u64; } else if !data.is_empty() { a = ((data[0] as u64) << 45) | data[data.len() - 1] as u64; b = data[data.len() >> 1] as u64; @@ -87,9 +87,9 @@ const fn rapidhash_core_16_288= 48 { - seed = rapid_mix_np::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); - see1 = rapid_mix_np::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ see1); - see2 = rapid_mix_np::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); + seed = rapid_mix_np::(read_u64_np(slice, 0) ^ secrets[0], read_u64_np(slice, 8) ^ seed); + see1 = rapid_mix_np::(read_u64_np(slice, 16) ^ secrets[1], read_u64_np(slice, 24) ^ see1); + see2 = rapid_mix_np::(read_u64_np(slice, 32) ^ secrets[2], read_u64_np(slice, 40) ^ see2); let (_, split) = slice.split_at(48); slice = split; } @@ -98,14 +98,14 @@ const fn rapidhash_core_16_288 16 { - seed = rapid_mix_np::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); + seed = rapid_mix_np::(read_u64_np(slice, 0) ^ secrets[0], read_u64_np(slice, 8) ^ seed); if slice.len() > 32 { - seed = rapid_mix_np::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ seed); + seed = rapid_mix_np::(read_u64_np(slice, 16) ^ secrets[1], read_u64_np(slice, 24) ^ seed); } } - a ^= read_u64(data, data.len() - 16); - b ^= read_u64(data, data.len() - 8); + a ^= read_u64_np(data, data.len() - 16); + b ^= read_u64_np(data, data.len() - 8); seed = seed.wrapping_add(data.len() as u64); rapidhash_finish::(a, b , seed, secrets) @@ -132,46 +132,46 @@ const fn rapidhash_core_cold= 224 { - seed = rapid_mix_np::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); - see1 = rapid_mix_np::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ see1); - see2 = rapid_mix_np::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); - see3 = rapid_mix_np::(read_u64(slice, 48) ^ secrets[3], read_u64(slice, 56) ^ see3); - see4 = rapid_mix_np::(read_u64(slice, 64) ^ secrets[4], read_u64(slice, 72) ^ see4); - see5 = rapid_mix_np::(read_u64(slice, 80) ^ secrets[5], read_u64(slice, 88) ^ see5); - see6 = rapid_mix_np::(read_u64(slice, 96) ^ secrets[6], read_u64(slice, 104) ^ see6); - - seed = rapid_mix_np::(read_u64(slice, 112) ^ secrets[0], read_u64(slice, 120) ^ seed); - see1 = rapid_mix_np::(read_u64(slice, 128) ^ secrets[1], read_u64(slice, 136) ^ see1); - see2 = rapid_mix_np::(read_u64(slice, 144) ^ secrets[2], read_u64(slice, 152) ^ see2); - see3 = rapid_mix_np::(read_u64(slice, 160) ^ secrets[3], read_u64(slice, 168) ^ see3); - see4 = rapid_mix_np::(read_u64(slice, 176) ^ secrets[4], read_u64(slice, 184) ^ see4); - see5 = rapid_mix_np::(read_u64(slice, 192) ^ secrets[5], 
read_u64(slice, 200) ^ see5); - see6 = rapid_mix_np::(read_u64(slice, 208) ^ secrets[6], read_u64(slice, 216) ^ see6); + seed = rapid_mix_np::(read_u64_np(slice, 0) ^ secrets[0], read_u64_np(slice, 8) ^ seed); + see1 = rapid_mix_np::(read_u64_np(slice, 16) ^ secrets[1], read_u64_np(slice, 24) ^ see1); + see2 = rapid_mix_np::(read_u64_np(slice, 32) ^ secrets[2], read_u64_np(slice, 40) ^ see2); + see3 = rapid_mix_np::(read_u64_np(slice, 48) ^ secrets[3], read_u64_np(slice, 56) ^ see3); + see4 = rapid_mix_np::(read_u64_np(slice, 64) ^ secrets[4], read_u64_np(slice, 72) ^ see4); + see5 = rapid_mix_np::(read_u64_np(slice, 80) ^ secrets[5], read_u64_np(slice, 88) ^ see5); + see6 = rapid_mix_np::(read_u64_np(slice, 96) ^ secrets[6], read_u64_np(slice, 104) ^ see6); + + seed = rapid_mix_np::(read_u64_np(slice, 112) ^ secrets[0], read_u64_np(slice, 120) ^ seed); + see1 = rapid_mix_np::(read_u64_np(slice, 128) ^ secrets[1], read_u64_np(slice, 136) ^ see1); + see2 = rapid_mix_np::(read_u64_np(slice, 144) ^ secrets[2], read_u64_np(slice, 152) ^ see2); + see3 = rapid_mix_np::(read_u64_np(slice, 160) ^ secrets[3], read_u64_np(slice, 168) ^ see3); + see4 = rapid_mix_np::(read_u64_np(slice, 176) ^ secrets[4], read_u64_np(slice, 184) ^ see4); + see5 = rapid_mix_np::(read_u64_np(slice, 192) ^ secrets[5], read_u64_np(slice, 200) ^ see5); + see6 = rapid_mix_np::(read_u64_np(slice, 208) ^ secrets[6], read_u64_np(slice, 216) ^ see6); let (_, split) = slice.split_at(224); slice = split; } if slice.len() >= 112 { - seed = rapid_mix_np::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); - see1 = rapid_mix_np::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ see1); - see2 = rapid_mix_np::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); - see3 = rapid_mix_np::(read_u64(slice, 48) ^ secrets[3], read_u64(slice, 56) ^ see3); - see4 = rapid_mix_np::(read_u64(slice, 64) ^ secrets[4], read_u64(slice, 72) ^ see4); - see5 = rapid_mix_np::(read_u64(slice, 80) ^ secrets[5], read_u64(slice, 88) ^ see5); - see6 = rapid_mix_np::(read_u64(slice, 96) ^ secrets[6], read_u64(slice, 104) ^ see6); + seed = rapid_mix_np::(read_u64_np(slice, 0) ^ secrets[0], read_u64_np(slice, 8) ^ seed); + see1 = rapid_mix_np::(read_u64_np(slice, 16) ^ secrets[1], read_u64_np(slice, 24) ^ see1); + see2 = rapid_mix_np::(read_u64_np(slice, 32) ^ secrets[2], read_u64_np(slice, 40) ^ see2); + see3 = rapid_mix_np::(read_u64_np(slice, 48) ^ secrets[3], read_u64_np(slice, 56) ^ see3); + see4 = rapid_mix_np::(read_u64_np(slice, 64) ^ secrets[4], read_u64_np(slice, 72) ^ see4); + see5 = rapid_mix_np::(read_u64_np(slice, 80) ^ secrets[5], read_u64_np(slice, 88) ^ see5); + see6 = rapid_mix_np::(read_u64_np(slice, 96) ^ secrets[6], read_u64_np(slice, 104) ^ see6); let (_, split) = slice.split_at(112); slice = split; } } else { while slice.len() >= 112 { - seed = rapid_mix_np::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); - see1 = rapid_mix_np::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ see1); - see2 = rapid_mix_np::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); - see3 = rapid_mix_np::(read_u64(slice, 48) ^ secrets[3], read_u64(slice, 56) ^ see3); - see4 = rapid_mix_np::(read_u64(slice, 64) ^ secrets[4], read_u64(slice, 72) ^ see4); - see5 = rapid_mix_np::(read_u64(slice, 80) ^ secrets[5], read_u64(slice, 88) ^ see5); - see6 = rapid_mix_np::(read_u64(slice, 96) ^ secrets[6], read_u64(slice, 104) ^ see6); + seed = rapid_mix_np::(read_u64_np(slice, 0) ^ secrets[0], read_u64_np(slice, 8) 
^ seed); + see1 = rapid_mix_np::(read_u64_np(slice, 16) ^ secrets[1], read_u64_np(slice, 24) ^ see1); + see2 = rapid_mix_np::(read_u64_np(slice, 32) ^ secrets[2], read_u64_np(slice, 40) ^ see2); + see3 = rapid_mix_np::(read_u64_np(slice, 48) ^ secrets[3], read_u64_np(slice, 56) ^ see3); + see4 = rapid_mix_np::(read_u64_np(slice, 64) ^ secrets[4], read_u64_np(slice, 72) ^ see4); + see5 = rapid_mix_np::(read_u64_np(slice, 80) ^ secrets[5], read_u64_np(slice, 88) ^ see5); + see6 = rapid_mix_np::(read_u64_np(slice, 96) ^ secrets[6], read_u64_np(slice, 104) ^ see6); let (_, split) = slice.split_at(112); slice = split; } @@ -179,25 +179,25 @@ const fn rapidhash_core_cold= 48 { - seed = rapid_mix_np::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); - see1 = rapid_mix_np::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ see1); - see2 = rapid_mix_np::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); + seed = rapid_mix_np::(read_u64_np(slice, 0) ^ secrets[0], read_u64_np(slice, 8) ^ seed); + see1 = rapid_mix_np::(read_u64_np(slice, 16) ^ secrets[1], read_u64_np(slice, 24) ^ see1); + see2 = rapid_mix_np::(read_u64_np(slice, 32) ^ secrets[2], read_u64_np(slice, 40) ^ see2); let (_, split) = slice.split_at(48); slice = split; if slice.len() >= 48 { - seed = rapid_mix_np::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); - see1 = rapid_mix_np::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ see1); - see2 = rapid_mix_np::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); + seed = rapid_mix_np::(read_u64_np(slice, 0) ^ secrets[0], read_u64_np(slice, 8) ^ seed); + see1 = rapid_mix_np::(read_u64_np(slice, 16) ^ secrets[1], read_u64_np(slice, 24) ^ see1); + see2 = rapid_mix_np::(read_u64_np(slice, 32) ^ secrets[2], read_u64_np(slice, 40) ^ see2); let (_, split) = slice.split_at(48); slice = split; } } } else { while slice.len() >= 48 { - seed = rapid_mix_np::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); - see1 = rapid_mix_np::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ see1); - see2 = rapid_mix_np::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); + seed = rapid_mix_np::(read_u64_np(slice, 0) ^ secrets[0], read_u64_np(slice, 8) ^ seed); + see1 = rapid_mix_np::(read_u64_np(slice, 16) ^ secrets[1], read_u64_np(slice, 24) ^ see1); + see2 = rapid_mix_np::(read_u64_np(slice, 32) ^ secrets[2], read_u64_np(slice, 40) ^ see2); let (_, split) = slice.split_at(48); slice = split; } @@ -211,14 +211,14 @@ const fn rapidhash_core_cold 16 { - seed = rapid_mix_np::(read_u64(slice, 0) ^ secrets[2], read_u64(slice, 8) ^ seed); + seed = rapid_mix_np::(read_u64_np(slice, 0) ^ secrets[2], read_u64_np(slice, 8) ^ seed); if slice.len() > 32 { - seed = rapid_mix_np::(read_u64(slice, 16) ^ secrets[2], read_u64(slice, 24) ^ seed); + seed = rapid_mix_np::(read_u64_np(slice, 16) ^ secrets[2], read_u64_np(slice, 24) ^ seed); } } - a ^= read_u64(data, data.len() - 16); - b ^= read_u64(data, data.len() - 8); + a ^= read_u64_np(data, data.len() - 16); + b ^= read_u64_np(data, data.len() - 8); seed = seed.wrapping_add(data.len() as u64); rapidhash_finish::(a, b , seed, secrets) diff --git a/rapidhash/src/inner/rapid_hasher.rs b/rapidhash/src/inner/rapid_hasher.rs index fe172b9..c15f1e7 100644 --- a/rapidhash/src/inner/rapid_hasher.rs +++ b/rapidhash/src/inner/rapid_hasher.rs @@ -1,6 +1,6 @@ use core::hash::{BuildHasher, Hasher}; use super::DEFAULT_RAPID_SECRETS; -use super::mix::rapid_mix_np; +use super::mix_np::rapid_mix_np; use 
super::rapid_const::rapidhash_core; use super::seed::rapidhash_seed; diff --git a/rapidhash/src/inner/read_np.rs b/rapidhash/src/inner/read_np.rs new file mode 100644 index 0000000..c591284 --- /dev/null +++ b/rapidhash/src/inner/read_np.rs @@ -0,0 +1,134 @@ +//! Internal module for reading unaligned bytes from a slice into `u64` and `u32` values. +//! +//! This is a non-portable implementation specifically designed for `RapidHasher`. + +/// Hacky const-friendly memory-safe unaligned bytes to u64. Compiler can't seem to remove the +/// bounds check, and so we have an unsafe version behind the `unsafe` feature flag. +#[cfg(not(feature = "unsafe"))] +#[inline(always)] +pub(crate) const fn read_u64_np(slice: &[u8], offset: usize) -> u64 { + // equivalent to slice[offset..offset+8].try_into().unwrap(), but const-friendly + let maybe_buf = slice.split_at(offset).1.first_chunk::<8>(); + let buf = match maybe_buf { + Some(buf) => *buf, + None => panic!("read_u64: slice too short"), + }; + u64::from_ne_bytes(buf) +} + +/// Hacky const-friendly memory-safe unaligned bytes to u32. Compiler can't seem to remove the +/// bounds check, and so we have an unsafe version behind the `unsafe` feature flag. +#[cfg(not(feature = "unsafe"))] +#[inline(always)] +pub(crate) const fn read_u32_np(slice: &[u8], offset: usize) -> u32 { + // equivalent to slice[offset..offset+4].try_into().unwrap(), but const-friendly + let maybe_buf = slice.split_at(offset).1.first_chunk::<4>(); + let buf = match maybe_buf { + Some(buf) => *buf, + None => panic!("read_u32: slice too short"), + }; + u32::from_ne_bytes(buf) +} + +/// Unsafe but const-friendly unaligned bytes to u64. The compiler can't seem to remove the bounds +/// checks for small integers because we do some funky bit shifting in the indexing. +/// +/// SAFETY: `slice` must be at least `offset+8` bytes long, which we guarantee in this rapidhash +/// implementation. +#[cfg(feature = "unsafe")] +#[inline(always)] +pub(crate) const fn read_u64_np(slice: &[u8], offset: usize) -> u64 { + debug_assert!(offset as isize >= 0); + debug_assert!(slice.len() >= 8 + offset); + unsafe { core::ptr::read_unaligned(slice.as_ptr().offset(offset as isize) as *const u64) } +} + +/// Unsafe but const-friendly unaligned bytes to u32. The compiler can't seem to remove the bounds +/// checks for small integers because we do some funky bit shifting in the indexing. +/// +/// SAFETY: `slice` must be at least `offset+4` bytes long, which we guarantee in this rapidhash +/// implementation.
+#[cfg(feature = "unsafe")] +#[inline(always)] +pub(crate) const fn read_u32_np(slice: &[u8], offset: usize) -> u32 { + debug_assert!(offset as isize >= 0); + debug_assert!(slice.len() >= 4 + offset); + unsafe { core::ptr::read_unaligned(slice.as_ptr().offset(offset as isize) as *const u32) } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[cfg(target_endian = "little")] + #[test] + fn test_read_u32_np() { + let bytes = &[23, 145, 3, 34]; + + let split_result = bytes.split_at(0).1; + assert_eq!(split_result.len(), 4); + let maybe_buf = split_result.first_chunk::<4>(); + assert_eq!(maybe_buf, Some(&[23, 145, 3, 34])); + + assert_eq!(read_u32_np(bytes, 0), 570659095); + + let bytes = &[24, 54, 3, 23, 145, 3, 34]; + assert_eq!(read_u32_np(bytes, 3), 570659095); + + assert_eq!(read_u32_np(&[0, 0, 0, 0], 0), 0); + assert_eq!(read_u32_np(&[1, 0, 0, 0], 0), 1); + assert_eq!(read_u32_np(&[12, 0, 0, 0], 0), 12); + assert_eq!(read_u32_np(&[0, 10, 0, 0], 0), 2560); + } + + #[cfg(target_endian = "little")] + #[test] + fn test_read_u64_np() { + let bytes = [23, 145, 3, 34, 0, 0, 0, 0, 0, 0, 0].as_slice(); + assert_eq!(read_u64_np(bytes, 0), 570659095); + + let bytes = [1, 2, 3, 23, 145, 3, 34, 0, 0, 0, 0, 0, 0, 0].as_slice(); + assert_eq!(read_u64_np(bytes, 3), 570659095); + + let bytes = [0, 0, 0, 0, 0, 0, 0, 0].as_slice(); + assert_eq!(read_u64_np(bytes, 0), 0); + } + + #[cfg(target_endian = "little")] + #[cfg(feature = "std")] + #[test] + fn test_u32_to_u128_delta() { + fn formula(len: u64) -> u64 { + (len & 24) >> (len >> 3) + } + + fn formula2(len: u64) -> u64 { + match len { + 8.. => 4, + _ => 0, + } + } + + let inputs: std::vec::Vec<u64> = (4..=16).collect(); + let outputs: std::vec::Vec<u64> = inputs.iter().map(|&x| formula(x)).collect(); + let expected = std::vec![0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4]; + assert_eq!(outputs, expected); + assert_eq!(outputs, inputs.iter().map(|&x| formula2(x)).collect::<std::vec::Vec<u64>>()); + } + + #[test] + #[should_panic] + #[cfg(not(feature = "unsafe"))] + fn test_read_u32_np_too_short_panics() { + let bytes = [23, 145, 0].as_slice(); + assert_eq!(read_u32_np(bytes, 0), 0); + } + + #[test] + #[should_panic] + #[cfg(not(feature = "unsafe"))] + fn test_read_u64_np_too_short_panics() { + let bytes = [23, 145, 0].as_slice(); + assert_eq!(read_u64_np(bytes, 0), 0); + } +} diff --git a/rapidhash/src/inner/seeding.rs b/rapidhash/src/inner/seeding.rs index 35b38a6..a2b2e99 100644 --- a/rapidhash/src/inner/seeding.rs +++ b/rapidhash/src/inner/seeding.rs @@ -14,7 +14,7 @@ const DEFAULT_SECRETS: [u64; 7] = [ ]; pub(crate) mod seed { - use crate::inner::mix::rapid_mix_np; + use crate::inner::mix_np::rapid_mix_np; use super::DEFAULT_SECRETS; #[inline]
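
The small-input strategy that patches 1-3 converge on is easier to read outside the diff. The sketch below is a minimal, hypothetical stand-in (prelude `try_into`/`from_ne_bytes` instead of the crate's `read_u64_np`/`read_u32_np` helpers, and it stops before the finisher): inputs of 8..=16 bytes are covered by two possibly-overlapping 8-byte reads, 4..=7 bytes by two 4-byte reads, and 1..=3 bytes by the first, last, and middle bytes.

pub fn load_small(data: &[u8]) -> (u64, u64) {
    if data.len() >= 8 {
        // 8..=16 bytes: two possibly-overlapping 8-byte reads from each end.
        let a = u64::from_ne_bytes(data[..8].try_into().unwrap());
        let b = u64::from_ne_bytes(data[data.len() - 8..].try_into().unwrap());
        (a, b)
    } else if data.len() >= 4 {
        // 4..=7 bytes: the same trick with two 4-byte reads.
        let a = u32::from_ne_bytes(data[..4].try_into().unwrap()) as u64;
        let b = u32::from_ne_bytes(data[data.len() - 4..].try_into().unwrap()) as u64;
        (a, b)
    } else if !data.is_empty() {
        // 1..=3 bytes: first, last, and middle byte; the shift by 45 keeps the
        // first byte's bits clear of the last byte's.
        let a = ((data[0] as u64) << 45) | data[data.len() - 1] as u64;
        let b = data[data.len() >> 1] as u64;
        (a, b)
    } else {
        (0, 0)
    }
}

Patch 2's switch from `a ^= ...` to `a = ...` is behaviour-preserving because `a` and `b` are zero on entry to this path; it simply drops a redundant XOR from the dependency chain.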
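The comment added in patch 3 describes a general code-layout pattern: keep the hot small-input path tiny so rustc inlines it (together with the hasher's write and finish) into every call site, and push the bulky loops behind #[cold] #[inline(never)] so their size stops counting against the caller's inlining budget. A minimal sketch of that shape, with placeholder mixing rather than rapidhash's:

#[inline(always)]
pub fn hash_core(data: &[u8]) -> u64 {
    if data.len() <= 16 {
        // Hot path: deliberately small and branch-light so it inlines everywhere.
        data.len() as u64 // placeholder for the real small-input mixing
    } else {
        hash_16_288(data)
    }
}

#[cold]
#[inline(never)]
fn hash_16_288(data: &[u8]) -> u64 {
    // <=288-byte inputs pay one extra branch; >288-byte inputs pay one extra call.
    if data.len() > 288 {
        return hash_long(data);
    }
    data.iter()
        .fold(data.len() as u64, |h, &b| h.rotate_left(9) ^ b as u64) // placeholder
}

#[cold]
#[inline(never)]
fn hash_long(data: &[u8]) -> u64 {
    data.chunks(288).fold(0, |h, chunk| h ^ hash_16_288(chunk)) // placeholder
}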
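Every bulk loop in patches 3 and 6 has the same shape: consume a fixed-size block per iteration with several independent accumulators, so the 64x64 multiplies from different lanes can be in flight simultaneously instead of serialising on one register. A hedged sketch of the three-lane, 48-byte stripe, where `mix` is a stand-in multiply-fold in the spirit of `rapid_mix_np` (not the crate's function, which also takes a const generic parameter):

fn mix(a: u64, b: u64) -> u64 {
    // Widening 64x64 -> 128 multiply, folded back down to 64 bits.
    let m = (a as u128) * (b as u128);
    (m as u64) ^ ((m >> 64) as u64)
}

fn read64(s: &[u8], o: usize) -> u64 {
    u64::from_ne_bytes(s[o..o + 8].try_into().unwrap())
}

pub fn stripe48(mut seed: u64, secrets: &[u64; 7], mut slice: &[u8]) -> u64 {
    let mut see1 = seed;
    let mut see2 = seed;
    while slice.len() >= 48 {
        // Three independent lanes per 48-byte block, as in the diff above.
        seed = mix(read64(slice, 0) ^ secrets[0], read64(slice, 8) ^ seed);
        see1 = mix(read64(slice, 16) ^ secrets[1], read64(slice, 24) ^ see1);
        see2 = mix(read64(slice, 32) ^ secrets[2], read64(slice, 40) ^ see2);
        slice = slice.split_at(48).1;
    }
    // Simplified: the real code folds the lanes into the tail handling instead.
    seed ^ see1 ^ see2
}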
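Patch 6's safe read helpers lean on `split_at(offset).1.first_chunk::<N>()` because the usual `slice[offset..offset + 8].try_into().unwrap()` cannot be called in a `const fn`. A self-contained copy of the trick (requires Rust 1.77+, where `first_chunk` is const-stable):

const fn read_u64_safe(slice: &[u8], offset: usize) -> u64 {
    // `split_at` and `first_chunk` are const-stable; `try_into` is not.
    let buf = match slice.split_at(offset).1.first_chunk::<8>() {
        Some(buf) => *buf,
        None => panic!("read_u64: slice too short"),
    };
    u64::from_ne_bytes(buf)
}

// Evaluates at compile time; a too-short slice becomes a compile error here.
const WORD: u64 = read_u64_safe(b"exactly 16 bytes", 8);

fn main() {
    assert_eq!(WORD, u64::from_ne_bytes(*b"16 bytes"));
}

The `unsafe`-feature variant trades the remaining bounds check for a `debug_assert!` plus `core::ptr::read_unaligned`, which is why its safety contract (a slice of at least `offset + 8` or `offset + 4` bytes) must be upheld by the callers in `rapid_const.rs`.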