diff --git a/xxhash.c b/xxhash.c
index 093564cf..3bb38fd0 100644
--- a/xxhash.c
+++ b/xxhash.c
@@ -472,6 +472,8 @@ FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH
     h64 ^= h64 >> 32;
 
     return h64;
+
+#undef XXH_get64bits
 }
 
 
@@ -503,6 +505,378 @@ unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed
 #endif
 }
 
+/*
+ * XXH128_endian_align() :
+ * One-shot core of XXH128. Hashes the "len" bytes at "input" and writes the
+ * result through a U64* cast of "out" : out[0] = h1, out[1] = h2 (16 bytes).
+ * Full 32-byte stripes are consumed by four accumulators (v1..v4), which are
+ * then merged into h1/h2; the last (len & 31) bytes are folded in bytewise.
+ * "endian" and "align" are passed to XXH_readLE64_align() on every 64-bit read.
+ */
+FORCE_INLINE void XXH128_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align, void* out)
+{
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* bEnd = p + len;
+    U64 h1, h2;
+#define XXH_get64bits(p) XXH_readLE64_align((const U64*)p, endian, align)
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (p==NULL)
+    {
+        len=0;
+        bEnd=p=(const BYTE*)(size_t)32;
+    }
+#endif
+
+    if (len>=32)
+    {
+        const BYTE* const limit = bEnd - 32;
+        U64 v1 = seed + PRIME64_1 + PRIME64_2;
+        U64 v2 = seed + PRIME64_2;
+        U64 v3 = seed + 0;
+        U64 v4 = seed - PRIME64_1;
+
+        do
+        {
+            v1 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v1 = XXH_rotl64(v1, 31);
+            v1 *= PRIME64_1;
+            v2 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v2 = XXH_rotl64(v2, 31);
+            v2 *= PRIME64_1;
+            v3 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v3 = XXH_rotl64(v3, 31);
+            v3 *= PRIME64_1;
+            v4 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v4 = XXH_rotl64(v4, 31);
+            v4 *= PRIME64_1;
+        }
+        while (p<=limit);
+
+        v1 *= PRIME64_2;
+        v1 = XXH_rotl64(v1, 31);
+        v1 *= PRIME64_1;
+        h1 = v1;
+        h2 = ( XXH_rotl64(h1, 27) + h1 ) * PRIME64_1 + PRIME64_4;
+
+        v2 *= PRIME64_2;
+        v2 = XXH_rotl64(v2, 33);
+        v2 *= PRIME64_1;
+        h2 ^= v2;
+        h1 ^= ( XXH_rotl64(h2, 27) + h2 ) * PRIME64_1 + PRIME64_4;
+
+        v3 *= PRIME64_2;
+        v3 = XXH_rotl64(v3, 29);
+        v3 *= PRIME64_1;
+        h1 ^= v3;
+        h2 ^= ( XXH_rotl64(h1, 27) + h1 ) * PRIME64_1 + PRIME64_4;
+
+        v4 *= PRIME64_2;
+        v4 = XXH_rotl64(v4, 27);
+        v4 *= PRIME64_1;
+        h2 ^= v4;
+        h1 ^= ( XXH_rotl64(h2, 27) + h2 ) * PRIME64_1 + PRIME64_4;
+    }
+    else
+    {
+        h1 = seed + PRIME64_5;
+        h2 = seed + PRIME64_1;
+    }
+
+    /* Tail bytes : every case deliberately falls through to the ones below it. */
+    switch(len & 31)
+    {
+    case 31: h2 ^= ((U64)p[30]) << 48;
+    case 30: h2 ^= ((U64)p[29]) << 40;
+    case 29: h2 ^= ((U64)p[28]) << 32;
+    case 28: h2 ^= ((U64)p[27]) << 24;
+    case 27: h2 ^= ((U64)p[26]) << 16;
+    case 26: h2 ^= ((U64)p[25]) << 8;
+    case 25: h2 ^= ((U64)p[24]) << 0;
+             h1 ^= XXH_rotl64(h2 * PRIME64_2, 11) * PRIME64_1;
+
+    case 24: h1 ^= ((U64)p[23]) << 56;
+    case 23: h1 ^= ((U64)p[22]) << 48;
+    case 22: h1 ^= ((U64)p[21]) << 40;
+    case 21: h1 ^= ((U64)p[20]) << 32;
+    case 20: h1 ^= ((U64)p[19]) << 24;
+    case 19: h1 ^= ((U64)p[18]) << 16;
+    case 18: h1 ^= ((U64)p[17]) << 8;
+    case 17: h1 ^= ((U64)p[16]) << 0;
+             h2 ^= XXH_rotl64(h1 * PRIME64_2, 11) * PRIME64_1;
+
+    case 16: h2 ^= ((U64)p[15]) << 56;
+    case 15: h2 ^= ((U64)p[14]) << 48;
+    case 14: h2 ^= ((U64)p[13]) << 40;
+    case 13: h2 ^= ((U64)p[12]) << 32;
+    case 12: h2 ^= ((U64)p[11]) << 24;
+    case 11: h2 ^= ((U64)p[10]) << 16;
+    case 10: h2 ^= ((U64)p[9]) << 8;
+    case 9:  h2 ^= ((U64)p[8]) << 0;
+             h1 ^= XXH_rotl64(h2 * PRIME64_2, 11) * PRIME64_1;
+
+    case 8:  h1 ^= ((U64)p[7]) << 56;
+    case 7:  h1 ^= ((U64)p[6]) << 48;
+    case 6:  h1 ^= ((U64)p[5]) << 40;
+    case 5:  h1 ^= ((U64)p[4]) << 32;
+    case 4:  h1 ^= ((U64)p[3]) << 24;
+    case 3:  h1 ^= ((U64)p[2]) << 16;
+    case 2:  h1 ^= ((U64)p[1]) << 8;
+    case 1:  h1 ^= ((U64)p[0]) << 0;
+             h2 ^= XXH_rotl64(h1 * PRIME64_5, 11) * PRIME64_1;
+    }
+
+    /* Fold h2 into h1 with "^=" : a plain "=" here would discard everything
+     * accumulated in h1 and leave both output words depending on h2 and the
+     * length only. XXH128_digest_endian() must use the same step. */
+    h1 ^= XXH_rotl64(h2, 27) * PRIME64_1 + PRIME64_4;
+
+    h1 += (U64) len;
+    h2 += (U64) len;
+
+    h2 ^= h1 >> 33;
+    h2 *= PRIME64_2;
+    h1 ^= h2 >> 29;
+    h1 *= PRIME64_3;
+    h2 ^= h1 >> 32;
+
+    ((U64*)out)[0] = h1;
+    ((U64*)out)[1] = h2;
+
+#undef XXH_get64bits
+}
+
+/*
+ * XXH128() :
+ * Calculate the 128-bit hash of the "len" bytes at "input" and store it in "out".
+ * Picks the aligned or unaligned read path and the endianness variant, then
+ * hashes the input exactly once.
+ */
+void XXH128 (const void* input, size_t len, unsigned long long seed, void* out)
+{
+#if 0
+    XXH128_state_t state;
+    XXH128_reset(&state, seed);
+    XXH128_update(&state, input, len);
+    XXH128_digest(&state, out);
+#else
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+#  if !defined(XXH_USE_UNALIGNED_ACCESS)
+    if ((((size_t)input) & 7)==0)   /* Input is aligned, let's leverage the speed advantage */
+    {
+        if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+            XXH128_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned, out);
+        else
+            XXH128_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned, out);
+        return;   /* result is already in "out" : do not fall through and hash the input again */
+    }
+#  endif
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        XXH128_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned, out);
+    else
+        XXH128_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned, out);
+#endif
+}
+
+
+/*
+ * XXH256_endian_align() :
+ * One-shot core of XXH256. Same 32-byte stripe loop as XXH128_endian_align(),
+ * but the accumulators are merged into four words h1..h4, which are written
+ * through an unsigned long long* cast of "out" : out[0..3] = h1..h4 (32 bytes).
+ * "endian" and "align" are passed to XXH_readLE64_align() on every 64-bit read.
+ */
+FORCE_INLINE void XXH256_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align, void* out)
+{
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* bEnd = p + len;
+    U64 h1, h2, h3, h4;
+
+#define XXH_get64bits(p) XXH_readLE64_align((const U64*)p, endian, align)
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (p==NULL)
+    {
+        len=0;
+        bEnd=p=(const BYTE*)(size_t)32;
+    }
+#endif
+
+    if (len>=32)
+    {
+        const BYTE* const limit = bEnd - 32;
+        U64 v1 = seed + PRIME64_1 + PRIME64_2;
+        U64 v2 = seed + PRIME64_2;
+        U64 v3 = seed + 0;
+        U64 v4 = seed - PRIME64_1;
+
+        do
+        {
+            v1 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v1 = XXH_rotl64(v1, 31);
+            v1 *= PRIME64_1;
+            v2 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v2 = XXH_rotl64(v2, 31);
+            v2 *= PRIME64_1;
+            v3 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v3 = XXH_rotl64(v3, 31);
+            v3 *= PRIME64_1;
+            v4 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v4 = XXH_rotl64(v4, 31);
+            v4 *= PRIME64_1;
+        }
+        while (p<=limit);
+
+        v1 *= PRIME64_2;
+        v1 = XXH_rotl64(v1, 31);
+        v1 *= PRIME64_1;
+        h1 = v1;
+        h2 = ( XXH_rotl64(h1, 27) + h1 ) * PRIME64_1 + PRIME64_2;
+
+        v2 *= PRIME64_2;
+        v2 = XXH_rotl64(v2, 33);
+        v2 *= PRIME64_1;
+        h2 ^= v2;
+        h3 = ( XXH_rotl64(h2, 29) + h2 ) * PRIME64_2 + PRIME64_3;
+
+        v3 *= PRIME64_2;
+        v3 = XXH_rotl64(v3, 29);
+        v3 *= PRIME64_1;
+        h3 ^= v3;
+        h4 = ( XXH_rotl64(h3, 31) + h3 ) * PRIME64_3 + PRIME64_4;
+
+        v4 *= PRIME64_2;
+        v4 = XXH_rotl64(v4, 27);
+        v4 *= PRIME64_1;
+        h4 ^= v4;
+        h1 ^= ( XXH_rotl64(h4, 33) + h4 ) * PRIME64_4 + PRIME64_5;
+    }
+    else
+    {
+        h1 = seed + PRIME64_5;
+        h2 = seed + PRIME64_1;
+        h3 = seed + PRIME64_4;
+        h4 = seed + PRIME64_2;
+    }
+
+    /* Tail bytes : every case deliberately falls through to the ones below it. */
+    switch(len & 31)
+    {
+    case 31: h4 ^= ((U64)p[30]) << 48;
+    case 30: h4 ^= ((U64)p[29]) << 40;
+    case 29: h4 ^= ((U64)p[28]) << 32;
+    case 28: h4 ^= ((U64)p[27]) << 24;
+    case 27: h4 ^= ((U64)p[26]) << 16;
+    case 26: h4 ^= ((U64)p[25]) << 8;
+    case 25: h4 ^= ((U64)p[24]) << 0;
+             h3 ^= XXH_rotl64(h4 * PRIME64_5, 17) * PRIME64_1;
+
+    case 24: h3 ^= ((U64)p[23]) << 56;
+    case 23: h3 ^= ((U64)p[22]) << 48;
+    case 22: h3 ^= ((U64)p[21]) << 40;
+    case 21: h3 ^= ((U64)p[20]) << 32;
+    case 20: h3 ^= ((U64)p[19]) << 24;
+    case 19: h3 ^= ((U64)p[18]) << 16;
+    case 18: h3 ^= ((U64)p[17]) << 8;
+    case 17: h3 ^= ((U64)p[16]) << 0;
+             h2 ^= XXH_rotl64(h3 * PRIME64_5, 13) * PRIME64_1;
+
+    case 16: h2 ^= ((U64)p[15]) << 56;
+    case 15: h2 ^= ((U64)p[14]) << 48;
+    case 14: h2 ^= ((U64)p[13]) << 40;
+    case 13: h2 ^= ((U64)p[12]) << 32;
+    case 12: h2 ^= ((U64)p[11]) << 24;
+    case 11: h2 ^= ((U64)p[10]) << 16;
+    case 10: h2 ^= ((U64)p[9]) << 8;
+    case 9:  h2 ^= ((U64)p[8]) << 0;
+             h1 ^= XXH_rotl64(h2 * PRIME64_5, 11) * PRIME64_1;
+
+    case 8:  h1 ^= ((U64)p[7]) << 56;
+    case 7:  h1 ^= ((U64)p[6]) << 48;
+    case 6:  h1 ^= ((U64)p[5]) << 40;
+    case 5:  h1 ^= ((U64)p[4]) << 32;
+    case 4:  h1 ^= ((U64)p[3]) << 24;
+    case 3:  h1 ^= ((U64)p[2]) << 16;
+    case 2:  h1 ^= ((U64)p[1]) << 8;
+    case 1:  h1 ^= ((U64)p[0]) << 0;
+             h4 ^= XXH_rotl64(h1 * PRIME64_5, 7) * PRIME64_1;
+    }
+
+    h2 ^= ( XXH_rotl64(h1, 27) + h1 ) * PRIME64_1 + PRIME64_4;
+    h3 ^= ( XXH_rotl64(h2, 29) + h2 ) * PRIME64_2 + PRIME64_3;
+    h4 ^= ( XXH_rotl64(h3, 31) + h3 ) * PRIME64_3 + PRIME64_2;
+    h1 ^= ( XXH_rotl64(h4, 33) + h4 ) * PRIME64_4 + PRIME64_1;
+
+    h1 += (U64) len;
+    h2 += (U64) len;
+    h3 += (U64) len;
+    h4 += (U64) len;
+
+    h4 ^= h1 >> 33;
+    h4 *= PRIME64_2;
+    h1 ^= h4 >> 29;
+    h1 *= PRIME64_3;
+    h4 ^= h1 >> 32;
+
+    h3 ^= h2 >> 33;
+    h3 *= PRIME64_2;
+    h2 ^= h3 >> 29;
+    h2 *= PRIME64_3;
+    h3 ^= h2 >> 32;
+
+    ((unsigned long long*)out)[0] = h1;
+    ((unsigned long long*)out)[1] = h2;
+    ((unsigned long long*)out)[2] = h3;
+    ((unsigned long long*)out)[3] = h4;
+
+#undef XXH_get64bits
+}
+
+/*
+ * XXH256() :
+ * Calculate the 256-bit hash of the "len" bytes at "input" and store it in "out".
+ * Picks the aligned or unaligned read path and the endianness variant, then
+ * hashes the input exactly once.
+ */
+void XXH256 (const void* input, size_t len, unsigned long long seed, void* out)
+{
+#if 0
+    XXH256_state_t state;
+    XXH256_reset(&state, seed);
+    XXH256_update(&state, input, len);
+    XXH256_digest(&state, out);
+#else
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+#  if !defined(XXH_USE_UNALIGNED_ACCESS)
+    if ((((size_t)input) & 7)==0)   /* Input is aligned, let's leverage the speed advantage */
+    {
+        if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+            XXH256_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned, out);
+        else
+            XXH256_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned, out);
+        return;   /* result is already in "out" : do not fall through and hash the input again */
+    }
+#  endif
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        XXH256_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned, out);
+    else
+        XXH256_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned, out);
+#endif
+}
+
+
 /****************************************************
  *  Advanced Hash Functions
 ****************************************************/
@@ -533,6 +907,33 @@ typedef struct
 } XXH_istate64_t;
 
 
+/* Internal streaming state for XXH128; XXH128_createState() asserts that it fits inside XXH128_state_t. */
+typedef struct
+{
+    U64 total_len;     /* number of bytes fed through update() since the last reset() */
+    U64 seed;          /* seed given to reset(); used by digest() when total_len < 32 */
+    U64 v1;            /* v1..v4 : the four stripe accumulators */
+    U64 v2;
+    U64 v3;
+    U64 v4;
+    char memory[64];   /* bytes not yet consumed; update() keeps fewer than 32 here between calls */
+    U32 memsize;       /* number of valid bytes in memory[] */
+} XXH_istate128_t;
+
+/* Internal streaming state for XXH256; XXH256_createState() asserts that it fits inside XXH256_state_t. */
+typedef struct
+{
+    U64 total_len;     /* number of bytes fed through update() since the last reset() */
+    U64 seed;          /* seed given to reset(); used by digest() when total_len < 32 */
+    U64 v1;            /* v1..v4 : the four stripe accumulators */
+    U64 v2;
+    U64 v3;
+    U64 v4;
+    char memory[64];   /* bytes not yet consumed; update() keeps fewer than 32 here between calls */
+    U32 memsize;       /* number of valid bytes in memory[] */
+} XXH_istate256_t;
+
+
 XXH32_state_t* XXH32_createState(void)
 {
     XXH_STATIC_ASSERT(sizeof(XXH32_state_t) >= sizeof(XXH_istate32_t));   /* A compilation error here means XXH32_state_t is not large enough */
@@ -555,6 +956,29 @@ XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
     return XXH_OK;
 }
 
+/* Allocate an XXH128 state with XXH_malloc(); the pointer is returned unchecked and the state still needs XXH128_reset(). */
+XXH128_state_t* XXH128_createState(void)
+{
+    XXH_STATIC_ASSERT(sizeof(XXH128_state_t) >= sizeof(XXH_istate128_t));   /* A compilation error here means XXH128_state_t is not large enough */
+    return (XXH128_state_t*)XXH_malloc(sizeof(XXH128_state_t));
+}
+XXH_errorcode XXH128_freeState(XXH128_state_t* statePtr)
+{
+    XXH_free(statePtr);
+    return XXH_OK;
+}
+
+/* Allocate an XXH256 state with XXH_malloc(); the pointer is returned unchecked and the state still needs XXH256_reset(). */
+XXH256_state_t* XXH256_createState(void)
+{
+    XXH_STATIC_ASSERT(sizeof(XXH256_state_t) >= sizeof(XXH_istate256_t));   /* A compilation error here means XXH256_state_t is not large enough */
+    return (XXH256_state_t*)XXH_malloc(sizeof(XXH256_state_t));
+}
+XXH_errorcode XXH256_freeState(XXH256_state_t* statePtr)
+{
+    XXH_free(statePtr);
+    return XXH_OK;
+}
 
 
 /*** Hash feed ***/
@@ -584,6 +1008,34 @@ XXH_errorcode XXH64_reset(XXH64_state_t* state_in, unsigned long long seed)
     return XXH_OK;
 }
 
+/* XXH128_reset() : start a new hash; seeds the four accumulators exactly like XXH128_endian_align() and empties the buffer. */
+XXH_errorcode XXH128_reset(XXH128_state_t* state_in, unsigned long long seed)
+{
+    XXH_istate128_t* state = (XXH_istate128_t*) state_in;
+    state->seed = seed;
+    state->v1 = seed + PRIME64_1 + PRIME64_2;
+    state->v2 = seed + PRIME64_2;
+    state->v3 = seed + 0;
+    state->v4 = seed - PRIME64_1;
+    state->total_len = 0;
+    state->memsize = 0;
+    return XXH_OK;
+}
+
+/* XXH256_reset() : start a new hash; seeds the four accumulators exactly like XXH256_endian_align() and empties the buffer. */
+XXH_errorcode XXH256_reset(XXH256_state_t* state_in, unsigned long long seed)
+{
+    XXH_istate256_t* state = (XXH_istate256_t*) state_in;
+    state->seed = seed;
+    state->v1 = seed + PRIME64_1 + PRIME64_2;
+    state->v2 = seed + PRIME64_2;
+    state->v3 = seed + 0;
+    state->v4 = seed - PRIME64_1;
+    state->total_len = 0;
+    state->memsize = 0;
+    return XXH_OK;
+}
+
 
 FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
 {
@@ -933,3 +1385,476 @@ unsigned long long XXH64_digest (const XXH64_state_t* state_in)
 }
 
 
+/*
+ * XXH128_update_endian() :
+ * Streaming step : feed "len" more bytes into the state.
+ * Input is buffered in state->memory until 32 bytes are available; complete
+ * 32-byte stripes are then mixed into v1..v4 and any remainder (< 32 bytes)
+ * is kept in state->memory for the next call or for the digest.
+ * Returns XXH_ERROR for a NULL input when XXH_ACCEPT_NULL_INPUT_POINTER is
+ * defined, XXH_OK otherwise.
+ */
+FORCE_INLINE XXH_errorcode XXH128_update_endian (XXH128_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
+{
+    XXH_istate128_t * state = (XXH_istate128_t *) state_in;
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* const bEnd = p + len;
+#define XXH_get64bits(p) XXH_readLE64((const U64*)p, endian)
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (input==NULL) return XXH_ERROR;
+#endif
+
+    state->total_len += len;
+
+    if (state->memsize + len < 32)   /* fill in tmp buffer */
+    {
+        XXH_memcpy(state->memory + state->memsize, input, len);
+        state->memsize += (U32)len;
+        return XXH_OK;
+    }
+
+    if (state->memsize)   /* some data left from previous update */
+    {
+        XXH_memcpy(state->memory + state->memsize, input, 32-state->memsize);
+        {
+            const BYTE* ps = (const BYTE*)state->memory;
+            state->v1 += XXH_get64bits(ps) * PRIME64_2;
+            state->v1 = XXH_rotl64(state->v1, 31);
+            state->v1 *= PRIME64_1;
+            ps+=8;
+            state->v2 += XXH_get64bits(ps) * PRIME64_2;
+            state->v2 = XXH_rotl64(state->v2, 31);
+            state->v2 *= PRIME64_1;
+            ps+=8;
+            state->v3 += XXH_get64bits(ps) * PRIME64_2;
+            state->v3 = XXH_rotl64(state->v3, 31);
+            state->v3 *= PRIME64_1;
+            ps+=8;
+            state->v4 += XXH_get64bits(ps) * PRIME64_2;
+            state->v4 = XXH_rotl64(state->v4, 31);
+            state->v4 *= PRIME64_1;
+            ps+=8;
+        }
+        p += 32-state->memsize;
+        state->memsize = 0;
+    }
+
+    if (p+32 <= bEnd)
+    {
+        const BYTE* const limit = bEnd - 32;
+        U64 v1 = state->v1;
+        U64 v2 = state->v2;
+        U64 v3 = state->v3;
+        U64 v4 = state->v4;
+
+        do
+        {
+            v1 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v1 = XXH_rotl64(v1, 31);
+            v1 *= PRIME64_1;
+            v2 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v2 = XXH_rotl64(v2, 31);
+            v2 *= PRIME64_1;
+            v3 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v3 = XXH_rotl64(v3, 31);
+            v3 *= PRIME64_1;
+            v4 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v4 = XXH_rotl64(v4, 31);
+            v4 *= PRIME64_1;
+        }
+        while (p<=limit);
+
+        state->v1 = v1;
+        state->v2 = v2;
+        state->v3 = v3;
+        state->v4 = v4;
+    }
+
+    if (p < bEnd)
+    {
+        XXH_memcpy(state->memory, p, bEnd-p);
+        state->memsize = (U32)(bEnd-p);
+    }
+
+    return XXH_OK;
+
+#undef XXH_get64bits
+}
+
+/* XXH128_update() : public entry point; selects the reader that matches the detected CPU endianness. */
+XXH_errorcode XXH128_update (XXH128_state_t* state_in, const void* input, size_t len)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH128_update_endian(state_in, input, len, XXH_littleEndian);
+    else
+        return XXH128_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+
+/*
+ * XXH128_digest_endian() :
+ * Compute the 128-bit result for everything fed so far and write it through a
+ * U64* cast of "out" (out[0] = h1, out[1] = h2). The state is only read, so
+ * more input can be added afterwards. Must mirror XXH128_endian_align().
+ */
+FORCE_INLINE void XXH128_digest_endian (const XXH128_state_t* state_in, XXH_endianess endian, void* out)
+{
+    const XXH_istate128_t* state = (const XXH_istate128_t*) state_in;
+    const BYTE * p = (const BYTE*)state->memory;
+    U64 h1, h2;
+    (void)endian;   /* unused : the buffered tail is read one byte at a time */
+
+    if (state->total_len >= 32)
+    {
+        U64 v1 = state->v1;
+        U64 v2 = state->v2;
+        U64 v3 = state->v3;
+        U64 v4 = state->v4;
+
+        v1 *= PRIME64_2;
+        v1 = XXH_rotl64(v1, 31);
+        v1 *= PRIME64_1;
+        h1 = v1;
+        h2 = ( XXH_rotl64(h1, 27) + h1 ) * PRIME64_1 + PRIME64_4;
+
+        v2 *= PRIME64_2;
+        v2 = XXH_rotl64(v2, 33);
+        v2 *= PRIME64_1;
+        h2 ^= v2;
+        h1 ^= ( XXH_rotl64(h2, 27) + h2 ) * PRIME64_1 + PRIME64_4;
+
+        v3 *= PRIME64_2;
+        v3 = XXH_rotl64(v3, 29);
+        v3 *= PRIME64_1;
+        h1 ^= v3;
+        h2 ^= ( XXH_rotl64(h1, 27) + h1 ) * PRIME64_1 + PRIME64_4;
+
+        v4 *= PRIME64_2;
+        v4 = XXH_rotl64(v4, 27);
+        v4 *= PRIME64_1;
+        h2 ^= v4;
+        h1 ^= ( XXH_rotl64(h2, 27) + h2 ) * PRIME64_1 + PRIME64_4;
+    }
+    else
+    {
+        h1 = state->seed + PRIME64_5;
+        h2 = state->seed + PRIME64_1;
+    }
+
+    /* Tail bytes : every case deliberately falls through to the ones below it. */
+    switch(state->total_len & 31)
+    {
+    case 31: h2 ^= ((U64)p[30]) << 48;
+    case 30: h2 ^= ((U64)p[29]) << 40;
+    case 29: h2 ^= ((U64)p[28]) << 32;
+    case 28: h2 ^= ((U64)p[27]) << 24;
+    case 27: h2 ^= ((U64)p[26]) << 16;
+    case 26: h2 ^= ((U64)p[25]) << 8;
+    case 25: h2 ^= ((U64)p[24]) << 0;
+             h1 ^= XXH_rotl64(h2 * PRIME64_2, 11) * PRIME64_1;
+
+    case 24: h1 ^= ((U64)p[23]) << 56;
+    case 23: h1 ^= ((U64)p[22]) << 48;
+    case 22: h1 ^= ((U64)p[21]) << 40;
+    case 21: h1 ^= ((U64)p[20]) << 32;
+    case 20: h1 ^= ((U64)p[19]) << 24;
+    case 19: h1 ^= ((U64)p[18]) << 16;
+    case 18: h1 ^= ((U64)p[17]) << 8;
+    case 17: h1 ^= ((U64)p[16]) << 0;
+             h2 ^= XXH_rotl64(h1 * PRIME64_2, 11) * PRIME64_1;
+
+    case 16: h2 ^= ((U64)p[15]) << 56;
+    case 15: h2 ^= ((U64)p[14]) << 48;
+    case 14: h2 ^= ((U64)p[13]) << 40;
+    case 13: h2 ^= ((U64)p[12]) << 32;
+    case 12: h2 ^= ((U64)p[11]) << 24;
+    case 11: h2 ^= ((U64)p[10]) << 16;
+    case 10: h2 ^= ((U64)p[9]) << 8;
+    case 9:  h2 ^= ((U64)p[8]) << 0;
+             h1 ^= XXH_rotl64(h2 * PRIME64_2, 11) * PRIME64_1;
+
+    case 8:  h1 ^= ((U64)p[7]) << 56;
+    case 7:  h1 ^= ((U64)p[6]) << 48;
+    case 6:  h1 ^= ((U64)p[5]) << 40;
+    case 5:  h1 ^= ((U64)p[4]) << 32;
+    case 4:  h1 ^= ((U64)p[3]) << 24;
+    case 3:  h1 ^= ((U64)p[2]) << 16;
+    case 2:  h1 ^= ((U64)p[1]) << 8;
+    case 1:  h1 ^= ((U64)p[0]) << 0;
+             h2 ^= XXH_rotl64(h1 * PRIME64_5, 11) * PRIME64_1;
+    }
+
+    /* Fold h2 into h1 with "^=" : a plain "=" here would discard everything
+     * accumulated in h1 and leave both output words depending on h2 and the
+     * length only. XXH128_endian_align() must use the same step. */
+    h1 ^= XXH_rotl64(h2, 27) * PRIME64_1 + PRIME64_4;
+
+    h1 += (U64) state->total_len;
+    h2 += (U64) state->total_len;
+
+    h2 ^= h1 >> 33;
+    h2 *= PRIME64_2;
+    h1 ^= h2 >> 29;
+    h1 *= PRIME64_3;
+    h2 ^= h1 >> 32;
+
+    ((U64*)out)[0] = h1;
+    ((U64*)out)[1] = h2;
+}
+
+/* XXH128_digest() : public entry point; writes the 128-bit result to "out" without modifying the state. */
+void XXH128_digest (const XXH128_state_t* state_in, void* out)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        XXH128_digest_endian(state_in, XXH_littleEndian, out);
+    else
+        XXH128_digest_endian(state_in, XXH_bigEndian, out);
+}
+
+
+/*
+ * XXH256_update_endian() :
+ * Streaming step for XXH256; buffering and stripe processing are the same as
+ * in XXH128_update_endian(). Returns XXH_ERROR for a NULL input when
+ * XXH_ACCEPT_NULL_INPUT_POINTER is defined, XXH_OK otherwise.
+ */
+FORCE_INLINE XXH_errorcode XXH256_update_endian (XXH256_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
+{
+    XXH_istate256_t * state = (XXH_istate256_t *) state_in;
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* const bEnd = p + len;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (input==NULL) return XXH_ERROR;
+#endif
+
+    state->total_len += len;
+
+    if (state->memsize + len < 32)   /* fill in tmp buffer */
+    {
+        XXH_memcpy(state->memory + state->memsize, input, len);
+        state->memsize += (U32)len;
+        return XXH_OK;
+    }
+
+    if (state->memsize)   /* some data left from previous update */
+    {
+        XXH_memcpy(state->memory + state->memsize, input, 32-state->memsize);
+        {
+            const U64* p64 = (const U64*)state->memory;
+            state->v1 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v1 = XXH_rotl64(state->v1, 31);
+            state->v1 *= PRIME64_1;
+            p64++;
+            state->v2 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v2 = XXH_rotl64(state->v2, 31);
+            state->v2 *= PRIME64_1;
+            p64++;
+            state->v3 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v3 = XXH_rotl64(state->v3, 31);
+            state->v3 *= PRIME64_1;
+            p64++;
+            state->v4 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v4 = XXH_rotl64(state->v4, 31);
+            state->v4 *= PRIME64_1;
+            p64++;
+        }
+        p += 32-state->memsize;
+        state->memsize = 0;
+    }
+
+    if (p+32 <= bEnd)
+    {
+        const BYTE* const limit = bEnd - 32;
+        U64 v1 = state->v1;
+        U64 v2 = state->v2;
+        U64 v3 = state->v3;
+        U64 v4 = state->v4;
+
+        do
+        {
+            v1 += XXH_readLE64((const U64*)p+0, endian) * PRIME64_2;
+            v1 = XXH_rotl64(v1, 31) * PRIME64_1;
+
+            v2 += XXH_readLE64((const U64*)p+1, endian) * PRIME64_2;
+            v2 = XXH_rotl64(v2, 31) * PRIME64_1;
+
+            v3 += XXH_readLE64((const U64*)p+2, endian) * PRIME64_2;
+            v3 = XXH_rotl64(v3, 31) * PRIME64_1;
+
+            v4 += XXH_readLE64((const U64*)p+3, endian) * PRIME64_2;
+            v4 = XXH_rotl64(v4, 31) * PRIME64_1;
+
+            p+=32;
+        }
+        while (p<=limit);
+
+        state->v1 = v1;
+        state->v2 = v2;
+        state->v3 = v3;
+        state->v4 = v4;
+    }
+
+    if (p < bEnd)
+    {
+        XXH_memcpy(state->memory, p, bEnd-p);
+        state->memsize = (U32)(bEnd-p);
+    }
+
+    return XXH_OK;
+}
+
+/* XXH256_update() : public entry point; selects the reader that matches the detected CPU endianness. */
+XXH_errorcode XXH256_update (XXH256_state_t* state_in, const void* input, size_t len)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH256_update_endian(state_in, input, len, XXH_littleEndian);
+    else
+        return XXH256_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+
+/*
+ * XXH256_digest_endian() :
+ * Compute the 256-bit result for everything fed so far and write it through an
+ * unsigned long long* cast of "out" (out[0..3] = h1..h4). The state is only
+ * read, so more input can be added afterwards. Must mirror XXH256_endian_align().
+ */
+FORCE_INLINE void XXH256_digest_endian (const XXH256_state_t* state_in, XXH_endianess endian, void* out)
+{
+    const XXH_istate256_t* state = (const XXH_istate256_t*) state_in;
+    const BYTE * p = (const BYTE*)state->memory;
+    U64 h1, h2, h3, h4;
+    (void)endian;   /* unused : the buffered tail is read one byte at a time */
+
+    if (state->total_len >= 32)
+    {
+        U64 v1 = state->v1;
+        U64 v2 = state->v2;
+        U64 v3 = state->v3;
+        U64 v4 = state->v4;
+
+        v1 *= PRIME64_2;
+        v1 = XXH_rotl64(v1, 31);
+        v1 *= PRIME64_1;
+        h1 = v1;
+        h2 = ( XXH_rotl64(h1, 27) + h1 ) * PRIME64_1 + PRIME64_2;
+
+        v2 *= PRIME64_2;
+        v2 = XXH_rotl64(v2, 33);
+        v2 *= PRIME64_1;
+        h2 ^= v2;
+        h3 = ( XXH_rotl64(h2, 29) + h2 ) * PRIME64_2 + PRIME64_3;
+
+        v3 *= PRIME64_2;
+        v3 = XXH_rotl64(v3, 29);
+        v3 *= PRIME64_1;
+        h3 ^= v3;
+        h4 = ( XXH_rotl64(h3, 31) + h3 ) * PRIME64_3 + PRIME64_4;
+
+        v4 *= PRIME64_2;
+        v4 = XXH_rotl64(v4, 27);
+        v4 *= PRIME64_1;
+        h4 ^= v4;
+        h1 ^= ( XXH_rotl64(h4, 33) + h4 ) * PRIME64_4 + PRIME64_5;
+    }
+    else
+    {
+        h1 = state->seed + PRIME64_5;
+        h2 = state->seed + PRIME64_1;
+        h3 = state->seed + PRIME64_4;
+        h4 = state->seed + PRIME64_2;
+    }
+
+    /* Tail bytes : every case deliberately falls through to the ones below it. */
+    switch(state->total_len & 31)
+    {
+    case 31: h4 ^= ((U64)p[30]) << 48;
+    case 30: h4 ^= ((U64)p[29]) << 40;
+    case 29: h4 ^= ((U64)p[28]) << 32;
+    case 28: h4 ^= ((U64)p[27]) << 24;
+    case 27: h4 ^= ((U64)p[26]) << 16;
+    case 26: h4 ^= ((U64)p[25]) << 8;
+    case 25: h4 ^= ((U64)p[24]) << 0;
+             h3 ^= XXH_rotl64(h4 * PRIME64_5, 17) * PRIME64_1;
+
+    case 24: h3 ^= ((U64)p[23]) << 56;
+    case 23: h3 ^= ((U64)p[22]) << 48;
+    case 22: h3 ^= ((U64)p[21]) << 40;
+    case 21: h3 ^= ((U64)p[20]) << 32;
+    case 20: h3 ^= ((U64)p[19]) << 24;
+    case 19: h3 ^= ((U64)p[18]) << 16;
+    case 18: h3 ^= ((U64)p[17]) << 8;
+    case 17: h3 ^= ((U64)p[16]) << 0;
+             h2 ^= XXH_rotl64(h3 * PRIME64_5, 13) * PRIME64_1;
+
+    case 16: h2 ^= ((U64)p[15]) << 56;
+    case 15: h2 ^= ((U64)p[14]) << 48;
+    case 14: h2 ^= ((U64)p[13]) << 40;
+    case 13: h2 ^= ((U64)p[12]) << 32;
+    case 12: h2 ^= ((U64)p[11]) << 24;
+    case 11: h2 ^= ((U64)p[10]) << 16;
+    case 10: h2 ^= ((U64)p[9]) << 8;
+    case 9:  h2 ^= ((U64)p[8]) << 0;
+             h1 ^= XXH_rotl64(h2 * PRIME64_5, 11) * PRIME64_1;
+
+    case 8:  h1 ^= ((U64)p[7]) << 56;
+    case 7:  h1 ^= ((U64)p[6]) << 48;
+    case 6:  h1 ^= ((U64)p[5]) << 40;
+    case 5:  h1 ^= ((U64)p[4]) << 32;
+    case 4:  h1 ^= ((U64)p[3]) << 24;
+    case 3:  h1 ^= ((U64)p[2]) << 16;
+    case 2:  h1 ^= ((U64)p[1]) << 8;
+    case 1:  h1 ^= ((U64)p[0]) << 0;
+             h4 ^= XXH_rotl64(h1 * PRIME64_5, 7) * PRIME64_1;
+    }
+
+    h2 ^= ( XXH_rotl64(h1, 27) + h1 ) * PRIME64_1 + PRIME64_4;
+    h3 ^= ( XXH_rotl64(h2, 29) + h2 ) * PRIME64_2 + PRIME64_3;
+    h4 ^= ( XXH_rotl64(h3, 31) + h3 ) * PRIME64_3 + PRIME64_2;
+    h1 ^= ( XXH_rotl64(h4, 33) + h4 ) * PRIME64_4 + PRIME64_1;
+
+    h1 += (U64) state->total_len;
+    h2 += (U64) state->total_len;
+    h3 += (U64) state->total_len;
+    h4 += (U64) state->total_len;
+
+    h4 ^= h1 >> 33;
+    h4 *= PRIME64_2;
+    h1 ^= h4 >> 29;
+    h1 *= PRIME64_3;
+    h4 ^= h1 >> 32;
+
+    h3 ^= h2 >> 33;
+    h3 *= PRIME64_2;
+    h2 ^= h3 >> 29;
+    h2 *= PRIME64_3;
+    h3 ^= h2 >> 32;
+
+    ((unsigned long long*)out)[0] = h1;
+    ((unsigned long long*)out)[1] = h2;
+    ((unsigned long long*)out)[2] = h3;
+    ((unsigned long long*)out)[3] = h4;
+}
+
+/* XXH256_digest() : public entry point; writes the 256-bit result to "out" without modifying the state. */
+void XXH256_digest (const XXH256_state_t* state_in, void* out)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        XXH256_digest_endian(state_in, XXH_littleEndian, out);
+    else
+        XXH256_digest_endian(state_in, XXH_bigEndian, out);
+}
diff --git a/xxhash.h b/xxhash.h
index be0acced..0b720ae1 100644
--- a/xxhash.h
+++ b/xxhash.h
@@ -86,6 +86,8 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
 
 unsigned int       XXH32 (const void* input, size_t length, unsigned seed);
 unsigned long long XXH64 (const void* input, size_t length, unsigned long long seed);
+void               XXH128 (const void* input, size_t length, unsigned long long seed, void* out);
+void               XXH256 (const void* input, size_t length, unsigned long long seed, void* out);
 
 /*
 XXH32() :
@@ -96,6 +98,12 @@ XXH32() :
     Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
 XXH64() :
     Calculate the 64-bits hash of sequence of length "len" stored at memory address "input".
+XXH128():
+    Calculate the 128-bits hash of sequence of length "len" stored at memory address "input".
+    Output is stored in the 16 byte array "out" as two native 64-bit words, so "out" must be aligned for 64-bit access.
+XXH256():
+    Calculate the 256-bits hash of sequence of length "len" stored at memory address "input".
+    Output is stored in the 32 byte array "out" as four native 64-bit words, so "out" must be aligned for 64-bit access.
 */
 
 
@@ -105,6 +113,8 @@ XXH64() :
 *****************************/
 typedef struct { long long ll[ 6]; } XXH32_state_t;
 typedef struct { long long ll[11]; } XXH64_state_t;
+typedef struct { long long ll[28]; } XXH128_state_t;
+typedef struct { long long ll[28]; } XXH256_state_t;
 
 /*
 These structures allow static allocation of XXH states.
@@ -119,6 +129,12 @@ XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
 XXH64_state_t* XXH64_createState(void);
 XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
 
+XXH128_state_t* XXH128_createState(void);
+XXH_errorcode  XXH128_freeState(XXH128_state_t* statePtr);
+
+XXH256_state_t* XXH256_createState(void);
+XXH_errorcode  XXH256_freeState(XXH256_state_t* statePtr);
+
 /*
 These functions create and release memory for XXH state.
 States must then be initialized using XXHnn_reset() before first use.
@@ -133,6 +149,14 @@ XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long see
 XXH_errorcode      XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
 unsigned long long XXH64_digest (const XXH64_state_t* statePtr);
 
+XXH_errorcode XXH128_reset (XXH128_state_t* statePtr, unsigned long long seed);                    /* start a new 128-bit hash with "seed" */
+XXH_errorcode XXH128_update (XXH128_state_t* statePtr, const void* input, size_t length);         /* feed "length" more bytes */
+void          XXH128_digest (const XXH128_state_t* statePtr, void* out);                          /* result is written to "out"; state is not modified */
+
+XXH_errorcode XXH256_reset (XXH256_state_t* statePtr, unsigned long long seed);                    /* start a new 256-bit hash with "seed" */
+XXH_errorcode XXH256_update (XXH256_state_t* statePtr, const void* input, size_t length);         /* feed "length" more bytes */
+void          XXH256_digest (const XXH256_state_t* statePtr, void* out);                          /* result is written to "out"; state is not modified */
+
 /*
 These functions calculate the xxHash of an input provided in multiple smaller packets,
 as opposed to an input provided as a single block.
@@ -153,7 +177,6 @@ and therefore get some new hashes, by calling again XXHnn_digest().
 When you are done, don't forget to free XXH state space, using typically XXHnn_freeState().
 */
 
-
 #if defined (__cplusplus)
 }
 #endif
diff --git a/xxhsum.c b/xxhsum.c
index 783c7a5c..d93806e5 100644
--- a/xxhsum.c
+++ b/xxhsum.c
@@ -377,6 +377,82 @@ static int BMK_benchFile(char** fileNamesTable, int nbFiles)
             totalc += fastestC;
         }
 
+        // Bench XXH128 : time repeated one-shot XXH128() calls over the benchmark buffer and report the best throughput
+        {
+            int interationNb;
+            double fastestC = 100000000.;   // best time per hash seen so far, in ms; starts as a large sentinel
+            unsigned long long h128[2] = {0, 0};   // receives the digest as two 64-bit words; printed as h128[1] then h128[0]
+
+            DISPLAY("\r%79s\r", "");       // Clean display line
+            for (interationNb = 1; interationNb <= g_nbIterations; interationNb++)
+            {
+                int nbHashes = 0;
+                int milliTime;
+
+                DISPLAY("%1i-%-14.14s : %10i ->\r", interationNb, "XXH128", (int)benchedSize);
+
+                // Hash loop : wait for the millisecond counter to change, then hash in batches of 100 while the elapsed time is below TIMELOOP
+                milliTime = BMK_GetMilliStart();
+                while(BMK_GetMilliStart() == milliTime);
+                milliTime = BMK_GetMilliStart();
+                while(BMK_GetMilliSpan(milliTime) < TIMELOOP)
+                {
+                    int i;
+                    for (i=0; i<100; i++)
+                    {
+                        XXH128(alignedBuffer, benchedSize, 0, h128);
+                        nbHashes++;
+                    }
+                }
+                milliTime = BMK_GetMilliSpan(milliTime);
+                if ((double)milliTime < fastestC*nbHashes) fastestC = (double)milliTime/nbHashes;   // keep the lowest time per hash across iterations
+                DISPLAY("%1i-%-14.14s : %10i -> %7.1f MB/s\r", interationNb, "XXH128", (int)benchedSize, (double)benchedSize / fastestC / 1000.);
+            }
+            DISPLAY("%-16.16s : %10i -> %7.1f MB/s 0x%08X%08X%08X%08X\n", "XXH128", (int)benchedSize, (double)benchedSize / fastestC / 1000., (U32)(h128[1]>>32), (U32)(h128[1]), (U32)(h128[0]>>32), (U32)(h128[0]));
+
+            totals += benchedSize;   // running totals across all benchmarked hashes
+            totalc += fastestC;
+        }
+
+        // Bench XXH256 : same procedure as the XXH128 benchmark above, with a four-word digest
+        {
+            int interationNb;
+            double fastestC = 100000000.;   // best time per hash seen so far, in ms; starts as a large sentinel
+            unsigned long long h256[4] = {0, 0};   // receives the digest as four 64-bit words (remaining elements are zero-initialized)
+
+            DISPLAY("\r%79s\r", "");       // Clean display line
+            for (interationNb = 1; interationNb <= g_nbIterations; interationNb++)
+            {
+                int nbHashes = 0;
+                int milliTime;
+
+                DISPLAY("%1i-%-14.14s : %10i ->\r", interationNb, "XXH256", (int)benchedSize);
+
+                // Hash loop : wait for the millisecond counter to change, then hash in batches of 100 while the elapsed time is below TIMELOOP
+                milliTime = BMK_GetMilliStart();
+                while(BMK_GetMilliStart() == milliTime);
+                milliTime = BMK_GetMilliStart();
+                while(BMK_GetMilliSpan(milliTime) < TIMELOOP)
+                {
+                    int i;
+                    for (i=0; i<100; i++)
+                    {
+                        XXH256(alignedBuffer, benchedSize, 0, h256);
+                        nbHashes++;
+                    }
+                }
+                milliTime = BMK_GetMilliSpan(milliTime);
+                if ((double)milliTime < fastestC*nbHashes) fastestC = (double)milliTime/nbHashes;   // keep the lowest time per hash across iterations
+                DISPLAY("%1i-%-14.14s : %10i -> %7.1f MB/s\r", interationNb, "XXH256", (int)benchedSize, (double)benchedSize / fastestC / 1000.);
+            }
+            DISPLAY("%-16.16s : %10i -> %7.1f MB/s 0x%08X%08X%08X%08X%08X%08X%08X%08X\n", "XXH256", (int)benchedSize, (double)benchedSize / fastestC / 1000.,
+                    (U32)(h256[3]>>32), (U32)(h256[3]), (U32)(h256[2]>>32), (U32)(h256[2]),
+                    (U32)(h256[1]>>32), (U32)(h256[1]), (U32)(h256[0]>>32), (U32)(h256[0]));   // printed from h256[3] down to h256[0]
+
+            totals += benchedSize;   // running totals across all benchmarked hashes
+            totalc += fastestC;
+        }
+
         free(buffer);
     }
 
@@ -505,7 +581,7 @@ static int BMK_hash(const char* fileName, U32 hashNb)
     size_t const blockSize = 64 KB;
     size_t readSize;
     char* buffer;
-    XXH64_state_t state;
+    XXH256_state_t state;   // single state buffer for every algorithm; it is cast to the selected XXHnn_state_t below
 
     // Check file existence
     if (fileName == stdinName)
@@ -537,7 +613,13 @@ static int BMK_hash(const char* fileName, U32 hashNb)
         XXH32_reset((XXH32_state_t*)&state, 0);
         break;
     case 1:
-        XXH64_reset(&state, 0);
+        XXH64_reset((XXH64_state_t*)&state, 0);   // cast needed now that "state" is declared as XXH256_state_t
+        break;
+    case 2:   // 128-bit hash
+        XXH128_reset((XXH128_state_t*)&state, 0);
+        break;
+    case 3:   // 256-bit hash
+        XXH256_reset((XXH256_state_t*)&state, 0);
         break;
     default:
         DISPLAY("Error : bad hash algorithm ID\n");
@@ -559,7 +641,13 @@ static int BMK_hash(const char* fileName, U32 hashNb)
             XXH32_update((XXH32_state_t*)&state, buffer, readSize);
             break;
         case 1:
-            XXH64_update(&state, buffer, readSize);
+            XXH64_update((XXH64_state_t*)&state, buffer, readSize);
+            break;
+        case 2:   // 128-bit hash
+            XXH128_update((XXH128_state_t*)&state, buffer, readSize);
+            break;
+        case 3:   // 256-bit hash
+            XXH256_update((XXH256_state_t*)&state, buffer, readSize);
             break;
         default:
             break;
@@ -580,11 +668,27 @@ static int BMK_hash(const char* fileName, U32 hashNb)
         }
     case 1:
         {
-            U64 h64 = XXH64_digest(&state);
+            U64 h64 = XXH64_digest((XXH64_state_t*)&state);
             BMK_display_BigEndian(&h64, 8);
             DISPLAYRESULT(" %s \n", fileName);
             break;
         }
+    case 2:
+        {
+            U64 h64[2];   // 128-bit digest as two 64-bit words
+            XXH128_digest((XXH128_state_t*)&state, h64);
+            BMK_display_BigEndian(&h64[0], 16);   // all 16 digest bytes are passed in one call
+            DISPLAYRESULT(" %s \n", fileName);
+            break;
+        }
+    case 3:
+        {
+            U64 h64[4];   // 256-bit digest as four 64-bit words
+            XXH256_digest((XXH256_state_t*)&state, h64);
+            BMK_display_BigEndian(&h64[0], 32);   // all 32 digest bytes are passed in one call
+            DISPLAYRESULT(" %s \n", fileName);
+            break;
+        }
     default:
             break;
     }
@@ -604,7 +708,7 @@ static int usage(const char* exename)
     DISPLAY( " %s [arg] [filename]\n", exename);
     DISPLAY( "When no filename provided, or - provided : use stdin as input\n");
     DISPLAY( "Arguments :\n");
-    DISPLAY( " -H# : hash selection : 0=32bits, 1=64bits (default %i)\n", g_fn_selection);
+    DISPLAY( " -H# : hash selection : 0=32bits, 1=64bits, 2=128bits, 3=256bits (default %i)\n", g_fn_selection);
     DISPLAY( " -b : benchmark mode \n");
     DISPLAY( " -i# : number of iterations (benchmark mode; default %i)\n", g_nbIterations);
     DISPLAY( " -h : help (this text)\n");
@@ -691,7 +795,7 @@ int main(int argc, char** argv)
         return BMK_benchFile(argv+filenamesStart, argc-filenamesStart);
     }
 
-    if(g_fn_selection < 0 || g_fn_selection > 1) return badusage(exename);
+    if(g_fn_selection < 0 || g_fn_selection > 3) return badusage(exename);   // valid hash IDs are 0..3, matching the -H# help text
 
     return BMK_hash(input_filename, g_fn_selection);
 }