From face1cbf89c1f23fb431cd2f99575448a1606c2d Mon Sep 17 00:00:00 2001 From: Mathias Westerdahl Date: Mon, 1 Dec 2014 22:52:04 -0500 Subject: [PATCH 1/3] Added XXH128 and XXH256 --- xxhash.c | 848 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- xxhash.h | 25 +- xxhsum.c | 192 +++++++++---- 3 files changed, 1003 insertions(+), 62 deletions(-) diff --git a/xxhash.c b/xxhash.c index e6c2f31a..85282995 100644 --- a/xxhash.c +++ b/xxhash.c @@ -93,7 +93,6 @@ FORCE_INLINE void* XXH_memcpy(void* dest, const void* src, size_t size) return memcpy(dest,src,size); } - //************************************** // Basic Types //************************************** @@ -112,6 +111,7 @@ typedef signed int S32; typedef unsigned long long U64; #endif + #if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS) # define _PACKED __attribute__ ((packed)) #else @@ -157,6 +157,7 @@ typedef struct _U64_S # define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) #endif + #if defined(_MSC_VER) // Visual Studio # define XXH_swap32 _byteswap_ulong # define XXH_swap64 _byteswap_uint64 @@ -164,14 +165,14 @@ typedef struct _U64_S # define XXH_swap32 __builtin_bswap32 # define XXH_swap64 __builtin_bswap64 #else -static inline U32 XXH_swap32 (U32 x) +FORCE_INLINE U32 XXH_swap32 (U32 x) { return ((x << 24) & 0xff000000 ) | ((x << 8) & 0x00ff0000 ) | ((x >> 8) & 0x0000ff00 ) | ((x >> 24) & 0x000000ff ); } -static inline U64 XXH_swap64 (U64 x) +FORCE_INLINE U64 XXH_swap64 (U64 x) { return ((x << 56) & 0xff00000000000000ULL) | ((x << 40) & 0x00ff000000000000ULL) | @@ -184,7 +185,6 @@ static inline U64 XXH_swap64 (U64 x) } #endif - //************************************** // Constants //************************************** @@ -247,7 +247,6 @@ FORCE_INLINE U64 XXH_readLE64(const U64* ptr, XXH_endianess endian) return XXH_readLE64_align(ptr, endian, XXH_unaligned); } - //**************************** // Simple Hash Functions //**************************** @@ -465,6 +464,8 @@ FORCE_INLINE U64 
XXH64_endian_align(const void* input, size_t len, U64 seed, XXH h64 ^= h64 >> 32; return h64; + +#undef XXH_get64bits } @@ -496,6 +497,344 @@ unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed #endif } +FORCE_INLINE void XXH128_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align, void* out) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U64 h1, h2; +#define XXH_get64bits(p) XXH_readLE64_align((const U64*)p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) + { + len=0; + bEnd=p=(const BYTE*)(size_t)32; + } +#endif + + if (len>=32) + { + const BYTE* const limit = bEnd - 32; + U64 v1 = seed + PRIME64_1 + PRIME64_2; + U64 v2 = seed + PRIME64_2; + U64 v3 = seed + 0; + U64 v4 = seed - PRIME64_1; + + do + { + v1 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + v2 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v2 = XXH_rotl64(v2, 31); + v2 *= PRIME64_1; + v3 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v3 = XXH_rotl64(v3, 31); + v3 *= PRIME64_1; + v4 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v4 = XXH_rotl64(v4, 31); + v4 *= PRIME64_1; + } + while (p<=limit); + + v1 *= PRIME64_2; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + h1 = v1; + h2 = ( XXH_rotl64(h1, 27) + h1 ) * PRIME64_1 + PRIME64_4; + + v2 *= PRIME64_2; + v2 = XXH_rotl64(v2, 33); + v2 *= PRIME64_1; + h2 ^= v2; + h1 ^= ( XXH_rotl64(h2, 27) + h2 ) * PRIME64_1 + PRIME64_4; + + v3 *= PRIME64_2; + v3 = XXH_rotl64(v3, 29); + v3 *= PRIME64_1; + h1 ^= v3; + h2 ^= ( XXH_rotl64(h1, 27) + h1 ) * PRIME64_1 + PRIME64_4; + + v4 *= PRIME64_2; + v4 = XXH_rotl64(v4, 27); + v4 *= PRIME64_1; + h2 ^= v4; + h1 ^= ( XXH_rotl64(h2, 27) + h2 ) * PRIME64_1 + PRIME64_4; + } + else + { + h1 = seed + PRIME64_5; + h2 = seed + PRIME64_1; + } + + switch(len & 31) + { + case 31: h2 ^= ((U64)p[30]) << 48; + case 30: h2 ^= ((U64)p[29]) << 40; + case 29: h2 ^= ((U64)p[28]) << 32; + case 28: h2 
^= ((U64)p[27]) << 24; + case 27: h2 ^= ((U64)p[26]) << 16; + case 26: h2 ^= ((U64)p[25]) << 8; + case 25: h2 ^= ((U64)p[24]) << 0; + h1 ^= XXH_rotl64(h2 * PRIME64_2, 11) * PRIME64_1; + + case 24: h1 ^= ((U64)p[23]) << 56; + case 23: h1 ^= ((U64)p[22]) << 48; + case 22: h1 ^= ((U64)p[21]) << 40; + case 21: h1 ^= ((U64)p[20]) << 32; + case 20: h1 ^= ((U64)p[19]) << 24; + case 19: h1 ^= ((U64)p[18]) << 16; + case 18: h1 ^= ((U64)p[17]) << 8; + case 17: h1 ^= ((U64)p[16]) << 0; + h2 ^= XXH_rotl64(h1 * PRIME64_2, 11) * PRIME64_1; + + case 16: h2 ^= ((U64)p[15]) << 56; + case 15: h2 ^= ((U64)p[14]) << 48; + case 14: h2 ^= ((U64)p[13]) << 40; + case 13: h2 ^= ((U64)p[12]) << 32; + case 12: h2 ^= ((U64)p[11]) << 24; + case 11: h2 ^= ((U64)p[10]) << 16; + case 10: h2 ^= ((U64)p[9]) << 8; + case 9: h2 ^= ((U64)p[8]) << 0; + h1 ^= XXH_rotl64(h2 * PRIME64_2, 11) * PRIME64_1; + + case 8: h1 ^= ((U64)p[7]) << 56; + case 7: h1 ^= ((U64)p[6]) << 48; + case 6: h1 ^= ((U64)p[5]) << 40; + case 5: h1 ^= ((U64)p[4]) << 32; + case 4: h1 ^= ((U64)p[3]) << 24; + case 3: h1 ^= ((U64)p[2]) << 16; + case 2: h1 ^= ((U64)p[1]) << 8; + case 1: h1 ^= ((U64)p[0]) << 0; + h2 ^= XXH_rotl64(h1 * PRIME64_5, 11) * PRIME64_1; + } + + h1 = XXH_rotl64(h2, 27) * PRIME64_1 + PRIME64_4; + + h1 += (U64) len; + h2 += (U64) len; + + h2 ^= h1 >> 33; + h2 *= PRIME64_2; + h1 ^= h2 >> 29; + h1 *= PRIME64_3; + h2 ^= h1 >> 32; + + ((U64*)out)[0] = h1; + ((U64*)out)[1] = h2; + +#undef XXH_get64bits +} + +void XXH128 (const void* input, size_t len, unsigned long long seed, void* out) +{ +#if 0 + XXH128_state_t state; + XXH128_reset(&state, seed); + XXH128_update(&state, input, len); + XXH128_digest(&state, out); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + +# if !defined(XXH_USE_UNALIGNED_ACCESS) + if ((((size_t)input) & 7)==0) // Input is aligned, let's leverage the speed advantage + { + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + 
{ XXH128_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned, out); return; } /* aligned result is final: do not fall through and hash again */ + else + { XXH128_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned, out); return; } + } +# endif + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + XXH128_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned, out); + else + XXH128_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned, out); +#endif +} + + +FORCE_INLINE void XXH256_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align, void* out) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U64 h1, h2, h3, h4; + +#define XXH_get64bits(p) XXH_readLE64_align((const U64*)p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) + { + len=0; + bEnd=p=(const BYTE*)(size_t)32; + } +#endif + + if (len>=32) + { + const BYTE* const limit = bEnd - 32; + U64 v1 = seed + PRIME64_1 + PRIME64_2; + U64 v2 = seed + PRIME64_2; + U64 v3 = seed + 0; + U64 v4 = seed - PRIME64_1; + + do + { + v1 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + v2 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v2 = XXH_rotl64(v2, 31); + v2 *= PRIME64_1; + v3 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v3 = XXH_rotl64(v3, 31); + v3 *= PRIME64_1; + v4 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v4 = XXH_rotl64(v4, 31); + v4 *= PRIME64_1; + } + while (p<=limit); + + v1 *= PRIME64_2; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + h1 = v1; + h2 = ( XXH_rotl64(h1, 27) + h1 ) * PRIME64_1 + PRIME64_2; + + v2 *= PRIME64_2; + v2 = XXH_rotl64(v2, 33); + v2 *= PRIME64_1; + h2 ^= v2; + h3 = ( XXH_rotl64(h2, 29) + h2 ) * PRIME64_2 + PRIME64_3; + + v3 *= PRIME64_2; + v3 = XXH_rotl64(v3, 29); + v3 *= PRIME64_1; + h3 ^= v3; + h4 = ( XXH_rotl64(h3, 31) + h3 ) * PRIME64_3 + PRIME64_4; + + v4 *= PRIME64_2; + v4 = XXH_rotl64(v4, 27); + v4 *= PRIME64_1; + h4 ^= v4; + h1 ^= ( XXH_rotl64(h4, 33) + h4 ) * PRIME64_4 + PRIME64_5; + } + else + { + h1 = 
seed + PRIME64_5; + h2 = seed + PRIME64_1; + h3 = seed + PRIME64_4; + h4 = seed + PRIME64_2; + } + + switch(len & 31) + { + case 31: h4 ^= ((U64)p[30]) << 48; + case 30: h4 ^= ((U64)p[29]) << 40; + case 29: h4 ^= ((U64)p[28]) << 32; + case 28: h4 ^= ((U64)p[27]) << 24; + case 27: h4 ^= ((U64)p[26]) << 16; + case 26: h4 ^= ((U64)p[25]) << 8; + case 25: h4 ^= ((U64)p[24]) << 0; + h3 ^= XXH_rotl64(h4 * PRIME64_5, 17) * PRIME64_1; + + case 24: h3 ^= ((U64)p[23]) << 56; + case 23: h3 ^= ((U64)p[22]) << 48; + case 22: h3 ^= ((U64)p[21]) << 40; + case 21: h3 ^= ((U64)p[20]) << 32; + case 20: h3 ^= ((U64)p[19]) << 24; + case 19: h3 ^= ((U64)p[18]) << 16; + case 18: h3 ^= ((U64)p[17]) << 8; + case 17: h3 ^= ((U64)p[16]) << 0; + h2 ^= XXH_rotl64(h3 * PRIME64_5, 13) * PRIME64_1; + + case 16: h2 ^= ((U64)p[15]) << 56; + case 15: h2 ^= ((U64)p[14]) << 48; + case 14: h2 ^= ((U64)p[13]) << 40; + case 13: h2 ^= ((U64)p[12]) << 32; + case 12: h2 ^= ((U64)p[11]) << 24; + case 11: h2 ^= ((U64)p[10]) << 16; + case 10: h2 ^= ((U64)p[9]) << 8; + case 9: h2 ^= ((U64)p[8]) << 0; + h1 ^= XXH_rotl64(h2 * PRIME64_5, 11) * PRIME64_1; + + case 8: h1 ^= ((U64)p[7]) << 56; + case 7: h1 ^= ((U64)p[6]) << 48; + case 6: h1 ^= ((U64)p[5]) << 40; + case 5: h1 ^= ((U64)p[4]) << 32; + case 4: h1 ^= ((U64)p[3]) << 24; + case 3: h1 ^= ((U64)p[2]) << 16; + case 2: h1 ^= ((U64)p[1]) << 8; + case 1: h1 ^= ((U64)p[0]) << 0; + h4 ^= XXH_rotl64(h1 * PRIME64_5, 7) * PRIME64_1; + } + + h2 ^= ( XXH_rotl64(h1, 27) + h1 ) * PRIME64_1 + PRIME64_4; + h3 ^= ( XXH_rotl64(h2, 29) + h2 ) * PRIME64_2 + PRIME64_3; + h4 ^= ( XXH_rotl64(h3, 31) + h3 ) * PRIME64_3 + PRIME64_2; + h1 ^= ( XXH_rotl64(h4, 33) + h4 ) * PRIME64_4 + PRIME64_1; + + h1 += (U64) len; + h2 += (U64) len; + h3 += (U64) len; + h4 += (U64) len; + + h4 ^= h1 >> 33; + h4 *= PRIME64_2; + h1 ^= h4 >> 29; + h1 *= PRIME64_3; + h4 ^= h1 >> 32; + + h3 ^= h2 >> 33; + h3 *= PRIME64_2; + h2 ^= h3 >> 29; + h2 *= PRIME64_3; + h3 ^= h2 >> 32; + + ((unsigned long 
long*)out)[0] = h1; + ((unsigned long long*)out)[1] = h2; + ((unsigned long long*)out)[2] = h3; + ((unsigned long long*)out)[3] = h4; + +#undef XXH_get64bits +} + +void XXH256 (const void* input, size_t len, unsigned long long seed, void* out) +{ +#if 0 + XXH256_state_t state; + XXH256_reset(&state, seed); + XXH256_update(&state, input, len); + XXH256_digest(&state, out); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + +# if !defined(XXH_USE_UNALIGNED_ACCESS) + if ((((size_t)input) & 7)==0) // Input is aligned, let's leverage the speed advantage + { + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + { XXH256_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned, out); return; } /* aligned result is final: do not fall through and hash again */ + else + { XXH256_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned, out); return; } + } +# endif + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + XXH256_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned, out); + else + XXH256_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned, out); +#endif +} + + /**************************************************** * Advanced Hash Functions ****************************************************/ @@ -526,6 +865,31 @@ typedef struct } XXH_istate64_t; +typedef struct +{ + U64 total_len; + U64 seed; + U64 v1; + U64 v2; + U64 v3; + U64 v4; + char memory[64]; + U32 memsize; +} XXH_istate128_t; + +typedef struct +{ + U64 total_len; + U64 seed; + U64 v1; + U64 v2; + U64 v3; + U64 v4; + char memory[64]; + U32 memsize; +} XXH_istate256_t; + + XXH32_state_t* XXH32_createState(void) { XXH_STATIC_ASSERT(sizeof(XXH32_state_t) >= sizeof(XXH_istate32_t)); // A compilation error here means XXH32_state_t is not large enough @@ -548,6 +912,17 @@ XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) return XXH_OK; }; +XXH128_state_t* XXH128_createState(void) +{ + XXH_STATIC_ASSERT(sizeof(XXH128_state_t) >= sizeof(XXH_istate128_t)); // A compilation error here means XXH128_state_t is 
not large enough + return (XXH128_state_t*)XXH_malloc(sizeof(XXH128_state_t)); +} +XXH_errorcode XXH128_freeState(XXH128_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + /*** Hash feed ***/ @@ -577,6 +952,32 @@ XXH_errorcode XXH64_reset(XXH64_state_t* state_in, unsigned long long seed) return XXH_OK; } +XXH_errorcode XXH128_reset(XXH128_state_t* state_in, unsigned long long seed) +{ + XXH_istate128_t* state = (XXH_istate128_t*) state_in; + state->seed = seed; + state->v1 = seed + PRIME64_1 + PRIME64_2; + state->v2 = seed + PRIME64_2; + state->v3 = seed + 0; + state->v4 = seed - PRIME64_1; + state->total_len = 0; + state->memsize = 0; + return XXH_OK; +} + +XXH_errorcode XXH256_reset(XXH256_state_t* state_in, unsigned long long seed) +{ + XXH_istate256_t* state = (XXH_istate256_t*) state_in; + state->seed = seed; + state->v1 = seed + PRIME64_1 + PRIME64_2; + state->v2 = seed + PRIME64_2; + state->v3 = seed + 0; + state->v4 = seed - PRIME64_1; + state->total_len = 0; + state->memsize = 0; + return XXH_OK; +} + FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianess endian) { @@ -926,3 +1327,440 @@ unsigned long long XXH64_digest (const XXH64_state_t* state_in) } +FORCE_INLINE XXH_errorcode XXH128_update_endian (XXH128_state_t* state_in, const void* input, size_t len, XXH_endianess endian) +{ + XXH_istate128_t * state = (XXH_istate128_t *) state_in; + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; +#define XXH_get64bits(p) XXH_readLE64((const U64*)p, endian) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + + state->total_len += len; + + if (state->memsize + len < 32) // fill in tmp buffer + { + XXH_memcpy(state->memory + state->memsize, input, len); + state->memsize += (U32)len; + return XXH_OK; + } + + if (state->memsize) // some data left from previous update + { + XXH_memcpy(state->memory + state->memsize, input, 
32-state->memsize); + { + const BYTE* ps = (const BYTE*)state->memory; + state->v1 += XXH_get64bits(ps) * PRIME64_2; + state->v1 = XXH_rotl64(state->v1, 31); + state->v1 *= PRIME64_1; + ps+=8; + state->v2 += XXH_get64bits(ps) * PRIME64_2; + state->v2 = XXH_rotl64(state->v2, 31); + state->v2 *= PRIME64_1; + ps+=8; + state->v3 += XXH_get64bits(ps) * PRIME64_2; + state->v3 = XXH_rotl64(state->v3, 31); + state->v3 *= PRIME64_1; + ps+=8; + state->v4 += XXH_get64bits(ps) * PRIME64_2; + state->v4 = XXH_rotl64(state->v4, 31); + state->v4 *= PRIME64_1; + ps+=8; + } + p += 32-state->memsize; + state->memsize = 0; + } + + if (p+32 <= bEnd) + { + const BYTE* const limit = bEnd - 32; + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; + + do + { + v1 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + v2 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v2 = XXH_rotl64(v2, 31); + v2 *= PRIME64_1; + v3 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v3 = XXH_rotl64(v3, 31); + v3 *= PRIME64_1; + v4 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v4 = XXH_rotl64(v4, 31); + v4 *= PRIME64_1; + } + while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) + { + XXH_memcpy(state->memory, p, bEnd-p); + state->memsize = (int)(bEnd-p); + } + + return XXH_OK; + +#undef XXH_get64bits +} + +XXH_errorcode XXH128_update (XXH128_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH128_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH128_update_endian(state_in, input, len, XXH_bigEndian); +} + + +FORCE_INLINE void XXH128_digest_endian (const XXH128_state_t* state_in, XXH_endianess endian, void* out) +{ + (void)endian; + XXH_istate128_t * state = (XXH_istate128_t *) state_in; + const BYTE * p = (const 
BYTE*)state->memory; + U64 h1, h2; + + if (state->total_len >= 32) + { + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; + + v1 *= PRIME64_2; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + h1 = v1; + h2 = ( XXH_rotl64(h1, 27) + h1 ) * PRIME64_1 + PRIME64_4; + + v2 *= PRIME64_2; + v2 = XXH_rotl64(v2, 33); + v2 *= PRIME64_1; + h2 ^= v2; + h1 ^= ( XXH_rotl64(h2, 27) + h2 ) * PRIME64_1 + PRIME64_4; + + v3 *= PRIME64_2; + v3 = XXH_rotl64(v3, 29); + v3 *= PRIME64_1; + h1 ^= v3; + h2 ^= ( XXH_rotl64(h1, 27) + h1 ) * PRIME64_1 + PRIME64_4; + + v4 *= PRIME64_2; + v4 = XXH_rotl64(v4, 27); + v4 *= PRIME64_1; + h2 ^= v4; + h1 ^= ( XXH_rotl64(h2, 27) + h2 ) * PRIME64_1 + PRIME64_4; + } + else + { + h1 = state->seed + PRIME64_5; + h2 = state->seed + PRIME64_1; + } + + switch(state->total_len & 31) + { + case 31: h2 ^= ((U64)p[30]) << 48; + case 30: h2 ^= ((U64)p[29]) << 40; + case 29: h2 ^= ((U64)p[28]) << 32; + case 28: h2 ^= ((U64)p[27]) << 24; + case 27: h2 ^= ((U64)p[26]) << 16; + case 26: h2 ^= ((U64)p[25]) << 8; + case 25: h2 ^= ((U64)p[24]) << 0; + h1 ^= XXH_rotl64(h2 * PRIME64_2, 11) * PRIME64_1; + + case 24: h1 ^= ((U64)p[23]) << 56; + case 23: h1 ^= ((U64)p[22]) << 48; + case 22: h1 ^= ((U64)p[21]) << 40; + case 21: h1 ^= ((U64)p[20]) << 32; + case 20: h1 ^= ((U64)p[19]) << 24; + case 19: h1 ^= ((U64)p[18]) << 16; + case 18: h1 ^= ((U64)p[17]) << 8; + case 17: h1 ^= ((U64)p[16]) << 0; + h2 ^= XXH_rotl64(h1 * PRIME64_2, 11) * PRIME64_1; + + case 16: h2 ^= ((U64)p[15]) << 56; + case 15: h2 ^= ((U64)p[14]) << 48; + case 14: h2 ^= ((U64)p[13]) << 40; + case 13: h2 ^= ((U64)p[12]) << 32; + case 12: h2 ^= ((U64)p[11]) << 24; + case 11: h2 ^= ((U64)p[10]) << 16; + case 10: h2 ^= ((U64)p[9]) << 8; + case 9: h2 ^= ((U64)p[8]) << 0; + h1 ^= XXH_rotl64(h2 * PRIME64_2, 11) * PRIME64_1; + + case 8: h1 ^= ((U64)p[7]) << 56; + case 7: h1 ^= ((U64)p[6]) << 48; + case 6: h1 ^= ((U64)p[5]) << 40; + case 5: h1 ^= ((U64)p[4]) << 32; + case 4: h1 ^= 
((U64)p[3]) << 24; + case 3: h1 ^= ((U64)p[2]) << 16; + case 2: h1 ^= ((U64)p[1]) << 8; + case 1: h1 ^= ((U64)p[0]) << 0; + h2 ^= XXH_rotl64(h1 * PRIME64_5, 11) * PRIME64_1; + } + + h1 = XXH_rotl64(h2, 27) * PRIME64_1 + PRIME64_4; + + h1 += (U64) state->total_len; + h2 += (U64) state->total_len; + + h2 ^= h1 >> 33; + h2 *= PRIME64_2; + h1 ^= h2 >> 29; + h1 *= PRIME64_3; + h2 ^= h1 >> 32; + + ((U64*)out)[0] = h1; + ((U64*)out)[1] = h2; +} + +void XXH128_digest (const XXH128_state_t* state_in, void* out) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + XXH128_digest_endian(state_in, XXH_littleEndian, (unsigned long long*)out); /* void function: plain call, ISO C forbids 'return expr;' here */ + else + XXH128_digest_endian(state_in, XXH_bigEndian, (unsigned long long*)out); +} + + +FORCE_INLINE XXH_errorcode XXH256_update_endian (XXH256_state_t* state_in, const void* input, size_t len, XXH_endianess endian) +{ + XXH_istate256_t * state = (XXH_istate256_t *) state_in; + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + + state->total_len += len; + + if (state->memsize + len < 32) // fill in tmp buffer + { + XXH_memcpy(state->memory + state->memsize, input, len); + state->memsize += (U32)len; + return XXH_OK; + } + + if (state->memsize) // some data left from previous update + { + XXH_memcpy(state->memory + state->memsize, input, 32-state->memsize); + { + const U64* p64 = (const U64*)state->memory; + state->v1 += XXH_readLE64(p64, endian) * PRIME64_2; + state->v1 = XXH_rotl64(state->v1, 31); + state->v1 *= PRIME64_1; + p64++; + state->v2 += XXH_readLE64(p64, endian) * PRIME64_2; + state->v2 = XXH_rotl64(state->v2, 31); + state->v2 *= PRIME64_1; + p64++; + state->v3 += XXH_readLE64(p64, endian) * PRIME64_2; + state->v3 = XXH_rotl64(state->v3, 31); + state->v3 *= PRIME64_1; + p64++; + state->v4 += XXH_readLE64(p64, 
endian) * PRIME64_2; + state->v4 = XXH_rotl64(state->v4, 31); + state->v4 *= PRIME64_1; + p64++; + } + p += 32-state->memsize; + state->memsize = 0; + } + + if (p+32 <= bEnd) + { + const BYTE* const limit = bEnd - 32; + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; + + do + { + v1 += XXH_readLE64((const U64*)p+0, endian) * PRIME64_2; + v1 = XXH_rotl64(v1, 31) * PRIME64_1; + + v2 += XXH_readLE64((const U64*)p+1, endian) * PRIME64_2; + v2 = XXH_rotl64(v2, 31) * PRIME64_1; + + v3 += XXH_readLE64((const U64*)p+2, endian) * PRIME64_2; + v3 = XXH_rotl64(v3, 31) * PRIME64_1; + + v4 += XXH_readLE64((const U64*)p+3, endian) * PRIME64_2; + v4 = XXH_rotl64(v4, 31) * PRIME64_1; + + p+=32; + } + while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) + { + XXH_memcpy(state->memory, p, bEnd-p); + state->memsize = (int)(bEnd-p); + } + + return XXH_OK; +} + +XXH_errorcode XXH256_update (XXH256_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH256_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH256_update_endian(state_in, input, len, XXH_bigEndian); +} + + +FORCE_INLINE void XXH256_digest_endian (const XXH256_state_t* state_in, XXH_endianess endian, void* out) +{ + (void)endian; + XXH_istate256_t * state = (XXH_istate256_t *) state_in; + const BYTE * p = (const BYTE*)state->memory; + U64 h1, h2, h3, h4; + + if (state->total_len >= 32) + { + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; + + v1 *= PRIME64_2; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + h1 = v1; + h2 = ( XXH_rotl64(h1, 27) + h1 ) * PRIME64_1 + PRIME64_2; + + v2 *= PRIME64_2; + v2 = XXH_rotl64(v2, 33); + v2 *= PRIME64_1; + h2 ^= v2; + h3 = ( XXH_rotl64(h2, 29) + h2 ) * PRIME64_2 + PRIME64_3; + + v3 
*= PRIME64_2; + v3 = XXH_rotl64(v3, 29); + v3 *= PRIME64_1; + h3 ^= v3; + h4 = ( XXH_rotl64(h3, 31) + h3 ) * PRIME64_3 + PRIME64_4; + + v4 *= PRIME64_2; + v4 = XXH_rotl64(v4, 27); + v4 *= PRIME64_1; + h4 ^= v4; + h1 ^= ( XXH_rotl64(h4, 33) + h4 ) * PRIME64_4 + PRIME64_5; + } + else + { + h1 = state->seed + PRIME64_5; + h2 = state->seed + PRIME64_1; + h3 = state->seed + PRIME64_4; + h4 = state->seed + PRIME64_2; + } + + switch(state->total_len & 31) + { + case 31: h4 ^= ((U64)p[30]) << 48; + case 30: h4 ^= ((U64)p[29]) << 40; + case 29: h4 ^= ((U64)p[28]) << 32; + case 28: h4 ^= ((U64)p[27]) << 24; + case 27: h4 ^= ((U64)p[26]) << 16; + case 26: h4 ^= ((U64)p[25]) << 8; + case 25: h4 ^= ((U64)p[24]) << 0; + h3 ^= XXH_rotl64(h4 * PRIME64_5, 17) * PRIME64_1; + + case 24: h3 ^= ((U64)p[23]) << 56; + case 23: h3 ^= ((U64)p[22]) << 48; + case 22: h3 ^= ((U64)p[21]) << 40; + case 21: h3 ^= ((U64)p[20]) << 32; + case 20: h3 ^= ((U64)p[19]) << 24; + case 19: h3 ^= ((U64)p[18]) << 16; + case 18: h3 ^= ((U64)p[17]) << 8; + case 17: h3 ^= ((U64)p[16]) << 0; + h2 ^= XXH_rotl64(h3 * PRIME64_5, 13) * PRIME64_1; + + case 16: h2 ^= ((U64)p[15]) << 56; + case 15: h2 ^= ((U64)p[14]) << 48; + case 14: h2 ^= ((U64)p[13]) << 40; + case 13: h2 ^= ((U64)p[12]) << 32; + case 12: h2 ^= ((U64)p[11]) << 24; + case 11: h2 ^= ((U64)p[10]) << 16; + case 10: h2 ^= ((U64)p[9]) << 8; + case 9: h2 ^= ((U64)p[8]) << 0; + h1 ^= XXH_rotl64(h2 * PRIME64_5, 11) * PRIME64_1; + + case 8: h1 ^= ((U64)p[7]) << 56; + case 7: h1 ^= ((U64)p[6]) << 48; + case 6: h1 ^= ((U64)p[5]) << 40; + case 5: h1 ^= ((U64)p[4]) << 32; + case 4: h1 ^= ((U64)p[3]) << 24; + case 3: h1 ^= ((U64)p[2]) << 16; + case 2: h1 ^= ((U64)p[1]) << 8; + case 1: h1 ^= ((U64)p[0]) << 0; + h4 ^= XXH_rotl64(h1 * PRIME64_5, 7) * PRIME64_1; + } + + h2 ^= ( XXH_rotl64(h1, 27) + h1 ) * PRIME64_1 + PRIME64_4; + h3 ^= ( XXH_rotl64(h2, 29) + h2 ) * PRIME64_2 + PRIME64_3; + h4 ^= ( XXH_rotl64(h3, 31) + h3 ) * PRIME64_3 + PRIME64_2; + h1 ^= ( 
XXH_rotl64(h4, 33) + h4 ) * PRIME64_4 + PRIME64_1; + + h1 += (U64) state->total_len; + h2 += (U64) state->total_len; + h3 += (U64) state->total_len; + h4 += (U64) state->total_len; + + h4 ^= h1 >> 33; + h4 *= PRIME64_2; + h1 ^= h4 >> 29; + h1 *= PRIME64_3; + h4 ^= h1 >> 32; + + h3 ^= h2 >> 33; + h3 *= PRIME64_2; + h2 ^= h3 >> 29; + h2 *= PRIME64_3; + h3 ^= h2 >> 32; + + ((unsigned long long*)out)[0] = h1; + ((unsigned long long*)out)[1] = h2; + ((unsigned long long*)out)[2] = h3; + ((unsigned long long*)out)[3] = h4; +} + +void XXH256_digest (const XXH256_state_t* state_in, void* out) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + XXH256_digest_endian(state_in, XXH_littleEndian, (unsigned long long*)out); /* void function: plain call, ISO C forbids 'return expr;' here */ + else + XXH256_digest_endian(state_in, XXH_bigEndian, (unsigned long long*)out); +} diff --git a/xxhash.h b/xxhash.h index 55b45015..07b34169 100644 --- a/xxhash.h +++ b/xxhash.h @@ -83,6 +83,8 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; unsigned int XXH32 (const void* input, size_t length, unsigned seed); unsigned long long XXH64 (const void* input, size_t length, unsigned long long seed); +void XXH128 (const void* input, size_t length, unsigned long long seed, void* out); +void XXH256 (const void* input, size_t length, unsigned long long seed, void* out); /* XXH32() : @@ -93,6 +95,12 @@ XXH32() : Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s XXH64() : Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". +XXH128(): + Calculate the 128-bits hash of sequence of length "len" stored at memory address "input". + Output is stored in the 16 byte array "out" +XXH256(): + Calculate the 256-bits hash of sequence of length "len" stored at memory address "input". 
+ Output is stored in the 32 byte array "out" */ @@ -102,6 +110,8 @@ XXH64() : *****************************/ typedef struct { long long ll[ 6]; } XXH32_state_t; typedef struct { long long ll[11]; } XXH64_state_t; +typedef struct { long long ll[28]; } XXH128_state_t; +typedef struct { long long ll[28]; } XXH256_state_t; /* These structures allow static allocation of XXH states. @@ -116,6 +126,12 @@ XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); XXH64_state_t* XXH64_createState(void); XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); +XXH128_state_t* XXH128_createState(void); +XXH_errorcode XXH128_freeState(XXH128_state_t* statePtr); + +XXH256_state_t* XXH256_createState(void); +XXH_errorcode XXH256_freeState(XXH256_state_t* statePtr); + /* These functions create and release memory for XXH state. States must then be initialized using XXHnn_reset() before first use. @@ -130,6 +146,14 @@ XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long see XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); unsigned long long XXH64_digest (const XXH64_state_t* statePtr); +XXH_errorcode XXH128_reset (XXH128_state_t* statePtr, unsigned long long seed); +XXH_errorcode XXH128_update (XXH128_state_t* statePtr, const void* input, size_t length); +void XXH128_digest (const XXH128_state_t* statePtr, void* out); + +XXH_errorcode XXH256_reset (XXH256_state_t* statePtr, unsigned long long seed); +XXH_errorcode XXH256_update (XXH256_state_t* statePtr, const void* input, size_t length); +void XXH256_digest (const XXH256_state_t* statePtr, void* out); + /* These functions calculate the xxHash of an input provided in multiple smaller packets, as opposed to an input provided as a single block. @@ -150,7 +174,6 @@ and therefore get some new hashes, by calling again XXHnn_digest(). When you are done, don't forget to free XXH state space, using typically XXHnn_freeState(). 
*/ - #if defined (__cplusplus) } #endif diff --git a/xxhsum.c b/xxhsum.c index 39347681..d1d12eca 100644 --- a/xxhsum.c +++ b/xxhsum.c @@ -21,35 +21,26 @@ You can contact the author at : - Discussion group : https://groups.google.com/forum/?fromgroups#!forum/lz4c */ -/************************************** - * Compiler Options - *************************************/ -/* MS Visual */ -#if defined(_MSC_VER) || defined(_WIN32) -# define _CRT_SECURE_NO_WARNINGS /* removes visual warnings */ -# define BMK_LEGACY_TIMER 1 /* gettimeofday() not supported by MSVC */ -#endif +//************************************** +// Compiler Options +//************************************** +// Visual warning messages (must be first line) +#define _CRT_SECURE_NO_WARNINGS -/* Under Linux at least, pull in the *64 commands */ +// Under Linux at least, pull in the *64 commands #define _LARGEFILE64_SOURCE -/************************************** - * Includes - *************************************/ +//************************************** +// Includes +//************************************** #include // malloc #include // fprintf, fopen, ftello64 #include // strcmp +#include // timeb #include // stat64 #include // stat64 -// Use ftime() if gettimeofday() is not available on your target -#if defined(BMK_LEGACY_TIMER) -# include // timeb, ftime -#else -# include // gettimeofday -#endif - #include "xxhash.h" @@ -93,8 +84,8 @@ You can contact the author at : #define TIMELOOP 2500 // Minimum timing per iteration #define PRIME 2654435761U -#define KB *(1<<10) -#define MB *(1<<20) +#define KB *(1U<<10) +#define MB *(1U<<20) #define GB *(1U<<30) #define MAX_MEM (2 GB - 64 MB) @@ -121,34 +112,18 @@ static int g_fn_selection = 1; // Benchmark Functions //********************************************************* -#if defined(BMK_LEGACY_TIMER) - static int BMK_GetMilliStart(void) { - // Based on Legacy ftime() - // Rolls over every ~ 12.1 days (0x100000/24/60/60) - // Use GetMilliSpan to correct for 
rollover - struct timeb tb; - int nCount; - ftime( &tb ); - nCount = (int) (tb.millitm + (tb.time & 0xfffff) * 1000); - return nCount; + // Supposed to be portable + // Rolls over every ~ 12.1 days (0x100000/24/60/60) + // Use GetMilliSpan to correct for rollover + struct timeb tb; + int nCount; + ftime( &tb ); + nCount = tb.millitm + (tb.time & 0xfffff) * 1000; + return nCount; } -#else - -static int BMK_GetMilliStart(void) -{ - // Based on newer gettimeofday() - // Use GetMilliSpan to correct for rollover - struct timeval tv; - int nCount; - gettimeofday(&tv, NULL); - nCount = (int) (tv.tv_usec/1000 + (tv.tv_sec & 0xfffff) * 1000); - return nCount; -} - -#endif static int BMK_GetMilliSpan( int nTimeStart ) { @@ -286,7 +261,7 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles) if ((double)milliTime < fastestC*nbHashes) fastestC = (double)milliTime/nbHashes; DISPLAY("%1i-%-14.14s : %10i -> %7.1f MB/s\r", interationNb, "XXH32", (int)benchedSize, (double)benchedSize / fastestC / 1000.); } - DISPLAY("%-16.16s : %10i -> %7.1f MB/s 0x%08X\n", "XXH32", (int)benchedSize, (double)benchedSize / fastestC / 1000., hashResult); + DISPLAY("%-16.16s : %10i -> %7.1f MB/s 0x%08x\n", "XXH32", (int)benchedSize, (double)benchedSize / fastestC / 1000., hashResult); totals += benchedSize; totalc += fastestC; @@ -355,7 +330,83 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles) if ((double)milliTime < fastestC*nbHashes) fastestC = (double)milliTime/nbHashes; DISPLAY("%1i-%-14.14s : %10i -> %7.1f MB/s\r", interationNb, "XXH64", (int)benchedSize, (double)benchedSize / fastestC / 1000.); } - DISPLAY("%-16.16s : %10i -> %7.1f MB/s 0x%08X%08X\n", "XXH64", (int)benchedSize, (double)benchedSize / fastestC / 1000., (U32)(h64>>32), (U32)(h64)); + DISPLAY("%-16.16s : %10i -> %7.1f MB/s 0x%08x%08x\n", "XXH64", (int)benchedSize, (double)benchedSize / fastestC / 1000., (U32)(h64>>32), (U32)(h64)); + + totals += benchedSize; + totalc += fastestC; + } + + // Bench XXH128 + { + int 
interationNb; + double fastestC = 100000000.; + unsigned long long h128[2] = {0, 0}; + + DISPLAY("\r%79s\r", ""); // Clean display line + for (interationNb = 1; interationNb <= g_nbIterations; interationNb++) + { + int nbHashes = 0; + int milliTime; + + DISPLAY("%1i-%-14.14s : %10i ->\r", interationNb, "XXH128", (int)benchedSize); + + // Hash loop + milliTime = BMK_GetMilliStart(); + while(BMK_GetMilliStart() == milliTime); + milliTime = BMK_GetMilliStart(); + while(BMK_GetMilliSpan(milliTime) < TIMELOOP) + { + int i; + for (i=0; i<100; i++) + { + XXH128(alignedBuffer, benchedSize, 0, h128); + nbHashes++; + } + } + milliTime = BMK_GetMilliSpan(milliTime); + if ((double)milliTime < fastestC*nbHashes) fastestC = (double)milliTime/nbHashes; + DISPLAY("%1i-%-14.14s : %10i -> %7.1f MB/s\r", interationNb, "XXH128", (int)benchedSize, (double)benchedSize / fastestC / 1000.); + } + DISPLAY("%-16.16s : %10i -> %7.1f MB/s 0x%08x%08x%08x%08x\n", "XXH128", (int)benchedSize, (double)benchedSize / fastestC / 1000., (U32)(h128[1]>>32), (U32)(h128[1]), (U32)(h128[0]>>32), (U32)(h128[0])); + + totals += benchedSize; + totalc += fastestC; + } + + // Bench XXH256 + { + int interationNb; + double fastestC = 100000000.; + unsigned long long h256[4] = {0, 0}; + + DISPLAY("\r%79s\r", ""); // Clean display line + for (interationNb = 1; interationNb <= g_nbIterations; interationNb++) + { + int nbHashes = 0; + int milliTime; + + DISPLAY("%1i-%-14.14s : %10i ->\r", interationNb, "XXH256", (int)benchedSize); + + // Hash loop + milliTime = BMK_GetMilliStart(); + while(BMK_GetMilliStart() == milliTime); + milliTime = BMK_GetMilliStart(); + while(BMK_GetMilliSpan(milliTime) < TIMELOOP) + { + int i; + for (i=0; i<100; i++) + { + XXH256(alignedBuffer, benchedSize, 0, h256); + nbHashes++; + } + } + milliTime = BMK_GetMilliSpan(milliTime); + if ((double)milliTime < fastestC*nbHashes) fastestC = (double)milliTime/nbHashes; + DISPLAY("%1i-%-14.14s : %10i -> %7.1f MB/s\r", interationNb, "XXH256", 
(int)benchedSize, (double)benchedSize / fastestC / 1000.); + } + DISPLAY("%-16.16s : %10i -> %7.1f MB/s 0x%08x%08x%08x%08x%08x%08x%08x%08x\n", "XXH256", (int)benchedSize, (double)benchedSize / fastestC / 1000., + (U32)(h256[3]>>32), (U32)(h256[3]), (U32)(h256[2]>>32), (U32)(h256[2]), + (U32)(h256[1]>>32), (U32)(h256[1]), (U32)(h256[0]>>32), (U32)(h256[0])); totals += benchedSize; totalc += fastestC; @@ -376,10 +427,10 @@ static void BMK_checkResult(U32 r1, U32 r2) { static int nbTests = 1; - if (r1==r2) DISPLAY("\rTest%3i : %08X == %08X ok ", nbTests, r1, r2); + if (r1==r2) DISPLAY("\rTest%3i : %08x == %08x ok ", nbTests, r1, r2); else { - DISPLAY("\rERROR : Test%3i : %08X <> %08X !!!!! \n", nbTests, r1, r2); + DISPLAY("\rERROR : Test%3i : %08x <> %08x !!!!! \n", nbTests, r1, r2); exit(1); } nbTests++; @@ -393,7 +444,7 @@ static void BMK_checkResult64(U64 r1, U64 r2) if (r1!=r2) { DISPLAY("\rERROR : Test%3i : 64-bits values non equals !!!!! \n", nbTests); - DISPLAY("\r %08X%08X != %08X%08X \n", (U32)(r1>>32), (U32)r1, (U32)(r2<<32), (U32)r2); + DISPLAY("\r %08x%08x != %08x%08x \n", (U32)(r1>>32), (U32)r1, (U32)(r2<<32), (U32)r2); exit(1); } nbTests++; @@ -484,7 +535,7 @@ int BMK_hash(char* fileName, U32 hashNb) size_t const blockSize = 64 KB; size_t readSize; char* buffer; - XXH64_state_t state; + XXH256_state_t state; // Check file existence inFile = fopen( fileName, "rb" ); @@ -510,7 +561,13 @@ int BMK_hash(char* fileName, U32 hashNb) XXH32_reset((XXH32_state_t*)&state, 0); break; case 1: - XXH64_reset(&state, 0); + XXH64_reset((XXH64_state_t*)&state, 0); + break; + case 2: + XXH128_reset((XXH128_state_t*)&state, 0); + break; + case 3: + XXH256_reset((XXH256_state_t*)&state, 0); break; default: DISPLAY("Error : bad hash algorithm ID\n"); @@ -532,7 +589,13 @@ int BMK_hash(char* fileName, U32 hashNb) XXH32_update((XXH32_state_t*)&state, buffer, readSize); break; case 1: - XXH64_update(&state, buffer, readSize); + XXH64_update((XXH64_state_t*)&state, buffer, 
readSize); + break; + case 2: + XXH128_update((XXH128_state_t*)&state, buffer, readSize); + break; + case 3: + XXH256_update((XXH256_state_t*)&state, buffer, readSize); break; default: break; @@ -552,10 +615,27 @@ int BMK_hash(char* fileName, U32 hashNb) } case 1: { - U64 h64 = XXH64_digest(&state); + U64 h64 = XXH64_digest((XXH64_state_t*)&state); DISPLAYRESULT("%08x%08x %s \n", (U32)(h64>>32), (U32)(h64), fileName); break; } + case 2: + { + U64 h64[2]; + XXH128_digest((XXH128_state_t*)&state, h64); + DISPLAYRESULT("%08x%08x%08x%08x %s \n", + (U32)(h64[1]>>32), (U32)(h64[1]), (U32)(h64[0]>>32), (U32)(h64[0]), fileName); + break; + } + case 3: + { + U64 h64[4]; + XXH256_digest((XXH256_state_t*)&state, h64); + DISPLAYRESULT("%08x%08x%08x%08x%08x%08x%08x%08x %s \n", + (U32)(h64[3]>>32), (U32)(h64[3]), (U32)(h64[2]>>32), (U32)(h64[2]), + (U32)(h64[1]>>32), (U32)(h64[1]), (U32)(h64[0]>>32), (U32)(h64[0]), fileName); + break; + } default: break; } @@ -574,7 +654,7 @@ int usage(char* exename) DISPLAY( "Usage :\n"); DISPLAY( " %s [arg] filename\n", exename); DISPLAY( "Arguments :\n"); - DISPLAY( " -H# : hash selection : 0=32bits, 1=64bits (default %i)\n", g_fn_selection); + DISPLAY( " -H# : hash selection : 0=32bits, 1=64bits, 2=128bits, 3=256bits (default %i)\n", g_fn_selection); DISPLAY( " -b : benchmark mode \n"); DISPLAY( " -i# : number of iterations (benchmark mode; default %i)\n", g_nbIterations); DISPLAY( " -h : help (this text)\n"); @@ -664,7 +744,7 @@ int main(int argc, char** argv) // No input filename ==> Error if(!input_filename) { badusage(exename); return 1; } - if(g_fn_selection < 0 || g_fn_selection > 1) { badusage(exename); return 1; } + if(g_fn_selection < 0 || g_fn_selection > 3) { badusage(exename); return 1; } return BMK_hash(argv[filenamesStart], g_fn_selection); } From ff6285a9f86052bdf32b4c075cd85d219c5ccf24 Mon Sep 17 00:00:00 2001 From: Mathias Westerdahl Date: Mon, 1 Dec 2014 22:59:59 -0500 Subject: [PATCH 2/3] Added missing create/free state 
for 256 version --- xxhash.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/xxhash.c b/xxhash.c index 85282995..08445475 100644 --- a/xxhash.c +++ b/xxhash.c @@ -923,6 +923,16 @@ XXH_errorcode XXH128_freeState(XXH128_state_t* statePtr) return XXH_OK; } +XXH256_state_t* XXH256_createState(void) +{ + XXH_STATIC_ASSERT(sizeof(XXH256_state_t) >= sizeof(XXH_istate256_t)); // A compilation error here means XXH256_state_t is not large enough + return (XXH256_state_t*)XXH_malloc(sizeof(XXH256_state_t)); +} +XXH_errorcode XXH256_freeState(XXH256_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} /*** Hash feed ***/ From 4d31c1282db5baa2f1f10f3d2400221b521b5c1a Mon Sep 17 00:00:00 2001 From: Mathias Westerdahl Date: Thu, 18 Dec 2014 02:00:56 -0500 Subject: [PATCH 3/3] Fixed broken merge --- xxhash.c | 15 +++++++----- xxhsum.c | 75 +++++++++++++++++++++++++++++++++++++------------------- 2 files changed, 59 insertions(+), 31 deletions(-) diff --git a/xxhash.c b/xxhash.c index 440b2ed8..3bb38fd0 100644 --- a/xxhash.c +++ b/xxhash.c @@ -72,11 +72,11 @@ You can contact the author at : # define FORCE_INLINE static __forceinline #else # if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# ifdef __GNUC__ -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif # else # define FORCE_INLINE static # endif /* __STDC_VERSION__ */ @@ -99,6 +99,7 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) return memcpy(dest,src,size); } + /************************************** * Basic Types ***************************************/ @@ -117,7 +118,6 @@ typedef signed int S32; typedef unsigned long long U64; #endif - #if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS) # define _PACKED __attribute__ ((packed)) #else @@ 
-190,6 +190,7 @@ static U64 XXH_swap64 (U64 x) } #endif + /************************************** * Constants ***************************************/ @@ -253,6 +254,7 @@ FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) return XXH_readLE64_align(ptr, endian, XXH_unaligned); } + /**************************** * Simple Hash Functions *****************************/ @@ -916,6 +918,7 @@ XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) { XXH_free(statePtr); return XXH_OK; +} XXH128_state_t* XXH128_createState(void) { diff --git a/xxhsum.c b/xxhsum.c index cc492d9d..d93806e5 100644 --- a/xxhsum.c +++ b/xxhsum.c @@ -22,19 +22,22 @@ You can contact the author at : - public discussion board : https://groups.google.com/forum/#!forum/lz4c */ -//************************************** -// Compiler Options -//************************************** -// Visual warning messages (must be first line) -#define _CRT_SECURE_NO_WARNINGS +/************************************** + * Compiler Options + *************************************/ +/* MS Visual */ +#if defined(_MSC_VER) || defined(_WIN32) +# define _CRT_SECURE_NO_WARNINGS /* removes visual warnings */ +# define BMK_LEGACY_TIMER 1 /* gettimeofday() not supported by MSVC */ +#endif -// Under Linux at least, pull in the *64 commands +/* Under Linux at least, pull in the *64 commands */ #define _LARGEFILE64_SOURCE -//************************************** -// Includes -//************************************** +/************************************** + * Includes + *************************************/ #include /* malloc */ #include /* fprintf, fopen, ftello64, fread, stdin, stdout; when present : _fileno */ #include /* strcmp */ @@ -47,6 +50,13 @@ You can contact the author at : /************************************** * OS-Specific Includes *************************************/ +// Use ftime() if gettimeofday() is not available on your target +#if defined(BMK_LEGACY_TIMER) +# include // timeb, ftime +#else +# 
include // gettimeofday +#endif + #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__) # include // _O_BINARY # include // _setmode, _isatty @@ -98,8 +108,8 @@ You can contact the author at : #define TIMELOOP 2500 // Minimum timing per iteration #define PRIME 2654435761U -#define KB *(1U<<10) -#define MB *(1U<<20) +#define KB *(1<<10) +#define MB *(1<<20) #define GB *(1U<<30) #define MAX_MEM (2 GB - 64 MB) @@ -127,18 +137,34 @@ static int g_fn_selection = 1; // required within main() & usage() // Benchmark Functions //********************************************************* +#if defined(BMK_LEGACY_TIMER) + static int BMK_GetMilliStart(void) { - // Supposed to be portable + // Based on Legacy ftime() // Rolls over every ~ 12.1 days (0x100000/24/60/60) // Use GetMilliSpan to correct for rollover struct timeb tb; int nCount; ftime( &tb ); - nCount = tb.millitm + (tb.time & 0xfffff) * 1000; + nCount = (int) (tb.millitm + (tb.time & 0xfffff) * 1000); return nCount; } +#else + +static int BMK_GetMilliStart(void) +{ + // Based on newer gettimeofday() + // Use GetMilliSpan to correct for rollover + struct timeval tv; + int nCount; + gettimeofday(&tv, NULL); + nCount = (int) (tv.tv_usec/1000 + (tv.tv_sec & 0xfffff) * 1000); + return nCount; +} + +#endif static int BMK_GetMilliSpan( int nTimeStart ) { @@ -276,7 +302,7 @@ static int BMK_benchFile(char** fileNamesTable, int nbFiles) if ((double)milliTime < fastestC*nbHashes) fastestC = (double)milliTime/nbHashes; DISPLAY("%1i-%-14.14s : %10i -> %7.1f MB/s\r", interationNb, "XXH32", (int)benchedSize, (double)benchedSize / fastestC / 1000.); } - DISPLAY("%-16.16s : %10i -> %7.1f MB/s 0x%08x\n", "XXH32", (int)benchedSize, (double)benchedSize / fastestC / 1000., hashResult); + DISPLAY("%-16.16s : %10i -> %7.1f MB/s 0x%08X\n", "XXH32", (int)benchedSize, (double)benchedSize / fastestC / 1000., hashResult); totals += benchedSize; totalc += fastestC; @@ -345,7 +371,7 @@ static int 
BMK_benchFile(char** fileNamesTable, int nbFiles) if ((double)milliTime < fastestC*nbHashes) fastestC = (double)milliTime/nbHashes; DISPLAY("%1i-%-14.14s : %10i -> %7.1f MB/s\r", interationNb, "XXH64", (int)benchedSize, (double)benchedSize / fastestC / 1000.); } - DISPLAY("%-16.16s : %10i -> %7.1f MB/s 0x%08x%08x\n", "XXH64", (int)benchedSize, (double)benchedSize / fastestC / 1000., (U32)(h64>>32), (U32)(h64)); + DISPLAY("%-16.16s : %10i -> %7.1f MB/s 0x%08X%08X\n", "XXH64", (int)benchedSize, (double)benchedSize / fastestC / 1000., (U32)(h64>>32), (U32)(h64)); totals += benchedSize; totalc += fastestC; @@ -382,7 +408,7 @@ static int BMK_benchFile(char** fileNamesTable, int nbFiles) if ((double)milliTime < fastestC*nbHashes) fastestC = (double)milliTime/nbHashes; DISPLAY("%1i-%-14.14s : %10i -> %7.1f MB/s\r", interationNb, "XXH128", (int)benchedSize, (double)benchedSize / fastestC / 1000.); } - DISPLAY("%-16.16s : %10i -> %7.1f MB/s 0x%08x%08x%08x%08x\n", "XXH128", (int)benchedSize, (double)benchedSize / fastestC / 1000., (U32)(h128[1]>>32), (U32)(h128[1]), (U32)(h128[0]>>32), (U32)(h128[0])); + DISPLAY("%-16.16s : %10i -> %7.1f MB/s 0x%08X%08X%08X%08X\n", "XXH128", (int)benchedSize, (double)benchedSize / fastestC / 1000., (U32)(h128[1]>>32), (U32)(h128[1]), (U32)(h128[0]>>32), (U32)(h128[0])); totals += benchedSize; totalc += fastestC; @@ -419,7 +445,7 @@ static int BMK_benchFile(char** fileNamesTable, int nbFiles) if ((double)milliTime < fastestC*nbHashes) fastestC = (double)milliTime/nbHashes; DISPLAY("%1i-%-14.14s : %10i -> %7.1f MB/s\r", interationNb, "XXH256", (int)benchedSize, (double)benchedSize / fastestC / 1000.); } - DISPLAY("%-16.16s : %10i -> %7.1f MB/s 0x%08x%08x%08x%08x%08x%08x%08x%08x\n", "XXH256", (int)benchedSize, (double)benchedSize / fastestC / 1000., + DISPLAY("%-16.16s : %10i -> %7.1f MB/s 0x%08X%08X%08X%08X%08X%08X%08X%08X\n", "XXH256", (int)benchedSize, (double)benchedSize / fastestC / 1000., (U32)(h256[3]>>32), (U32)(h256[3]), 
(U32)(h256[2]>>32), (U32)(h256[2]), (U32)(h256[1]>>32), (U32)(h256[1]), (U32)(h256[0]>>32), (U32)(h256[0])); @@ -442,10 +468,10 @@ static void BMK_checkResult(U32 r1, U32 r2) { static int nbTests = 1; - if (r1==r2) DISPLAY("\rTest%3i : %08x == %08x ok ", nbTests, r1, r2); + if (r1==r2) DISPLAY("\rTest%3i : %08X == %08X ok ", nbTests, r1, r2); else { - DISPLAY("\rERROR : Test%3i : %08x <> %08x !!!!! \n", nbTests, r1, r2); + DISPLAY("\rERROR : Test%3i : %08X <> %08X !!!!! \n", nbTests, r1, r2); exit(1); } nbTests++; @@ -459,7 +485,7 @@ static void BMK_checkResult64(U64 r1, U64 r2) if (r1!=r2) { DISPLAY("\rERROR : Test%3i : 64-bits values non equals !!!!! \n", nbTests); - DISPLAY("\r %08x%08x != %08x%08x \n", (U32)(r1>>32), (U32)r1, (U32)(r2<<32), (U32)r2); + DISPLAY("\r %08X%08X != %08X%08X \n", (U32)(r1>>32), (U32)r1, (U32)(r2<<32), (U32)r2); exit(1); } nbTests++; @@ -651,17 +677,16 @@ static int BMK_hash(const char* fileName, U32 hashNb) { U64 h64[2]; XXH128_digest((XXH128_state_t*)&state, h64); - DISPLAYRESULT("%08x%08x%08x%08x %s \n", - (U32)(h64[1]>>32), (U32)(h64[1]), (U32)(h64[0]>>32), (U32)(h64[0]), fileName); + BMK_display_BigEndian(&h64[0], 16); + DISPLAYRESULT(" %s \n", fileName); break; } case 3: { U64 h64[4]; XXH256_digest((XXH256_state_t*)&state, h64); - DISPLAYRESULT("%08x%08x%08x%08x%08x%08x%08x%08x %s \n", - (U32)(h64[3]>>32), (U32)(h64[3]), (U32)(h64[2]>>32), (U32)(h64[2]), - (U32)(h64[1]>>32), (U32)(h64[1]), (U32)(h64[0]>>32), (U32)(h64[0]), fileName); + BMK_display_BigEndian(&h64[0], 32); + DISPLAYRESULT(" %s \n", fileName); break; } default: