/*-----------------------------------------------------------------------------
 * MurmurHash3 was written by Austin Appleby, and is placed in the public
 * domain.
 *
 * This is a C++ implementation of MurmurHash3_128 with support for progressive
 * processing, based on the PMurHash implementation written by Shane Day.
 */

/*-----------------------------------------------------------------------------

If you want to understand the MurmurHash algorithm you would be much better
off reading the original source. Just point your browser at:
http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp


What this version provides:

1. Progressive data feeding. Useful when the entire payload to be hashed
does not fit in memory or when the data is streamed through the application.
Also useful when hashing a number of strings with a common prefix: a partial
hash of the prefix string can be generated once and reused for each suffix
string.

How does it work?

We can only process entire 128-bit chunks of input, except for the very end
that may be shorter. So along with the partial hash we need to give back to
the caller a carry containing up to 15 bytes that we were unable to process.
This carry also needs to record the number of bytes it holds. I use the low
4 bits as a count (0..15), and the carry bytes are shifted in at the high
end in stream order.

To handle endianness I simply use a macro that reads an unsigned int and
define that macro to be a direct read on little-endian machines and a
read-and-swap on big-endian machines.

-----------------------------------------------------------------------------*/
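
/* As a quick orientation (illustrative, not part of the library): a caller
 * streaming data through the x64 variant might use the functions defined
 * below like this. The buffer name, sizes and read_some() helper are
 * hypothetical.
 *
 *   uint64_t h[2]     = {0, 0};   // or {seed, seed}
 *   uint64_t carry[2] = {0, 0};
 *   uint32_t total    = 0;
 *   char buf[4096];
 *   int r;
 *   while ((r = read_some(buf, sizeof buf)) > 0) {
 *     PMurHash128x64_Process(h, carry, buf, r);
 *     total += (uint32_t)r;
 *   }
 *   uint64_t out[2];
 *   PMurHash128x64_Result(h, carry, total, out);
 */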


#include "PMurHash128.h"

/*-----------------------------------------------------------------------------
 * Endianness, misalignment capabilities and util macros
 *
 * The following 7 macros are defined in this section. The other macros defined
 * are only needed to help derive these 7.
 *
 * READ_UINT32(x,i) Read a little endian unsigned 32-bit int at word index i
 * READ_UINT64(x,i) Read a little endian unsigned 64-bit int at word index i
 * UNALIGNED_SAFE   Defined if READ_UINTXX works on non-word boundaries
 * ROTL32(x,r)      Rotate x left by r bits
 * ROTL64(x,r)      Rotate x left by r bits
 * BIG_CONSTANT(x)  Declare a 64-bit integer constant portably
 * FORCE_INLINE     Ask the compiler to always inline a function
 */

/* I386 or AMD64 */
#if defined(_M_I86) || defined(_M_IX86) || defined(_X86_) || defined(__i386__) || defined(__i386) || defined(i386) \
 || defined(_M_X64) || defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64)
#define UNALIGNED_SAFE
#endif

/* Find best way to ROTL */
#if defined(_MSC_VER)
#define FORCE_INLINE static __forceinline
#include <stdlib.h>  /* Microsoft put _rotl declaration in here */
#define ROTL32(x,y) _rotl(x,y)
#define ROTL64(x,y) _rotl64(x,y)
#define BIG_CONSTANT(x) (x)
#else
#define FORCE_INLINE static inline __attribute__((always_inline))
/* gcc recognises this code and generates a rotate instruction for CPUs with one */
#define ROTL32(x,r) (((uint32_t)(x) << (r)) | ((uint32_t)(x) >> (32 - (r))))
#define ROTL64(x,r) (((uint64_t)(x) << (r)) | ((uint64_t)(x) >> (64 - (r))))
#define BIG_CONSTANT(x) (x##LLU)
#endif
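
/* Note on the fallback ROTL definitions above: shifting by the full word
 * width is undefined behaviour in C, so they are only valid for r in 1..31
 * (ROTL32) and 1..63 (ROTL64). Every rotation in this file uses a constant
 * in that range. For example, ROTL32(0x80000001, 1) == 0x00000003. */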

#include "endianness.h"

#define READ_UINT64(ptr,i) getblock64((uint64_t *)(ptr),(i))
#define READ_UINT32(ptr,i) getblock32((uint32_t *)(ptr),(i))

//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche

FORCE_INLINE uint32_t fmix32 ( uint32_t h )
{
  h ^= h >> 16;
  h *= 0x85ebca6b;
  h ^= h >> 13;
  h *= 0xc2b2ae35;
  h ^= h >> 16;

  return h;
}

//----------

FORCE_INLINE uint64_t fmix64 ( uint64_t k )
{
  k ^= k >> 33;
  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
  k ^= k >> 33;
  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
  k ^= k >> 33;

  return k;
}
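
/* A property worth knowing when testing: both finalizers fix zero, i.e.
 * fmix32(0) == 0 and fmix64(0) == 0. Both are also bijections (a xorshift
 * and a multiplication by an odd constant are each invertible), so the
 * finalizer itself never introduces collisions. */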

/*-----------------------------------------------------------------------------*
                                PMurHash128x86
*-----------------------------------------------------------------------------*/
/*-----------------------------------------------------------------------------
 * Core murmurhash algorithm macros */

static const uint32_t kC1 = 0x239b961b;
static const uint32_t kC2 = 0xab0e9789;
static const uint32_t kC3 = 0x38b34ae5;
static const uint32_t kC4 = 0xa1e38b93;

/* This is the main processing body of the algorithm. It operates
 * on each full 128-bits of input. */
#define doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4)\
do {\
  k1 *= kC1; k1 = ROTL32(k1,15); k1 *= kC2; h1 ^= k1;\
\
  h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;\
\
  k2 *= kC2; k2 = ROTL32(k2,16); k2 *= kC3; h2 ^= k2;\
\
  h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;\
\
  k3 *= kC3; k3 = ROTL32(k3,17); k3 *= kC4; h3 ^= k3;\
\
  h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;\
\
  k4 *= kC4; k4 = ROTL32(k4,18); k4 *= kC1; h4 ^= k4;\
\
  h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;\
} while(0)

/* Append unaligned bytes to carry, forcing hash churn if we have 16 bytes */
/* cnt=bytes to process, h1-h4=hash, k1-k4=carry, n=bytes in carry, ptr/len=payload */
#define dobytes128x86(cnt, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len)\
do {\
  unsigned __cnt = cnt;\
  for(;__cnt--; len--) {\
    switch(n) {\
    case 0: case 1: case 2: case 3:\
      k1 = k1>>8 | (uint32_t)*ptr++<<24;\
      ++n; break;\
\
    case 4: case 5: case 6: case 7:\
      k2 = k2>>8 | (uint32_t)*ptr++<<24;\
      ++n; break;\
\
    case 8: case 9: case 10: case 11:\
      k3 = k3>>8 | (uint32_t)*ptr++<<24;\
      ++n; break;\
\
    case 12: case 13: case 14:\
      k4 = k4>>8 | (uint32_t)*ptr++<<24;\
      ++n; break;\
\
    case 15:\
      k4 = k4>>8 | (uint32_t)*ptr++<<24;\
      doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);\
      n = 0; break;\
    }\
  }\
} while(0)
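
/* To illustrate the carry layout (illustrative, not part of the code):
 * feeding the five bytes "abcde" into an empty carry leaves n == 5, k1 equal
 * to the little-endian word "abcd" ('a' in the low byte), and 'e' in the top
 * byte of k2, waiting for the next three bytes of that word. */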

/* Finalize a hash. To match the original Murmur3_128x86, the total_length must be provided */
void PMurHash128x86_Result(const uint32_t ph[4], const uint32_t pcarry[4], uint32_t total_length, uint32_t out[4])
{
  uint32_t h1 = ph[0];
  uint32_t h2 = ph[1];
  uint32_t h3 = ph[2];
  uint32_t h4 = ph[3];

  uint32_t k1, k2, k3, k4 = pcarry[3];

  int n = k4 & 15;
  switch(n) {
  case 1: case 2: case 3: case 4:
    k1 = pcarry[0] >> (4-n)*8;
    goto finrot_k1;

  case 5: case 6: case 7: case 8:
    k2 = pcarry[1] >> (8-n)*8;
    goto finrot_k21;

  case 9: case 10: case 11: case 12:
    k3 = pcarry[2] >> (12-n)*8;
    goto finrot_k321;

  case 13: case 14: case 15:
    k4 >>= (16-n)*8;
    goto finrot_k4321;

  default:
    goto skiprot;
  }
finrot_k4321:
  k4 *= kC4; k4 = ROTL32(k4,18); k4 *= kC1; h4 ^= k4;
  k3 = pcarry[2];
finrot_k321:
  k3 *= kC3; k3 = ROTL32(k3,17); k3 *= kC4; h3 ^= k3;
  k2 = pcarry[1];
finrot_k21:
  k2 *= kC2; k2 = ROTL32(k2,16); k2 *= kC3; h2 ^= k2;
  k1 = pcarry[0];
finrot_k1:
  k1 *= kC1; k1 = ROTL32(k1,15); k1 *= kC2; h1 ^= k1;
skiprot:

  //----------
  // finalization

  h1 ^= total_length; h2 ^= total_length;
  h3 ^= total_length; h4 ^= total_length;

  h1 += h2; h1 += h3; h1 += h4;
  h2 += h1; h3 += h1; h4 += h1;

  h1 = fmix32(h1);
  h2 = fmix32(h2);
  h3 = fmix32(h3);
  h4 = fmix32(h4);

  h1 += h2; h1 += h3; h1 += h4;
  h2 += h1; h3 += h1; h4 += h1;

  out[0] = h1;
  out[1] = h2;
  out[2] = h3;
  out[3] = h4;
}

/*---------------------------------------------------------------------------*/

/* Main hashing function. Initialise carry[4] to {0,0,0,0} and h[4] to an initial {seed,seed,seed,seed}
 * if wanted. Both ph and pcarry are required arguments. */
void PMurHash128x86_Process(uint32_t ph[4], uint32_t pcarry[4], const void * const key, int len)
{
  uint32_t h1 = ph[0];
  uint32_t h2 = ph[1];
  uint32_t h3 = ph[2];
  uint32_t h4 = ph[3];

  uint32_t k1 = pcarry[0];
  uint32_t k2 = pcarry[1];
  uint32_t k3 = pcarry[2];
  uint32_t k4 = pcarry[3];

  const uint8_t *ptr = (uint8_t*)key;
  const uint8_t *end;

  /* Extract carry count from low 4 bits of the carry value */
  int n = k4 & 15;

#if defined(UNALIGNED_SAFE)
  /* This CPU handles unaligned word access */
  // #pragma message ( "UNALIGNED_SAFE" )
  /* Consume any carry bytes */
  int i = (16-n) & 15;
  if(i && i <= len) {
    dobytes128x86(i, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len);
  }

  /* Process 128-bit chunks */
  end = ptr + (len & ~15);
  for( ; ptr < end ; ptr+=16) {
    k1 = READ_UINT32(ptr, 0);
    k2 = READ_UINT32(ptr, 1);
    k3 = READ_UINT32(ptr, 2);
    k4 = READ_UINT32(ptr, 3);
    doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
  }

#else /*UNALIGNED_SAFE*/
  /* This CPU does not handle unaligned word access */
  // #pragma message ( "ALIGNED" )
  /* Consume enough so that the next data byte is word aligned */
  int i = -(intptr_t)(void *)ptr & 3;
  if(i && i <= len) {
    dobytes128x86(i, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len);
  }
  /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */
  end = ptr + (len & ~15);

  switch(n) { /* how many bytes in c */
  case 0: /*
    k1=[----] k2=[----] k3=[----] k4=[----] w=[3210 7654 ba98 fedc] b=[3210 7654 ba98 fedc] */
    for( ; ptr < end ; ptr+=16) {
      k1 = READ_UINT32(ptr, 0);
      k2 = READ_UINT32(ptr, 1);
      k3 = READ_UINT32(ptr, 2);
      k4 = READ_UINT32(ptr, 3);
      doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
    }
    break;
  case 1: case 2: case 3: /*
    k1=[10--] k2=[----] k3=[----] k4=[----] w=[5432 9876 dcba hgfe] b=[3210 7654 ba98 fedc] k1'=[hg--] */
    {
      const int lshift = n*8, rshift = 32-lshift;
      for( ; ptr < end ; ptr+=16) {
        uint32_t c = k1>>rshift;      // --10
        k2 = READ_UINT32(ptr, 0);     // 5432
        c |= k2<<lshift;              // 3210.
        k1 = READ_UINT32(ptr, 1);     // 9876
        k2 = k1<<lshift | k2>>rshift; // 7654.
        k4 = READ_UINT32(ptr, 2);     // dcba
        k3 = k4<<lshift | k1>>rshift; // ba98.
        k1 = READ_UINT32(ptr, 3);     // hgfe.
        k4 = k1<<lshift | k4>>rshift; // fedc.
        doblock128x86(h1, h2, h3, h4, c, k2, k3, k4);
      }
    }
    break;
  case 4: /*
    k1=[3210] k2=[----] k3=[----] k4=[----] w=[7654 ba98 fedc jihg] b=[3210 7654 ba98 fedc] k1'=[jihg] */
    for( ; ptr < end ; ptr+=16) {
      k2 = READ_UINT32(ptr, 0);
      k3 = READ_UINT32(ptr, 1);
      k4 = READ_UINT32(ptr, 2);
      doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
      k1 = READ_UINT32(ptr, 3);
    }
    break;
  case 5: case 6: case 7: /*
    k1=[3210] k2=[54--] k3=[----] k4=[----] w=[9876 dcba hgfe lkji] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[lk--] */
    {
      const int lshift = n*8-32, rshift = 32-lshift;
      for( ; ptr < end ; ptr+=16) {
        uint32_t c = k2>>rshift;      // --54
        k3 = READ_UINT32(ptr, 0);     // 9876
        c |= k3<<lshift;              // 7654.
        k4 = READ_UINT32(ptr, 1);     // dcba
        k3 = k4<<lshift | k3>>rshift; // ba98.
        k2 = READ_UINT32(ptr, 2);     // hgfe
        k4 = k2<<lshift | k4>>rshift; // fedc.
        doblock128x86(h1, h2, h3, h4, k1, c, k3, k4);
        k1 = k2>>rshift;              // --hg
        k2 = READ_UINT32(ptr, 3);     // lkji.
        k1 |= k2<<lshift;             // jihg.
      }
    }
    break;
  case 8: /*
    k1=[3210] k2=[7654] k3=[----] k4=[----] w=[ba98 fedc jihg nmlk] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] */
    for( ; ptr < end ; ptr+=16) {
      k3 = READ_UINT32(ptr, 0);
      k4 = READ_UINT32(ptr, 1);
      doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
      k1 = READ_UINT32(ptr, 2);
      k2 = READ_UINT32(ptr, 3);
    }
    break;
  case 9: case 10: case 11: /*
    k1=[3210] k2=[7654] k3=[98--] k4=[----] w=[dcba hgfe lkji ponm] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] k3'=[po--] */
    {
      const int lshift = n*8-64, rshift = 32-lshift;
      for( ; ptr < end ; ptr+=16) {
        uint32_t c = k3>>rshift;      // --98
        k4 = READ_UINT32(ptr, 0);     // dcba
        c |= k4<<lshift;              // ba98.
        k3 = READ_UINT32(ptr, 1);     // hgfe
        k4 = k3<<lshift | k4>>rshift; // fedc.
        doblock128x86(h1, h2, h3, h4, k1, k2, c, k4);
        k2 = READ_UINT32(ptr, 2);     // lkji
        k1 = k2<<lshift | k3>>rshift; // jihg.
        k3 = READ_UINT32(ptr, 3);     // ponm.
        k2 = k3<<lshift | k2>>rshift; // nmlk.
      }
    }
    break;
  case 12: /*
    k1=[3210] k2=[7654] k3=[ba98] k4=[----] w=[fedc jihg nmlk rqpo] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] k3'=[rqpo] */
    for( ; ptr < end ; ptr+=16) {
      k4 = READ_UINT32(ptr, 0);
      doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
      k1 = READ_UINT32(ptr, 1);
      k2 = READ_UINT32(ptr, 2);
      k3 = READ_UINT32(ptr, 3);
    }
    break;
  default: /* 12 < n <= 15
    k1=[3210] k2=[7654] k3=[ba98] k4=[dc--] w=[hgfe lkji ponm tsrq] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] k3'=[rqpo] k4'=[ts--] */
    {
      const int lshift = n*8-96, rshift = 32-lshift;
      for( ; ptr < end ; ptr+=16) {
        uint32_t c = k4>>rshift;      // --dc
        k4 = READ_UINT32(ptr, 0);     // hgfe
        c |= k4<<lshift;              // fedc.
        doblock128x86(h1, h2, h3, h4, k1, k2, k3, c);
        k3 = READ_UINT32(ptr, 1);     // lkji
        k1 = k3<<lshift | k4>>rshift; // jihg.
        c = READ_UINT32(ptr, 2);      // ponm
        k2 = c<<lshift | k3>>rshift;  // nmlk.
        k4 = READ_UINT32(ptr, 3);     // tsrq.
        k3 = k4<<lshift | c>>rshift;  // rqpo.
      }
    }
  }
#endif /*UNALIGNED_SAFE*/

  /* Advance over whole 128-bit chunks, possibly leaving 1..15 bytes */
  len -= len & ~15;

  /* Append any remaining bytes into carry */
  dobytes128x86(len, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len);

  /* Copy out new running hash and carry */
  ph[0] = h1;
  ph[1] = h2;
  ph[2] = h3;
  ph[3] = h4;
  pcarry[0] = k1;
  pcarry[1] = k2;
  pcarry[2] = k3;
  pcarry[3] = (k4 & ~0xff) | n;
}

/*---------------------------------------------------------------------------*/

/* All in one go */

/* MurmurHash3_x86_128 api */
void PMurHash128x86(const void * key, const int len, uint32_t seed, void * out)
{
  uint32_t carry[4] = {0, 0, 0, 0};
  uint32_t h[4] = {seed, seed, seed, seed};
  PMurHash128x86_Process(h, carry, key, len);
  PMurHash128x86_Result(h, carry, (uint32_t) len, (uint32_t *) out);
}
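
/* The common-prefix use case from the header comment, sketched here with the
 * x86 variant (all names are illustrative): hash the prefix once, then copy
 * the running state for each suffix. Process mutates its h/carry arguments,
 * while Result leaves them untouched.
 *
 *   uint32_t h[4] = {0,0,0,0}, carry[4] = {0,0,0,0};
 *   PMurHash128x86_Process(h, carry, prefix, prefix_len);
 *   for (size_t s = 0; s < nsuffixes; s++) {
 *     uint32_t h2[4], c2[4], hash[4];
 *     memcpy(h2, h, sizeof h2);        // snapshot the running state
 *     memcpy(c2, carry, sizeof c2);
 *     PMurHash128x86_Process(h2, c2, suffix[s], suffix_len[s]);
 *     PMurHash128x86_Result(h2, c2, (uint32_t)(prefix_len + suffix_len[s]), hash);
 *   }
 */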

/*-----------------------------------------------------------------------------*
                                PMurHash128x64
*-----------------------------------------------------------------------------*/
/*-----------------------------------------------------------------------------
 * Core murmurhash algorithm macros */

static const uint64_t kC1L = BIG_CONSTANT(0x87c37b91114253d5);
static const uint64_t kC2L = BIG_CONSTANT(0x4cf5ad432745937f);

/* This is the main processing body of the algorithm. It operates
 * on each full 128-bits of input. */
#define doblock128x64(h1, h2, k1, k2)\
do {\
  k1 *= kC1L; k1 = ROTL64(k1,31); k1 *= kC2L; h1 ^= k1;\
\
  h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;\
\
  k2 *= kC2L; k2 = ROTL64(k2,33); k2 *= kC1L; h2 ^= k2;\
\
  h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;\
} while(0)

/* Append unaligned bytes to carry, forcing hash churn if we have 16 bytes */
/* cnt=bytes to process, h1,h2=hash k1,k2=carry, n=bytes in carry, ptr/len=payload */
#define dobytes128x64(cnt, h1, h2, k1, k2, n, ptr, len) \
do {\
  unsigned __cnt = cnt;\
  for(;__cnt--; len--) {\
    switch(n) {\
    case 0: case 1: case 2: case 3:\
    case 4: case 5: case 6: case 7:\
      k1 = k1>>8 | (uint64_t)*ptr++<<56;\
      n++; break;\
\
    case 8: case 9: case 10: case 11:\
    case 12: case 13: case 14:\
      k2 = k2>>8 | (uint64_t)*ptr++<<56;\
      n++; break;\
\
    case 15:\
      k2 = k2>>8 | (uint64_t)*ptr++<<56;\
      doblock128x64(h1, h2, k1, k2);\
      n = 0; break;\
    }\
  }\
} while(0)
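
/* Same carry layout as the x86 variant, but with 64-bit lanes: after feeding
 * the eight bytes "abcdefgh" into an empty carry, n == 8 and k1 equals the
 * little-endian word "abcdefgh" ('a' in the low byte); a ninth byte would
 * start filling the top of k2. */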

/* Finalize a hash. To match the original Murmur3_128x64, the total_length must be provided */
void PMurHash128x64_Result(const uint64_t ph[2], const uint64_t pcarry[2],
                           const uint32_t total_length, uint64_t out[2])
{
  uint64_t h1 = ph[0];
  uint64_t h2 = ph[1];

  uint64_t k1;
  uint64_t k2 = pcarry[1];

  int n = k2 & 15;
  if (n) {
    k1 = pcarry[0];
    if (n > 8) {
      k2 >>= (16-n)*8;
      k2 *= kC2L; k2 = ROTL64(k2,33); k2 *= kC1L; h2 ^= k2;
    } else {
      k1 >>= (8-n)*8;
    }
    k1 *= kC1L; k1 = ROTL64(k1,31); k1 *= kC2L; h1 ^= k1;
  }

  //----------
  // finalization

  h1 ^= total_length; h2 ^= total_length;

  h1 += h2;
  h2 += h1;

  h1 = fmix64(h1);
  h2 = fmix64(h2);

  h1 += h2;
  h2 += h1;

  out[0] = h1;
  out[1] = h2;
}

/*---------------------------------------------------------------------------*/

/* Main hashing function. Initialise carry[2] to {0,0} and h[2] to an initial {seed,seed}
 * if wanted. Both ph and pcarry are required arguments. */
void PMurHash128x64_Process(uint64_t ph[2], uint64_t pcarry[2], const void * const key, int len)
{
  uint64_t h1 = ph[0];
  uint64_t h2 = ph[1];

  uint64_t k1 = pcarry[0];
  uint64_t k2 = pcarry[1];

  const uint8_t *ptr = (uint8_t*)key;
  const uint8_t *end;

  /* Extract carry count from low 4 bits of the carry value */
  int n = k2 & 15;

#if defined(UNALIGNED_SAFE)
  /* This CPU handles unaligned word access */
  // #pragma message ( "UNALIGNED_SAFE" )
  /* Consume any carry bytes */
  int i = (16-n) & 15;
  if(i && i <= len) {
    dobytes128x64(i, h1, h2, k1, k2, n, ptr, len);
  }

  /* Process 128-bit chunks */
  end = ptr + (len & ~15);
  for( ; ptr < end ; ptr+=16) {
    k1 = READ_UINT64(ptr, 0);
    k2 = READ_UINT64(ptr, 1);
    doblock128x64(h1, h2, k1, k2);
  }

#else /*UNALIGNED_SAFE*/
  /* This CPU does not handle unaligned word access */
  // #pragma message ( "ALIGNED" )
  /* Consume enough so that the next data byte is word aligned */
  int i = -(intptr_t)(void *)ptr & 7;
  if(i && i <= len) {
    dobytes128x64(i, h1, h2, k1, k2, n, ptr, len);
  }
  /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */
  end = ptr + (len & ~15);

  switch(n) { /* how many bytes in c */
  case 0: /*
    k1=[--------] k2=[--------] w=[76543210 fedcba98] b=[76543210 fedcba98] */
    for( ; ptr < end ; ptr+=16) {
      k1 = READ_UINT64(ptr, 0);
      k2 = READ_UINT64(ptr, 1);
      doblock128x64(h1, h2, k1, k2);
    }
    break;
  case 1: case 2: case 3: case 4: case 5: case 6: case 7: /*
    k1=[10------] k2=[--------] w=[98765432 hgfedcba] b=[76543210 fedcba98] k1'=[hg------] */
    {
      const int lshift = n*8, rshift = 64-lshift;
      for( ; ptr < end ; ptr+=16) {
        uint64_t c = k1>>rshift;
        k2 = READ_UINT64(ptr, 0);
        c |= k2<<lshift;
        k1 = READ_UINT64(ptr, 1);
        k2 = k2>>rshift | k1<<lshift;
        doblock128x64(h1, h2, c, k2);
      }
    }
    break;
  case 8: /*
    k1=[76543210] k2=[--------] w=[fedcba98 nmlkjihg] b=[76543210 fedcba98] k1'=[nmlkjihg] */
    for( ; ptr < end ; ptr+=16) {
      k2 = READ_UINT64(ptr, 0);
      doblock128x64(h1, h2, k1, k2);
      k1 = READ_UINT64(ptr, 1);
    }
    break;
  default: /* 8 < n <= 15
    k1=[76543210] k2=[98------] w=[hgfedcba ponmlkji] b=[76543210 fedcba98] k1'=[nmlkjihg] k2'=[po------] */
    {
      const int lshift = n*8-64, rshift = 64-lshift;
      for( ; ptr < end ; ptr+=16) {
        uint64_t c = k2 >> rshift;
        k2 = READ_UINT64(ptr, 0);
        c |= k2 << lshift;
        doblock128x64(h1, h2, k1, c);
        k1 = k2 >> rshift;
        k2 = READ_UINT64(ptr, 1);
        k1 |= k2 << lshift;
      }
    }
  }
#endif /*UNALIGNED_SAFE*/

  /* Advance over whole 128-bit chunks, possibly leaving 1..15 bytes */
  len -= len & ~15;

  /* Append any remaining bytes into carry */
  dobytes128x64(len, h1, h2, k1, k2, n, ptr, len);

  /* Copy out new running hash and carry */
  ph[0] = h1;
  ph[1] = h2;
  pcarry[0] = k1;
  pcarry[1] = (k2 & ~0xff) | n;
}

/*---------------------------------------------------------------------------*/

/* All in one go */

/* MurmurHash3_x64_128 api */
void PMurHash128x64(const void * key, const int len, uint32_t seed, void * out)
{
  uint64_t carry[2] = {0, 0};
  uint64_t h[2] = {seed, seed};
  PMurHash128x64_Process(h, carry, key, len);
  PMurHash128x64_Result(h, carry, (uint32_t) len, (uint64_t *) out);
}
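
/* A minimal sanity check (illustrative, not part of the library): hashing a
 * buffer in one go and in two arbitrary pieces must give identical results.
 *
 *   uint8_t buf[37];                               // fill with any test data
 *   uint64_t one[2], two[2];
 *   PMurHash128x64(buf, sizeof buf, 1234, one);    // all in one go
 *   uint64_t h[2] = {1234, 1234}, c[2] = {0, 0};   // seeded running state
 *   PMurHash128x64_Process(h, c, buf, 11);         // first 11 bytes
 *   PMurHash128x64_Process(h, c, buf + 11, sizeof buf - 11);
 *   PMurHash128x64_Result(h, c, sizeof buf, two);
 *   // one[0] == two[0] && one[1] == two[1]
 */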