xref: /php-src/ext/hash/murmur/PMurHash128.c (revision e0e3d985)
1 /*-----------------------------------------------------------------------------
2  * MurmurHash3 was written by Austin Appleby, and is placed in the public
3  * domain.
4  *
 * This is a C implementation of MurmurHash3_128 with support for progressive
 * processing, based on the PMurHash implementation written by Shane Day.
7  */
8 
9 /*-----------------------------------------------------------------------------
10 
11 If you want to understand the MurmurHash algorithm you would be much better
12 off reading the original source. Just point your browser at:
13 http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
14 
15 
What this version provides:
17 
18 1. Progressive data feeding. Useful when the entire payload to be hashed
19 does not fit in memory or when the data is streamed through the application.
20 Also useful when hashing a number of strings with a common prefix. A partial
21 hash of a prefix string can be generated and reused for each suffix string.
22 
23 How does it work?
24 
25 We can only process entire 128 bit chunks of input, except for the very end
26 that may be shorter. So along with the partial hash we need to give back to
27 the caller a carry containing up to 15 bytes that we were unable to process.
28 This carry also needs to record the number of bytes the carry holds. I use
29 the low 4 bits as a count (0..15) and the carry bytes are shifted into the
30 high byte in stream order.
31 
To handle endianness I simply use a macro that reads an unsigned int and
define that macro to be a direct read on little endian machines, and a read
and swap on big endian machines.
35 
36 -----------------------------------------------------------------------------*/
37 
38 
39 #include "PMurHash128.h"
40 
/*-----------------------------------------------------------------------------
 * Endianness, misalignment capabilities and util macros
 *
 * The following 5 macros are defined in this section. The other macros defined
 * are only needed to help derive these 5.
 *
 * READ_UINT32(x,i) Read a little endian unsigned 32-bit int at index
 * READ_UINT64(x,i) Read a little endian unsigned 64-bit int at index
 * UNALIGNED_SAFE   Defined if READ_UINTXX works on non-word boundaries
 * ROTL32(x,r)      Rotate x left by r bits
 * ROTL64(x,r)      Rotate x left by r bits
 * BIG_CONSTANT     Portably suffix a 64-bit integer literal
 * FORCE_INLINE     Strongest inline request the compiler supports
 */

/* I386 or AMD64: these CPUs service unaligned word loads in hardware */
#if defined(_M_I86) || defined(_M_IX86) || defined(_X86_) || defined(__i386__) || defined(__i386) || defined(i386) \
 || defined(_M_X64) || defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64)
  #define UNALIGNED_SAFE
#endif

/* Find best way to ROTL */
#if defined(_MSC_VER)
  #define FORCE_INLINE  static __forceinline
  #include <stdlib.h>  /* Microsoft put _rotl declaration in here */
  #define ROTL32(x,y)  _rotl(x,y)
  #define ROTL64(x,y)  _rotl64(x,y)
  #define BIG_CONSTANT(x) (x)
#else
  #define FORCE_INLINE static inline __attribute__((always_inline))
  /* gcc recognises this code and generates a rotate instruction for CPUs with one.
   * Arguments are fully parenthesized so expression operands expand safely.
   * Callers must pass 0 < r < word width: a rotate by 0 would shift by the
   * full width, which is undefined behaviour (all call sites comply). */
  #define ROTL32(x,r)  (((uint32_t)(x) << (r)) | ((uint32_t)(x) >> (32 - (r))))
  #define ROTL64(x,r)  (((uint64_t)(x) << (r)) | ((uint64_t)(x) >> (64 - (r))))
  #define BIG_CONSTANT(x) (x##LLU)
#endif
76 
77 #include "endianness.h"
78 
79 #define READ_UINT64(ptr,i) getblock64((uint64_t *)ptr,i)
80 #define READ_UINT32(ptr,i) getblock32((uint32_t *)ptr,i)
81 
//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche

/* Murmur3 32-bit finalizer: alternating xorshift and multiply rounds so
 * that every input bit influences every output bit. */
FORCE_INLINE uint32_t fmix32 ( uint32_t h )
{
  uint32_t v = h;

  v = (v ^ (v >> 16)) * 0x85ebca6b;
  v = (v ^ (v >> 13)) * 0xc2b2ae35;
  v ^= v >> 16;

  return v;
}
95 
//----------

/* Murmur3 64-bit finalizer: same avalanche structure as fmix32 but with
 * 64-bit shift distances and multipliers. */
FORCE_INLINE uint64_t fmix64 ( uint64_t k )
{
  uint64_t v = k;

  v = (v ^ (v >> 33)) * BIG_CONSTANT(0xff51afd7ed558ccd);
  v = (v ^ (v >> 33)) * BIG_CONSTANT(0xc4ceb9fe1a85ec53);
  v ^= v >> 33;

  return v;
}
108 
/*-----------------------------------------------------------------------------*
                                 PMurHash128x86
 *-----------------------------------------------------------------------------*/
/*-----------------------------------------------------------------------------
 * Core murmurhash algorithm macros */

/* Per-lane multiplicative constants for the x86_128 variant, taken from the
 * reference MurmurHash3_x86_128 (c1..c4 in the original source). */
static const uint32_t kC1 = 0x239b961b;
static const uint32_t kC2 = 0xab0e9789;
static const uint32_t kC3 = 0x38b34ae5;
static const uint32_t kC4 = 0xa1e38b93;
119 
/* This is the main processing body of the algorithm. It operates
 * on each full 128-bits of input. Each 32-bit input lane k1..k4 gets the
 * multiply/rotate/multiply treatment with its own constant pair and is
 * xored into its hash word; the hash words are then chained into one
 * another (h1<-h2, h2<-h3, h3<-h4, h4<-h1) with the rotation amounts and
 * additive constants of the reference implementation. Note: the k
 * arguments are modified in place. */
#define doblock128x86(h1, h2, h3, h4, k1, k2, k3,k4)\
do {\
  k1 *= kC1; k1  = ROTL32(k1,15); k1 *= kC2; h1 ^= k1;\
\
  h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;\
\
  k2 *= kC2; k2  = ROTL32(k2,16); k2 *= kC3; h2 ^= k2;\
\
  h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;\
\
  k3 *= kC3; k3  = ROTL32(k3,17); k3 *= kC4; h3 ^= k3;\
\
  h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;\
\
  k4 *= kC4; k4  = ROTL32(k4,18); k4 *= kC1; h4 ^= k4;\
\
  h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;\
} while(0)
140 
/* Append unaligned bytes to carry, forcing hash churn if we have 16 bytes */
/* cnt=bytes to process, h1-h4=hash k1-k4=carry, n=bytes in carry, ptr/len=payload.
 * Each byte is shifted into the top of the carry word selected by n (so the
 * carry holds bytes in stream order); on the 16th byte a full block is
 * mixed and the count resets. The local counter evaluates cnt only once,
 * and deliberately avoids a leading double underscore: such identifiers
 * are reserved for the implementation (C11 7.1.3). */
#define dobytes128x86(cnt, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len)\
do {\
  unsigned cnt_ = cnt;\
  for(;cnt_--; len--) {\
    switch(n) {\
      case  0: case  1: case  2: case  3:\
        k1 = k1>>8 | (uint32_t)*ptr++<<24;\
        ++n; break;\
\
      case  4: case  5: case  6: case  7:\
        k2 = k2>>8 | (uint32_t)*ptr++<<24;\
        ++n; break;\
\
      case  8: case  9: case 10: case 11:\
        k3 = k3>>8 | (uint32_t)*ptr++<<24;\
        ++n; break;\
\
      case 12: case 13: case 14:\
        k4 = k4>>8 | (uint32_t)*ptr++<<24;\
        ++n; break;\
\
      case 15:\
        k4 = k4>>8 | (uint32_t)*ptr++<<24;\
        doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);\
        n = 0; break;\
    }\
  }\
} while(0)
171 
/* Finalize a hash. To match the original Murmur3_128x86 the total_length must be provided */
void PMurHash128x86_Result(const uint32_t ph[4], const uint32_t pcarry[4], uint32_t total_length, uint32_t out[4])
{
  uint32_t h1 = ph[0];
  uint32_t h2 = ph[1];
  uint32_t h3 = ph[2];
  uint32_t h4 = ph[3];

  /* Only k4 is loaded up front: its low 4 bits hold the carry byte count.
   * k1..k3 are loaded on demand by the goto paths below. */
  uint32_t k1, k2, k3, k4 = pcarry[3];

  int n = k4 & 15;
  /* Dispatch on how many tail bytes are buffered. Carry bytes were shifted
   * in from the top of each word, so each case right-aligns the partially
   * filled word, then jumps into the mixing chain. */
  switch(n) {
    case  1: case  2: case  3: case  4:
      k1 = pcarry[0] >> (4-n)*8;
      goto finrot_k1;

    case  5: case  6: case  7: case  8:
      k2 = pcarry[1] >> (8-n)*8;
      goto finrot_k21;

    case  9: case 10: case 11: case 12:
      k3 = pcarry[2] >> (12-n)*8;
      goto finrot_k321;

    case 13: case 14: case 15:
      k4 >>= (16-n)*8;
      goto finrot_k4321;

    default: /* n == 0: no buffered tail bytes to mix */
      goto skiprot;
  }
  /* Mixing chain: a partially filled lane kN implies lanes kN-1..k1 are
   * full, so each label mixes its lane then falls into the previous one. */
finrot_k4321:
  k4 *= kC4; k4  = ROTL32(k4,18); k4 *= kC1; h4 ^= k4;
  k3 = pcarry[2];
finrot_k321:
  k3 *= kC3; k3  = ROTL32(k3,17); k3 *= kC4; h3 ^= k3;
  k2 = pcarry[1];
finrot_k21:
  k2 *= kC2; k2  = ROTL32(k2,16); k2 *= kC3; h2 ^= k2;
  k1 = pcarry[0];
finrot_k1:
  k1 *= kC1; k1  = ROTL32(k1,15); k1 *= kC2; h1 ^= k1;
skiprot:

  //----------
  // finalization

  h1 ^= total_length; h2 ^= total_length;
  h3 ^= total_length; h4 ^= total_length;

  h1 += h2; h1 += h3; h1 += h4;
  h2 += h1; h3 += h1; h4 += h1;

  h1 = fmix32(h1);
  h2 = fmix32(h2);
  h3 = fmix32(h3);
  h4 = fmix32(h4);

  h1 += h2; h1 += h3; h1 += h4;
  h2 += h1; h3 += h1; h4 += h1;

  out[0] = h1;
  out[1] = h2;
  out[2] = h3;
  out[3] = h4;
}
238 
239 /*---------------------------------------------------------------------------*/
240 
/* Main hashing function. Initialise carry[4] to {0,0,0,0} and h[4] to an initial {seed,seed,seed,seed}
 * if wanted. Both ph and pcarry are required arguments. */
void PMurHash128x86_Process(uint32_t ph[4], uint32_t pcarry[4], const void * const key, int len)
{
  /* Running hash state... */
  uint32_t h1 = ph[0];
  uint32_t h2 = ph[1];
  uint32_t h3 = ph[2];
  uint32_t h4 = ph[3];

  /* ...and the carry words (k4's low nibble doubles as the byte count). */
  uint32_t k1 = pcarry[0];
  uint32_t k2 = pcarry[1];
  uint32_t k3 = pcarry[2];
  uint32_t k4 = pcarry[3];

  const uint8_t *ptr = (uint8_t*)key;
  const uint8_t *end;

  /* Extract carry count from low 4 bits of c value */
  int n = k4 & 15;

#if defined(UNALIGNED_SAFE)
  /* This CPU handles unaligned word access */
// #pragma message ( "UNALIGNED_SAFE" )
  /* Consume any carry bytes: top the carry up to a full 16-byte block
   * (which dobytes128x86 then mixes) before switching to word reads. */
  int i = (16-n) & 15;
  if(i && i <= len) {
    dobytes128x86(i, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len);
  }

  /* Process 128-bit chunks */
  end = ptr + (len & ~15);
  for( ; ptr < end ; ptr+=16) {
    k1 = READ_UINT32(ptr, 0);
    k2 = READ_UINT32(ptr, 1);
    k3 = READ_UINT32(ptr, 2);
    k4 = READ_UINT32(ptr, 3);
    doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
  }

#else /*UNALIGNED_SAFE*/
  /* This CPU does not handle unaligned word access */
// #pragma message ( "ALIGNED" )
  /* Consume enough so that the next data byte is word aligned */
  int i = -(intptr_t)(void *)ptr & 3;
  if(i && i <= len) {
    dobytes128x86(i, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len);
  }
  /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */
  /* Diagram key: kN=[carry byte layout], w=[aligned words read this pass],
   * b=[logical stream bytes fed to the block], kN'=[carry after the loop];
   * '-' is an empty byte, digits/letters are stream bytes in order. */
  end = ptr + (len & ~15);

  switch(n) { /* how many bytes in c */
  case 0: /*
  k1=[----] k2=[----] k3=[----] k4=[----] w=[3210 7654 ba98 fedc] b=[3210 7654 ba98 fedc] */
    for( ; ptr < end ; ptr+=16) {
      k1 = READ_UINT32(ptr, 0);
      k2 = READ_UINT32(ptr, 1);
      k3 = READ_UINT32(ptr, 2);
      k4 = READ_UINT32(ptr, 3);
      doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
    }
    break;
  case 1: case 2: case 3: /*
  k1=[10--] k2=[----] k3=[----] k4=[----] w=[5432 9876 dcba hgfe] b=[3210 7654 ba98 fedc] k1'=[hg--] */
    {
      const int lshift = n*8, rshift = 32-lshift;
      for( ; ptr < end ; ptr+=16) {
        uint32_t c = k1>>rshift;      // --10
        k2 = READ_UINT32(ptr, 0);     // 5432
        c |= k2<<lshift;              // 3210.
        k1 = READ_UINT32(ptr, 1);     // 9876
        k2 = k1<<lshift | k2>>rshift; // 7654.
        k4 = READ_UINT32(ptr, 2);     // dcba
        k3 = k4<<lshift | k1>>rshift; // ba98.
        k1 = READ_UINT32(ptr, 3);     // hgfe.
        k4 = k1<<lshift | k4>>rshift; // fedc.
        doblock128x86(h1, h2, h3, h4, c, k2, k3, k4);
      }
    }
    break;
  case 4: /*
  k1=[3210] k2=[----] k3=[----] k4=[----] w=[7654 ba98 fedc jihg] b=[3210 7654 ba98 fedc] k1'=[jihg] */
    for( ; ptr < end ; ptr+=16) {
      k2 = READ_UINT32(ptr, 0);
      k3 = READ_UINT32(ptr, 1);
      k4 = READ_UINT32(ptr, 2);
      doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
      k1 = READ_UINT32(ptr, 3);
    }
    break;
  case 5: case 6: case 7: /*
  k1=[3210] k2=[54--] k3=[----] k4=[----] w=[9876 dcba hgfe lkji] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[lk--] */
    {
      const int lshift = n*8-32, rshift = 32-lshift;
      for( ; ptr < end ; ptr+=16) {
        uint32_t c = k2>>rshift;      // --54
        k3 = READ_UINT32(ptr, 0);     // 9876
        c |= k3<<lshift;              // 7654.
        k4 = READ_UINT32(ptr, 1);     // dcba
        k3 = k4<<lshift | k3>>rshift; // ba98.
        k2 = READ_UINT32(ptr, 2);     // hgfe
        k4 = k2<<lshift | k4>>rshift; // fedc.
        doblock128x86(h1, h2, h3, h4, k1, c, k3, k4);
        k1 = k2>>rshift;              // --hg
        k2 = READ_UINT32(ptr, 3);     // lkji.
        k1 |= k2<<lshift;             // jihg.
      }
    }
    /* FALLTHROUGH -- harmless: ptr == end after the loop above, so the
     * case 8 loop body never executes. NOTE(review): an explicit break
     * would silence -Wimplicit-fallthrough and make the intent clear. */
  case 8: /*
  k1=[3210] k2=[7654] k3=[----] k4=[----] w=[ba98 fedc jihg nmlk] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] */
    for( ; ptr < end ; ptr+=16) {
      k3 = READ_UINT32(ptr, 0);
      k4 = READ_UINT32(ptr, 1);
      doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
      k1 = READ_UINT32(ptr, 2);
      k2 = READ_UINT32(ptr, 3);
    }
    break;
  case 9: case 10: case 11: /*
  k1=[3210] k2=[7654] k3=[98--] k4=[----] w=[dcba hgfe lkji ponm] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] k3'=[po--] */
    {
      const int lshift = n*8-64, rshift = 32-lshift;
      for( ; ptr < end ; ptr+=16) {
        uint32_t c = k3>>rshift;      // --98
        k4 = READ_UINT32(ptr, 0);     // dcba
        c |= k4<<lshift;              // ba98.
        k3 = READ_UINT32(ptr, 1);     // hgfe
        k4 = k3<<lshift | k4>>rshift; // fedc.
        doblock128x86(h1, h2, h3, h4, k1, k2, c, k4);
        k2 = READ_UINT32(ptr, 2);     // lkji
        k1 = k2<<lshift | k3>>rshift; // jihg.
        k3 = READ_UINT32(ptr, 3);     // ponm.
        k2 = k3<<lshift | k2>>rshift; // nmlk.
      }
    }
    /* FALLTHROUGH -- harmless for the same reason as above (ptr == end). */
  case 12: /*
  k1=[3210] k2=[7654] k3=[ba98] k4=[----] w=[fedc jihg nmlk rqpo] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] k3'=[rqpo] */
    for( ; ptr < end ; ptr+=16) {
      k4 = READ_UINT32(ptr, 0);
      doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
      k1 = READ_UINT32(ptr, 1);
      k2 = READ_UINT32(ptr, 2);
      k3 = READ_UINT32(ptr, 3);
    }
    break;
  default: /* 12 < n <= 15
  k1=[3210] k2=[7654] k3=[ba98] k4=[dc--] w=[hgfe lkji ponm tsrq] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] k3'=[rqpo] k4'=[ts--] */
    {
      const int lshift = n*8-96, rshift = 32-lshift;
      for( ; ptr < end ; ptr+=16) {
        uint32_t c = k4>>rshift;      // --dc
        k4 = READ_UINT32(ptr, 0);     // hgfe
        c |= k4<<lshift;              // fedc.
        doblock128x86(h1, h2, h3, h4, k1, k2, k3, c);
        k3 = READ_UINT32(ptr, 1);     // lkji
        k1 = k3<<lshift | k4>>rshift; // jihg.
        c  = READ_UINT32(ptr, 2);     // ponm
        k2 = c<<lshift | k3>>rshift;  // nmlk.
        k4 = READ_UINT32(ptr, 3);     // tsrq.
        k3 = k4<<lshift | c>>rshift;  // rqpo.
      }
    }
  }
#endif /*UNALIGNED_SAFE*/

  /* Advance over whole 128-bit chunks, possibly leaving 1..15 bytes */
  len -= len & ~15;

  /* Append any remaining bytes into carry */
  dobytes128x86(len, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len);

  /* Copy out new running hash and carry. The count n is stored in k4's low
   * byte: at most 3 carry bytes ever live in k4 (stream bytes fill it from
   * the top), so its low byte is always free to hold the count. */
  ph[0] = h1;
  ph[1] = h2;
  ph[2] = h3;
  ph[3] = h4;
  pcarry[0] = k1;
  pcarry[1] = k2;
  pcarry[2] = k3;
  pcarry[3] = (k4 & ~0xff) | n;
}
421 
422 /*---------------------------------------------------------------------------*/
423 
/* All in one go */

/* MurmurHash3_x86_128 api */
/* One-shot convenience wrapper: seed all four lanes, stream the whole
 * payload through the progressive core, then finalize with the total
 * length as Murmur3 requires. */
void PMurHash128x86(const void * key, const int len, uint32_t seed, void * out)
{
  uint32_t state[4] = {seed, seed, seed, seed};
  uint32_t carry[4] = {0};

  PMurHash128x86_Process(state, carry, key, len);
  PMurHash128x86_Result(state, carry, (uint32_t) len, (uint32_t *) out);
}
434 
435 /*-----------------------------------------------------------------------------*
436                                  PMurHash128x64
437  *-----------------------------------------------------------------------------*/
438 /*-----------------------------------------------------------------------------
439  * Core murmurhash algorithm macros */
440 
/* Multiplicative constants for the x64_128 variant (c1/c2 in the reference
 * MurmurHash3_x64_128). */
static const uint64_t kC1L = BIG_CONSTANT(0x87c37b91114253d5);
static const uint64_t kC2L = BIG_CONSTANT(0x4cf5ad432745937f);
443 
/* This is the main processing body of the algorithm. It operates
 * on each full 128-bits of input. Each 64-bit lane k1/k2 gets the
 * multiply/rotate/multiply treatment and is xored into its hash word; the
 * two hash words are chained (h1<-h2, h2<-h1) with the reference
 * implementation's rotations and additive constants. Note: the k
 * arguments are modified in place. */
#define doblock128x64(h1, h2, k1, k2)\
do {\
  k1 *= kC1L; k1  = ROTL64(k1,31); k1 *= kC2L; h1 ^= k1;\
\
  h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;\
\
  k2 *= kC2L; k2  = ROTL64(k2,33); k2 *= kC1L; h2 ^= k2;\
\
  h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;\
} while(0)
456 
/* Append unaligned bytes to carry, forcing hash churn if we have 16 bytes */
/* cnt=bytes to process, h1,h2=hash k1,k2=carry, n=bytes in carry, ptr/len=payload.
 * Each byte is shifted into the top of the carry word selected by n; on
 * the 16th byte a full block is mixed and the count resets. The local
 * counter evaluates cnt only once, and deliberately avoids a leading
 * double underscore: such identifiers are reserved for the implementation
 * (C11 7.1.3). */
#define dobytes128x64(cnt, h1, h2, k1, k2, n, ptr, len) \
do {\
  unsigned cnt_ = cnt;\
  for(;cnt_--; len--) {\
    switch(n) {\
      case  0: case  1: case  2: case  3:\
      case  4: case  5: case  6: case  7:\
        k1 = k1>>8 | (uint64_t)*ptr++<<56;\
        n++; break;\
\
      case  8: case  9: case 10: case 11:\
      case 12: case 13: case 14:\
        k2 = k2>>8 | (uint64_t)*ptr++<<56;\
        n++; break;\
\
      case 15:\
        k2 = k2>>8 | (uint64_t)*ptr++<<56;\
        doblock128x64(h1, h2, k1, k2);\
        n = 0; break;\
    }\
  }\
} while(0)
481 
482 /* Finalize a hash. To match the original Murmur3_128x64 the total_length must be provided */
PMurHash128x64_Result(const uint64_t ph[2],const uint64_t pcarry[2],const uint32_t total_length,uint64_t out[2])483 void PMurHash128x64_Result(const uint64_t ph[2], const uint64_t pcarry[2],
484                         const uint32_t total_length, uint64_t out[2])
485 {
486   uint64_t h1 = ph[0];
487   uint64_t h2 = ph[1];
488 
489   uint64_t k1;
490   uint64_t k2 = pcarry[1];
491 
492   int n = k2 & 15;
493   if (n) {
494     k1 = pcarry[0];
495     if (n > 8) {
496       k2 >>= (16-n)*8;
497       k2 *= kC2L; k2  = ROTL64(k2,33); k2 *= kC1L; h2 ^= k2;
498     } else {
499       k1 >>= (8-n)*8;
500     }
501     k1 *= kC1L; k1  = ROTL64(k1,31); k1 *= kC2L; h1 ^= k1;
502   }
503 
504   //----------
505   // finalization
506 
507   h1 ^= total_length; h2 ^= total_length;
508 
509   h1 += h2;
510   h2 += h1;
511 
512   h1 = fmix64(h1);
513   h2 = fmix64(h2);
514 
515   h1 += h2;
516   h2 += h1;
517 
518   out[0] = h1;
519   out[1] = h2;
520 }
521 
522 /*---------------------------------------------------------------------------*/
523 
/* Main hashing function. Initialise carry[2] to {0,0} and h[2] to an initial {seed,seed}
 * if wanted. Both ph and pcarry are required arguments. */
void PMurHash128x64_Process(uint64_t ph[2], uint64_t pcarry[2], const void * const key, int len)
{
  /* Running hash state... */
  uint64_t h1 = ph[0];
  uint64_t h2 = ph[1];

  /* ...and the carry words (k2's low nibble doubles as the byte count). */
  uint64_t k1 = pcarry[0];
  uint64_t k2 = pcarry[1];

  const uint8_t *ptr = (uint8_t*)key;
  const uint8_t *end;

  /* Extract carry count from low 4 bits of c value */
  int n = k2 & 15;

#if defined(UNALIGNED_SAFE)
  /* This CPU handles unaligned word access */
// #pragma message ( "UNALIGNED_SAFE" )
  /* Consume any carry bytes: top the carry up to a full 16-byte block
   * (which dobytes128x64 then mixes) before switching to word reads. */
  int i = (16-n) & 15;
  if(i && i <= len) {
    dobytes128x64(i, h1, h2, k1, k2, n, ptr, len);
  }

  /* Process 128-bit chunks */
  end = ptr + (len & ~15);
  for( ; ptr < end ; ptr+=16) {
    k1 = READ_UINT64(ptr, 0);
    k2 = READ_UINT64(ptr, 1);
    doblock128x64(h1, h2, k1, k2);
  }

#else /*UNALIGNED_SAFE*/
  /* This CPU does not handle unaligned word access */
// #pragma message ( "ALIGNED" )
  /* Consume enough so that the next data byte is word aligned */
  int i = -(intptr_t)(void *)ptr & 7;
  if(i && i <= len) {
    dobytes128x64(i, h1, h2, k1, k2, n, ptr, len);
  }
  /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */
  /* Diagram key: kN=[carry byte layout], w=[aligned words read], b=[logical
   * stream bytes], kN'=[carry after the loop]; '-' is an empty byte. */
  end = ptr + (len & ~15);

  switch(n) { /* how many bytes in c */
  case 0: /*
    k1=[--------] k2=[--------] w=[76543210 fedcba98] b=[76543210 fedcba98] */
    for( ; ptr < end ; ptr+=16) {
      k1 = READ_UINT64(ptr, 0);
      k2 = READ_UINT64(ptr, 1);
      doblock128x64(h1, h2, k1, k2);
    }
    break;
  case 1: case 2: case 3: case 4: case 5: case 6: case 7: /*
    k1=[10------] k2=[--------] w=[98765432 hgfedcba] b=[76543210 fedcba98] k1'=[hg------] */
    {
      const int lshift = n*8, rshift = 64-lshift;
      for( ; ptr < end ; ptr+=16) {
        uint64_t c = k1>>rshift;
        k2 = READ_UINT64(ptr, 0);
        c |= k2<<lshift;
        k1 = READ_UINT64(ptr, 1);
        k2 = k2>>rshift | k1<<lshift;
        doblock128x64(h1, h2, c, k2);
      }
    }
    break;
  case 8: /*
  k1=[76543210] k2=[--------] w=[fedcba98 nmlkjihg] b=[76543210 fedcba98] k1`=[nmlkjihg] */
    for( ; ptr < end ; ptr+=16) {
      k2 = READ_UINT64(ptr, 0);
      doblock128x64(h1, h2, k1, k2);
      k1 = READ_UINT64(ptr, 1);
    }
    break;
  default: /* 8 < n <= 15
  k1=[76543210] k2=[98------] w=[hgfedcba ponmlkji] b=[76543210 fedcba98] k1`=[nmlkjihg] k2`=[po------] */
    {
      const int lshift = n*8-64, rshift = 64-lshift;
      for( ; ptr < end ; ptr+=16) {
        uint64_t c = k2 >> rshift;
        k2 = READ_UINT64(ptr, 0);
        c |= k2 << lshift;
        doblock128x64(h1, h2, k1, c);
        k1 = k2 >> rshift;
        k2 = READ_UINT64(ptr, 1);
        k1 |= k2 << lshift;
      }
    }
  }
#endif /*UNALIGNED_SAFE*/

  /* Advance over whole 128-bit chunks, possibly leaving 1..15 bytes */
  len -= len & ~15;

  /* Append any remaining bytes into carry */
  dobytes128x64(len, h1, h2, k1, k2, n, ptr, len);

  /* Copy out new running hash and carry. At most 7 carry bytes ever live
   * in k2 (stream bytes fill it from the top), so its low byte is always
   * free to hold the count n. */
  ph[0] = h1;
  ph[1] = h2;
  pcarry[0] = k1;
  pcarry[1] = (k2 & ~0xff) | n;
}
628 
629 /*---------------------------------------------------------------------------*/
630 
/* All in one go */

/* MurmurHash3_x64_128 api */
/* One-shot convenience wrapper: seed both 64-bit lanes, stream the whole
 * payload through the progressive core, then finalize with the total
 * length as Murmur3 requires. */
void PMurHash128x64(const void * key, const int len, uint32_t seed, void * out)
{
  uint64_t state[2] = {seed, seed};
  uint64_t carry[2] = {0, 0};

  PMurHash128x64_Process(state, carry, key, len);
  PMurHash128x64_Result(state, carry, (uint32_t) len, (uint64_t *) out);
}
641