1 /*
2 Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
3 Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
4 denoted as "the implementer".
5 
6 For more information, feedback or questions, please refer to our websites:
7 http://keccak.noekeon.org/
8 http://keyak.noekeon.org/
9 http://ketje.noekeon.org/
10 
11 To the extent possible under law, the implementer has waived all copyright
12 and related or neighboring rights to the source code in this file.
13 http://creativecommons.org/publicdomain/zero/1.0/
14 */
15 
16 #include    <string.h>
17 #include "brg_endian.h"
18 #include "KeccakP-1600-SnP.h"
19 #include "SnP-Relaned.h"
/*
 * ALLOW_MISALIGNED_ACCESS is written in front of the lane-oriented functions
 * of this file (AddLanes, OverwriteLanes, ExtractLanes, ExtractAndAddLanes).
 *
 * If the compiler provides __has_feature and reports the undefined-behavior
 * sanitizer as enabled, the macro expands to an attribute that switches off
 * the sanitizer's "alignment" check for the annotated function. In every
 * other case it expands to nothing.
 */
#ifdef __has_feature
# if __has_feature(undefined_behavior_sanitizer)
#  define ALLOW_MISALIGNED_ACCESS __attribute__((no_sanitize("alignment")))
# endif
#endif
#ifndef ALLOW_MISALIGNED_ACCESS
# define ALLOW_MISALIGNED_ACCESS
#endif
28 
/* Integer aliases used throughout this file. */
typedef unsigned char UINT8;
typedef unsigned int UINT32;
/* WARNING: on 8-bit and 16-bit platforms, the UINT32 definition above should
 * be replaced by: */
/* typedef unsigned long       UINT32; */

/*
 * ROL32(a, offset): value of a, converted to UINT32, rotated left by offset
 * bit positions.
 *
 * - The whole argument a is parenthesized before the cast, so an expression
 *   such as ROL32(x ^ y, n) converts the complete expression to UINT32.
 * - Both shift counts are reduced modulo 32, so offset == 0 returns a
 *   unchanged instead of shifting a 32-bit value by 32 positions.
 * - a and offset are each evaluated twice: do not pass expressions with
 *   side effects.
 */
#define ROL32(a, offset) ((((UINT32)(a)) << ((offset) & 31)) | (((UINT32)(a)) >> ((32 - (offset)) & 31)))
35 
/*
 * prepareToBitInterleaving(low, high, temp, temp0, temp1)
 *
 * Permutes the bits of two 32-bit words independently:
 *   temp0 <- low  with its even-indexed bits packed into bits 0..15 and its
 *            odd-indexed bits packed into bits 16..31,
 *   temp1 <- high rearranged the same way.
 * temp is a scratch lvalue. low is read exactly once, before high, which is
 * also read exactly once. The expansion ends with a semicolon, so it can be
 * followed directly by further statements.
 *
 * Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
 */
#define prepareToBitInterleaving(low, high, temp, temp0, temp1) \
        temp0 = (low); \
        temp = (temp0 ^ (temp0 >>  1)) & 0x22222222UL; \
        temp0 ^= temp ^ (temp <<  1); \
        temp = (temp0 ^ (temp0 >>  2)) & 0x0C0C0C0CUL; \
        temp0 ^= temp ^ (temp <<  2); \
        temp = (temp0 ^ (temp0 >>  4)) & 0x00F000F0UL; \
        temp0 ^= temp ^ (temp <<  4); \
        temp = (temp0 ^ (temp0 >>  8)) & 0x0000FF00UL; \
        temp0 ^= temp ^ (temp <<  8); \
        temp1 = (high); \
        temp = (temp1 ^ (temp1 >>  1)) & 0x22222222UL; \
        temp1 ^= temp ^ (temp <<  1); \
        temp = (temp1 ^ (temp1 >>  2)) & 0x0C0C0C0CUL; \
        temp1 ^= temp ^ (temp <<  2); \
        temp = (temp1 ^ (temp1 >>  4)) & 0x00F000F0UL; \
        temp1 ^= temp ^ (temp <<  4); \
        temp = (temp1 ^ (temp1 >>  8)) & 0x0000FF00UL; \
        temp1 ^= temp ^ (temp <<  8);
48 
/*
 * toBitInterleavingAndXOR(low, high, even, odd, temp, temp0, temp1)
 *
 * Runs prepareToBitInterleaving on (low, high), then XORs the result into two
 * lvalues:
 *   even ^= low 16 bits of temp0 | low 16 bits of temp1, moved to bits 16..31
 *   odd  ^= high 16 bits of temp0, moved to bits 0..15 | high 16 bits of temp1
 * even and odd each appear once in the expansion, which ends with a
 * semicolon.
 */
#define toBitInterleavingAndXOR(low, high, even, odd, temp, temp0, temp1) \
        prepareToBitInterleaving(low, high, temp, temp0, temp1) \
        even ^= (temp1 << 16) | (temp0 & 0x0000FFFF); \
        odd ^= (temp1 & 0xFFFF0000) | (temp0 >> 16);
53 
/*
 * toBitInterleavingAndAND(low, high, even, odd, temp, temp0, temp1)
 *
 * Runs prepareToBitInterleaving on (low, high), then ANDs the result into two
 * lvalues:
 *   even &= low 16 bits of temp0 | low 16 bits of temp1, moved to bits 16..31
 *   odd  &= high 16 bits of temp0, moved to bits 0..15 | high 16 bits of temp1
 * even and odd each appear once in the expansion, which ends with a
 * semicolon.
 */
#define toBitInterleavingAndAND(low, high, even, odd, temp, temp0, temp1) \
        prepareToBitInterleaving(low, high, temp, temp0, temp1) \
        even &= (temp1 << 16) | (temp0 & 0x0000FFFF); \
        odd &= (temp1 & 0xFFFF0000) | (temp0 >> 16);
58 
/*
 * toBitInterleavingAndSet(low, high, even, odd, temp, temp0, temp1)
 *
 * Runs prepareToBitInterleaving on (low, high), then stores the result into
 * two lvalues, replacing their previous contents:
 *   even = low 16 bits of temp0 | low 16 bits of temp1, moved to bits 16..31
 *   odd  = high 16 bits of temp0, moved to bits 0..15 | high 16 bits of temp1
 * even and odd each appear once in the expansion, which ends with a
 * semicolon.
 */
#define toBitInterleavingAndSet(low, high, even, odd, temp, temp0, temp1) \
        prepareToBitInterleaving(low, high, temp, temp0, temp1) \
        even = (temp1 << 16) | (temp0 & 0x0000FFFF); \
        odd = (temp1 & 0xFFFF0000) | (temp0 >> 16);
63 
/*
 * prepareFromBitInterleaving(even, odd, temp, temp0, temp1)
 *
 * Converts a pair (even, odd) of bit-interleaved words back to two ordinary
 * words:
 *   1. regroup the halves: temp0 gets the low 16 bits of even and of odd,
 *      temp1 gets the high 16 bits of even and of odd;
 *   2. spread each word so that bits 0..15 return to the even bit positions
 *      and bits 16..31 return to the odd bit positions.
 * On exit temp0 holds the low word and temp1 the high word. temp is scratch.
 * even is read once, before odd, which is also read once. The expansion ends
 * with a semicolon.
 *
 * Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
 */
#define prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \
        temp0 = (even); \
        temp1 = (odd); \
        temp = (temp0 & 0x0000FFFF) | (temp1 << 16); \
        temp1 = (temp0 >> 16) | (temp1 & 0xFFFF0000); \
        temp0 = temp; \
        temp = (temp0 ^ (temp0 >>  8)) & 0x0000FF00UL; \
        temp0 ^= temp ^ (temp <<  8); \
        temp = (temp0 ^ (temp0 >>  4)) & 0x00F000F0UL; \
        temp0 ^= temp ^ (temp <<  4); \
        temp = (temp0 ^ (temp0 >>  2)) & 0x0C0C0C0CUL; \
        temp0 ^= temp ^ (temp <<  2); \
        temp = (temp0 ^ (temp0 >>  1)) & 0x22222222UL; \
        temp0 ^= temp ^ (temp <<  1); \
        temp = (temp1 ^ (temp1 >>  8)) & 0x0000FF00UL; \
        temp1 ^= temp ^ (temp <<  8); \
        temp = (temp1 ^ (temp1 >>  4)) & 0x00F000F0UL; \
        temp1 ^= temp ^ (temp <<  4); \
        temp = (temp1 ^ (temp1 >>  2)) & 0x0C0C0C0CUL; \
        temp1 ^= temp ^ (temp <<  2); \
        temp = (temp1 ^ (temp1 >>  1)) & 0x22222222UL; \
        temp1 ^= temp ^ (temp <<  1);
79 
/*
 * fromBitInterleaving(even, odd, low, high, temp, temp0, temp1)
 *
 * Runs prepareFromBitInterleaving on (even, odd) and stores the resulting
 * low word into the lvalue low and the high word into the lvalue high.
 * low and high each appear once in the expansion, which ends with a
 * semicolon.
 */
#define fromBitInterleaving(even, odd, low, high, temp, temp0, temp1) \
        prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \
        (low) = (temp0); \
        (high) = (temp1);
84 
/*
 * fromBitInterleavingAndXOR(even, odd, lowIn, highIn, lowOut, highOut,
 *                           temp, temp0, temp1)
 *
 * Runs prepareFromBitInterleaving on (even, odd), then writes
 *   lowOut  = lowIn  XOR low word,
 *   highOut = highIn XOR high word.
 * lowIn, highIn, lowOut and highOut each appear once in the expansion, which
 * ends with a semicolon.
 */
#define fromBitInterleavingAndXOR(even, odd, lowIn, highIn, lowOut, highOut, temp, temp0, temp1) \
        prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \
        (lowOut) = (temp0) ^ (lowIn); \
        (highOut) = (temp1) ^ (highIn);
89 
KeccakP1600_SetBytesInLaneToZero(void * state,unsigned int lanePosition,unsigned int offset,unsigned int length)90 void KeccakP1600_SetBytesInLaneToZero(void *state, unsigned int lanePosition, unsigned int offset, unsigned int length)
91 {
92     UINT8 laneAsBytes[8];
93     UINT32 low, high;
94     UINT32 temp, temp0, temp1;
95     UINT32 *stateAsHalfLanes = (UINT32*)state;
96 
97     memset(laneAsBytes, 0xFF, offset);
98     memset(laneAsBytes+offset, 0x00, length);
99     memset(laneAsBytes+offset+length, 0xFF, 8-offset-length);
100 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
101     low = *((UINT32*)(laneAsBytes+0));
102     high = *((UINT32*)(laneAsBytes+4));
103 #else
104     low = laneAsBytes[0]
105         | ((UINT32)(laneAsBytes[1]) << 8)
106         | ((UINT32)(laneAsBytes[2]) << 16)
107         | ((UINT32)(laneAsBytes[3]) << 24);
108     high = laneAsBytes[4]
109         | ((UINT32)(laneAsBytes[5]) << 8)
110         | ((UINT32)(laneAsBytes[6]) << 16)
111         | ((UINT32)(laneAsBytes[7]) << 24);
112 #endif
113     toBitInterleavingAndAND(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
114 }
115 
116 /* ---------------------------------------------------------------- */
117 
/*
 * Sets the whole state to zero.
 *
 * state : buffer of at least 200 bytes; exactly the first 200 bytes are
 *         overwritten with 0.
 */
void KeccakP1600_Initialize(void *state)
{
    /* 25 lanes of 8 bytes each */
    enum { stateLaneCount = 25, laneSizeInBytes = 8 };

    memset(state, 0, stateLaneCount * laneSizeInBytes);
}
122 
123 /* ---------------------------------------------------------------- */
124 
KeccakP1600_AddByte(void * state,unsigned char byte,unsigned int offset)125 void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset)
126 {
127     unsigned int lanePosition = offset/8;
128     unsigned int offsetInLane = offset%8;
129     UINT32 low, high;
130     UINT32 temp, temp0, temp1;
131     UINT32 *stateAsHalfLanes = (UINT32*)state;
132 
133     if (offsetInLane < 4) {
134         low = (UINT32)byte << (offsetInLane*8);
135         high = 0;
136     }
137     else {
138         low = 0;
139         high = (UINT32)byte << ((offsetInLane-4)*8);
140     }
141     toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
142 }
143 
144 /* ---------------------------------------------------------------- */
145 
KeccakP1600_AddBytesInLane(void * state,unsigned int lanePosition,const unsigned char * data,unsigned int offset,unsigned int length)146 void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
147 {
148     UINT8 laneAsBytes[8];
149     UINT32 low, high;
150     UINT32 temp, temp0, temp1;
151     UINT32 *stateAsHalfLanes = (UINT32*)state;
152 
153     memset(laneAsBytes, 0, 8);
154     memcpy(laneAsBytes+offset, data, length);
155 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
156     low = *((UINT32*)(laneAsBytes+0));
157     high = *((UINT32*)(laneAsBytes+4));
158 #else
159     low = laneAsBytes[0]
160         | ((UINT32)(laneAsBytes[1]) << 8)
161         | ((UINT32)(laneAsBytes[2]) << 16)
162         | ((UINT32)(laneAsBytes[3]) << 24);
163     high = laneAsBytes[4]
164         | ((UINT32)(laneAsBytes[5]) << 8)
165         | ((UINT32)(laneAsBytes[6]) << 16)
166         | ((UINT32)(laneAsBytes[7]) << 24);
167 #endif
168     toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
169 }
170 
171 /* ---------------------------------------------------------------- */
172 
173 ALLOW_MISALIGNED_ACCESS
KeccakP1600_AddLanes(void * state,const unsigned char * data,unsigned int laneCount)174 void KeccakP1600_AddLanes(void *state, const unsigned char *data, unsigned int laneCount)
175 {
176 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
177     const UINT32 * pI = (const UINT32 *)data;
178     UINT32 * pS = (UINT32*)state;
179     UINT32 t, x0, x1;
180     int i;
181     for (i = laneCount-1; i >= 0; --i) {
182 #ifdef NO_MISALIGNED_ACCESSES
183         UINT32 low;
184         UINT32 high;
185         memcpy(&low, pI++, 4);
186         memcpy(&high, pI++, 4);
187         toBitInterleavingAndXOR(low, high, *(pS++), *(pS++), t, x0, x1);
188 #else
189         toBitInterleavingAndXOR(*(pI++), *(pI++), *(pS++), *(pS++), t, x0, x1)
190 #endif
191     }
192 #else
193     unsigned int lanePosition;
194     for(lanePosition=0; lanePosition<laneCount; lanePosition++) {
195         UINT8 laneAsBytes[8];
196         memcpy(laneAsBytes, data+lanePosition*8, 8);
197         UINT32 low = laneAsBytes[0]
198             | ((UINT32)(laneAsBytes[1]) << 8)
199             | ((UINT32)(laneAsBytes[2]) << 16)
200             | ((UINT32)(laneAsBytes[3]) << 24);
201         UINT32 high = laneAsBytes[4]
202             | ((UINT32)(laneAsBytes[5]) << 8)
203             | ((UINT32)(laneAsBytes[6]) << 16)
204             | ((UINT32)(laneAsBytes[7]) << 24);
205         UINT32 even, odd, temp, temp0, temp1;
206         UINT32 *stateAsHalfLanes = (UINT32*)state;
207         toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
208     }
209 #endif
210 }
211 
212 /* ---------------------------------------------------------------- */
213 
KeccakP1600_AddBytes(void * state,const unsigned char * data,unsigned int offset,unsigned int length)214 void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
215 {
216     SnP_AddBytes(state, data, offset, length, KeccakP1600_AddLanes, KeccakP1600_AddBytesInLane, 8);
217 }
218 
219 /* ---------------------------------------------------------------- */
220 
/*
 * Replaces `length` bytes of the lane at index `lanePosition`, starting at
 * byte `offset` of that lane, with the bytes in `data`.
 *
 * Done in two steps on the same byte range: clear it, then XOR the new bytes
 * into the cleared positions.
 */
void KeccakP1600_OverwriteBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
{
    /* Step 1: zero the target bytes. */
    KeccakP1600_SetBytesInLaneToZero(state, lanePosition, offset, length);

    /* Step 2: XOR into zero, which stores `data` as-is. */
    KeccakP1600_AddBytesInLane(state, lanePosition, data, offset, length);
}
226 
227 /* ---------------------------------------------------------------- */
228 
229 ALLOW_MISALIGNED_ACCESS
KeccakP1600_OverwriteLanes(void * state,const unsigned char * data,unsigned int laneCount)230 void KeccakP1600_OverwriteLanes(void *state, const unsigned char *data, unsigned int laneCount)
231 {
232 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
233     const UINT32 * pI = (const UINT32 *)data;
234     UINT32 * pS = (UINT32 *)state;
235     UINT32 t, x0, x1;
236     int i;
237     for (i = laneCount-1; i >= 0; --i) {
238 #ifdef NO_MISALIGNED_ACCESSES
239         UINT32 low;
240         UINT32 high;
241         memcpy(&low, pI++, 4);
242         memcpy(&high, pI++, 4);
243         toBitInterleavingAndSet(low, high, *(pS++), *(pS++), t, x0, x1);
244 #else
245         toBitInterleavingAndSet(*(pI++), *(pI++), *(pS++), *(pS++), t, x0, x1)
246 #endif
247     }
248 #else
249     unsigned int lanePosition;
250     for(lanePosition=0; lanePosition<laneCount; lanePosition++) {
251         UINT8 laneAsBytes[8];
252         memcpy(laneAsBytes, data+lanePosition*8, 8);
253         UINT32 low = laneAsBytes[0]
254             | ((UINT32)(laneAsBytes[1]) << 8)
255             | ((UINT32)(laneAsBytes[2]) << 16)
256             | ((UINT32)(laneAsBytes[3]) << 24);
257         UINT32 high = laneAsBytes[4]
258             | ((UINT32)(laneAsBytes[5]) << 8)
259             | ((UINT32)(laneAsBytes[6]) << 16)
260             | ((UINT32)(laneAsBytes[7]) << 24);
261         UINT32 even, odd, temp, temp0, temp1;
262         UINT32 *stateAsHalfLanes = (UINT32*)state;
263         toBitInterleavingAndSet(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
264     }
265 #endif
266 }
267 
268 /* ---------------------------------------------------------------- */
269 
KeccakP1600_OverwriteBytes(void * state,const unsigned char * data,unsigned int offset,unsigned int length)270 void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
271 {
272     SnP_OverwriteBytes(state, data, offset, length, KeccakP1600_OverwriteLanes, KeccakP1600_OverwriteBytesInLane, 8);
273 }
274 
275 /* ---------------------------------------------------------------- */
276 
KeccakP1600_OverwriteWithZeroes(void * state,unsigned int byteCount)277 void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount)
278 {
279     UINT32 *stateAsHalfLanes = (UINT32*)state;
280     unsigned int i;
281 
282     for(i=0; i<byteCount/8; i++) {
283         stateAsHalfLanes[i*2+0] = 0;
284         stateAsHalfLanes[i*2+1] = 0;
285     }
286     if (byteCount%8 != 0)
287         KeccakP1600_SetBytesInLaneToZero(state, byteCount/8, 0, byteCount%8);
288 }
289 
290 /* ---------------------------------------------------------------- */
291 
KeccakP1600_ExtractBytesInLane(const void * state,unsigned int lanePosition,unsigned char * data,unsigned int offset,unsigned int length)292 void KeccakP1600_ExtractBytesInLane(const void *state, unsigned int lanePosition, unsigned char *data, unsigned int offset, unsigned int length)
293 {
294     UINT32 *stateAsHalfLanes = (UINT32*)state;
295     UINT32 low, high, temp, temp0, temp1;
296     UINT8 laneAsBytes[8];
297 
298     fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1);
299 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
300     *((UINT32*)(laneAsBytes+0)) = low;
301     *((UINT32*)(laneAsBytes+4)) = high;
302 #else
303     laneAsBytes[0] = low & 0xFF;
304     laneAsBytes[1] = (low >> 8) & 0xFF;
305     laneAsBytes[2] = (low >> 16) & 0xFF;
306     laneAsBytes[3] = (low >> 24) & 0xFF;
307     laneAsBytes[4] = high & 0xFF;
308     laneAsBytes[5] = (high >> 8) & 0xFF;
309     laneAsBytes[6] = (high >> 16) & 0xFF;
310     laneAsBytes[7] = (high >> 24) & 0xFF;
311 #endif
312     memcpy(data, laneAsBytes+offset, length);
313 }
314 
315 /* ---------------------------------------------------------------- */
316 
317 ALLOW_MISALIGNED_ACCESS
KeccakP1600_ExtractLanes(const void * state,unsigned char * data,unsigned int laneCount)318 void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount)
319 {
320 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
321     UINT32 * pI = (UINT32 *)data;
322     const UINT32 * pS = ( const UINT32 *)state;
323     UINT32 t, x0, x1;
324     int i;
325     for (i = laneCount-1; i >= 0; --i) {
326 #ifdef NO_MISALIGNED_ACCESSES
327         UINT32 low;
328         UINT32 high;
329         fromBitInterleaving(*(pS++), *(pS++), low, high, t, x0, x1);
330         memcpy(pI++, &low, 4);
331         memcpy(pI++, &high, 4);
332 #else
333         fromBitInterleaving(*(pS++), *(pS++), *(pI++), *(pI++), t, x0, x1)
334 #endif
335     }
336 #else
337     unsigned int lanePosition;
338     for(lanePosition=0; lanePosition<laneCount; lanePosition++) {
339         UINT32 *stateAsHalfLanes = (UINT32*)state;
340         UINT32 low, high, temp, temp0, temp1;
341         fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1);
342         UINT8 laneAsBytes[8];
343         laneAsBytes[0] = low & 0xFF;
344         laneAsBytes[1] = (low >> 8) & 0xFF;
345         laneAsBytes[2] = (low >> 16) & 0xFF;
346         laneAsBytes[3] = (low >> 24) & 0xFF;
347         laneAsBytes[4] = high & 0xFF;
348         laneAsBytes[5] = (high >> 8) & 0xFF;
349         laneAsBytes[6] = (high >> 16) & 0xFF;
350         laneAsBytes[7] = (high >> 24) & 0xFF;
351         memcpy(data+lanePosition*8, laneAsBytes, 8);
352     }
353 #endif
354 }
355 
356 /* ---------------------------------------------------------------- */
357 
KeccakP1600_ExtractBytes(const void * state,unsigned char * data,unsigned int offset,unsigned int length)358 void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
359 {
360     SnP_ExtractBytes(state, data, offset, length, KeccakP1600_ExtractLanes, KeccakP1600_ExtractBytesInLane, 8);
361 }
362 
363 /* ---------------------------------------------------------------- */
364 
KeccakP1600_ExtractAndAddBytesInLane(const void * state,unsigned int lanePosition,const unsigned char * input,unsigned char * output,unsigned int offset,unsigned int length)365 void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
366 {
367     UINT32 *stateAsHalfLanes = (UINT32*)state;
368     UINT32 low, high, temp, temp0, temp1;
369     UINT8 laneAsBytes[8];
370     unsigned int i;
371 
372     fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1);
373 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
374     *((UINT32*)(laneAsBytes+0)) = low;
375     *((UINT32*)(laneAsBytes+4)) = high;
376 #else
377     laneAsBytes[0] = low & 0xFF;
378     laneAsBytes[1] = (low >> 8) & 0xFF;
379     laneAsBytes[2] = (low >> 16) & 0xFF;
380     laneAsBytes[3] = (low >> 24) & 0xFF;
381     laneAsBytes[4] = high & 0xFF;
382     laneAsBytes[5] = (high >> 8) & 0xFF;
383     laneAsBytes[6] = (high >> 16) & 0xFF;
384     laneAsBytes[7] = (high >> 24) & 0xFF;
385 #endif
386     for(i=0; i<length; i++)
387         output[i] = input[i] ^ laneAsBytes[offset+i];
388 }
389 
390 /* ---------------------------------------------------------------- */
391 
392 ALLOW_MISALIGNED_ACCESS
KeccakP1600_ExtractAndAddLanes(const void * state,const unsigned char * input,unsigned char * output,unsigned int laneCount)393 void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *input, unsigned char *output, unsigned int laneCount)
394 {
395 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
396     const UINT32 * pI = (const UINT32 *)input;
397     UINT32 * pO = (UINT32 *)output;
398     const UINT32 * pS = (const UINT32 *)state;
399     UINT32 t, x0, x1;
400     int i;
401     for (i = laneCount-1; i >= 0; --i) {
402 #ifdef NO_MISALIGNED_ACCESSES
403         UINT32 low;
404         UINT32 high;
405         fromBitInterleaving(*(pS++), *(pS++), low, high, t, x0, x1);
406         *(pO++) = *(pI++) ^ low;
407         *(pO++) = *(pI++) ^ high;
408 #else
409         fromBitInterleavingAndXOR(*(pS++), *(pS++), *(pI++), *(pI++), *(pO++), *(pO++), t, x0, x1)
410 #endif
411     }
412 #else
413     unsigned int lanePosition;
414     for(lanePosition=0; lanePosition<laneCount; lanePosition++) {
415         UINT32 *stateAsHalfLanes = (UINT32*)state;
416         UINT32 low, high, temp, temp0, temp1;
417         fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1);
418         UINT8 laneAsBytes[8];
419         laneAsBytes[0] = low & 0xFF;
420         laneAsBytes[1] = (low >> 8) & 0xFF;
421         laneAsBytes[2] = (low >> 16) & 0xFF;
422         laneAsBytes[3] = (low >> 24) & 0xFF;
423         laneAsBytes[4] = high & 0xFF;
424         laneAsBytes[5] = (high >> 8) & 0xFF;
425         laneAsBytes[6] = (high >> 16) & 0xFF;
426         laneAsBytes[7] = (high >> 24) & 0xFF;
427         ((UINT32*)(output+lanePosition*8))[0] = ((UINT32*)(input+lanePosition*8))[0] ^ (*(const UINT32*)(laneAsBytes+0));
428         ((UINT32*)(output+lanePosition*8))[1] = ((UINT32*)(input+lanePosition*8))[0] ^ (*(const UINT32*)(laneAsBytes+4));
429     }
430 #endif
431 }
432 /* ---------------------------------------------------------------- */
433 
KeccakP1600_ExtractAndAddBytes(const void * state,const unsigned char * input,unsigned char * output,unsigned int offset,unsigned int length)434 void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
435 {
436     SnP_ExtractAndAddBytes(state, input, output, offset, length, KeccakP1600_ExtractAndAddLanes, KeccakP1600_ExtractAndAddBytesInLane, 8);
437 }
438 
439 /* ---------------------------------------------------------------- */
440 
/*
 * Round-constant table: 24 pairs of 32-bit words followed by one extra word.
 *
 * Each row below is one pair. The rows are consistent with 64-bit round
 * constants stored in the bit-interleaved representation, first the word
 * holding the even-indexed bits, then the word holding the odd-indexed bits.
 * The round macros of this file read two consecutive words through
 * pRoundConstants, XORing the first into Aba0 and the second into Aba1.
 * NOTE(review): the code that points pRoundConstants at this table is not in
 * this part of the file - confirm.
 *
 * The final single word 0x000000FF is not part of a pair; presumably it is an
 * end-of-table marker for the permutation loop - confirm against that code.
 */
static const UINT32 KeccakF1600RoundConstants_int2[2*24+1] =
{
    0x00000001UL,    0x00000000UL,
    0x00000000UL,    0x00000089UL,
    0x00000000UL,    0x8000008bUL,
    0x00000000UL,    0x80008080UL,
    0x00000001UL,    0x0000008bUL,
    0x00000001UL,    0x00008000UL,
    0x00000001UL,    0x80008088UL,
    0x00000001UL,    0x80000082UL,
    0x00000000UL,    0x0000000bUL,
    0x00000000UL,    0x0000000aUL,
    0x00000001UL,    0x00008082UL,
    0x00000000UL,    0x00008003UL,
    0x00000001UL,    0x0000808bUL,
    0x00000001UL,    0x8000000bUL,
    0x00000001UL,    0x8000008aUL,
    0x00000001UL,    0x80000081UL,
    0x00000000UL,    0x80000081UL,
    0x00000000UL,    0x80000008UL,
    0x00000000UL,    0x00000083UL,
    0x00000000UL,    0x80008003UL,
    0x00000001UL,    0x80008088UL,
    0x00000000UL,    0x80000088UL,
    0x00000001UL,    0x00008000UL,
    0x00000000UL,    0x80008082UL,
    0x000000FFUL
};
469 
/*
 * KeccakRound0(): one round, applied in place to the working variables of the
 * expansion site.
 *
 * The macro takes no arguments. It reads and writes identifiers that must
 * already be declared where it is expanded:
 *   - the fifty state words A{b,g,k,m,s}{a,e,i,o,u}{0,1};
 *   - the scratch words Ba, Be, Bi, Bo, Bu, Cw, Cx, Cy, Cz and
 *     Da0, Da1, De0, De1, Di0, Di1, Do0, Do1, Du0, Du1;
 *   - pRoundConstants, dereferenced and post-incremented twice.
 *
 * Structure:
 *   1. Twenty statements XOR five state words at a time into temporaries and
 *      combine them (some rotated left by 1) into the ten D values.
 *   2. Ten groups of ten statements. Each group loads Ba..Bu from five state
 *      words XORed with D values (most of them rotated with ROL32), then
 *      stores B ^ ((~B') & B'') back into the same five state words it read.
 *      The first two groups additionally XOR one word from pRoundConstants
 *      into Aba0 and Aba1 respectively.
 * This matches the theta, rho/pi, chi and iota steps of a Keccak-f round on
 * 32-bit half-lanes, as far as can be told from the structure.
 *
 * The expansion does NOT end with a semicolon: the invocation must supply it.
 */
#define KeccakRound0() \
        /* Step 1: five-word XOR sums and the D values derived from them */ \
        Cx = Abu0^Agu0^Aku0^Amu0^Asu0; \
        Du1 = Abe1^Age1^Ake1^Ame1^Ase1; \
        Da0 = Cx^ROL32(Du1, 1); \
        Cz = Abu1^Agu1^Aku1^Amu1^Asu1; \
        Du0 = Abe0^Age0^Ake0^Ame0^Ase0; \
        Da1 = Cz^Du0; \
        Cw = Abi0^Agi0^Aki0^Ami0^Asi0; \
        Do0 = Cw^ROL32(Cz, 1); \
        Cy = Abi1^Agi1^Aki1^Ami1^Asi1; \
        Do1 = Cy^Cx; \
        Cx = Aba0^Aga0^Aka0^Ama0^Asa0; \
        De0 = Cx^ROL32(Cy, 1); \
        Cz = Aba1^Aga1^Aka1^Ama1^Asa1; \
        De1 = Cz^Cw; \
        Cy = Abo1^Ago1^Ako1^Amo1^Aso1; \
        Di0 = Du0^ROL32(Cy, 1); \
        Cw = Abo0^Ago0^Ako0^Amo0^Aso0; \
        Di1 = Du1^Cw; \
        Du0 = Cw^ROL32(Cz, 1); \
        Du1 = Cy^Cx; \
\
        /* Step 2: ten groups; the first two also consume a round-constant word */ \
        Ba = (Aba0^Da0); \
        Be = ROL32((Age0^De0), 22); \
        Bi = ROL32((Aki1^Di1), 22); \
        Bo = ROL32((Amo1^Do1), 11); \
        Bu = ROL32((Asu0^Du0),  7); \
        Aba0 =   Ba ^((~Be)&  Bi ); \
        Aba0 ^= *(pRoundConstants++); \
        Age0 =   Be ^((~Bi)&  Bo ); \
        Aki1 =   Bi ^((~Bo)&  Bu ); \
        Amo1 =   Bo ^((~Bu)&  Ba ); \
        Asu0 =   Bu ^((~Ba)&  Be ); \
        Ba = (Aba1^Da1); \
        Be = ROL32((Age1^De1), 22); \
        Bi = ROL32((Aki0^Di0), 21); \
        Bo = ROL32((Amo0^Do0), 10); \
        Bu = ROL32((Asu1^Du1),  7); \
        Aba1 =   Ba ^((~Be)&  Bi ); \
        Aba1 ^= *(pRoundConstants++); \
        Age1 =   Be ^((~Bi)&  Bo ); \
        Aki0 =   Bi ^((~Bo)&  Bu ); \
        Amo0 =   Bo ^((~Bu)&  Ba ); \
        Asu1 =   Bu ^((~Ba)&  Be ); \
        Bi = ROL32((Aka1^Da1),  2); \
        Bo = ROL32((Ame1^De1), 23); \
        Bu = ROL32((Asi1^Di1), 31); \
        Ba = ROL32((Abo0^Do0), 14); \
        Be = ROL32((Agu0^Du0), 10); \
        Aka1 =   Ba ^((~Be)&  Bi ); \
        Ame1 =   Be ^((~Bi)&  Bo ); \
        Asi1 =   Bi ^((~Bo)&  Bu ); \
        Abo0 =   Bo ^((~Bu)&  Ba ); \
        Agu0 =   Bu ^((~Ba)&  Be ); \
        Bi = ROL32((Aka0^Da0),  1); \
        Bo = ROL32((Ame0^De0), 22); \
        Bu = ROL32((Asi0^Di0), 30); \
        Ba = ROL32((Abo1^Do1), 14); \
        Be = ROL32((Agu1^Du1), 10); \
        Aka0 =   Ba ^((~Be)&  Bi ); \
        Ame0 =   Be ^((~Bi)&  Bo ); \
        Asi0 =   Bi ^((~Bo)&  Bu ); \
        Abo1 =   Bo ^((~Bu)&  Ba ); \
        Agu1 =   Bu ^((~Ba)&  Be ); \
        Bu = ROL32((Asa0^Da0),  9); \
        Ba = ROL32((Abe1^De1),  1); \
        Be = ROL32((Agi0^Di0),  3); \
        Bi = ROL32((Ako1^Do1), 13); \
        Bo = ROL32((Amu0^Du0),  4); \
        Asa0 =   Ba ^((~Be)&  Bi ); \
        Abe1 =   Be ^((~Bi)&  Bo ); \
        Agi0 =   Bi ^((~Bo)&  Bu ); \
        Ako1 =   Bo ^((~Bu)&  Ba ); \
        Amu0 =   Bu ^((~Ba)&  Be ); \
        Bu = ROL32((Asa1^Da1),  9); \
        Ba = (Abe0^De0); \
        Be = ROL32((Agi1^Di1),  3); \
        Bi = ROL32((Ako0^Do0), 12); \
        Bo = ROL32((Amu1^Du1),  4); \
        Asa1 =   Ba ^((~Be)&  Bi ); \
        Abe0 =   Be ^((~Bi)&  Bo ); \
        Agi1 =   Bi ^((~Bo)&  Bu ); \
        Ako0 =   Bo ^((~Bu)&  Ba ); \
        Amu1 =   Bu ^((~Ba)&  Be ); \
        Be = ROL32((Aga0^Da0), 18); \
        Bi = ROL32((Ake0^De0),  5); \
        Bo = ROL32((Ami1^Di1),  8); \
        Bu = ROL32((Aso0^Do0), 28); \
        Ba = ROL32((Abu1^Du1), 14); \
        Aga0 =   Ba ^((~Be)&  Bi ); \
        Ake0 =   Be ^((~Bi)&  Bo ); \
        Ami1 =   Bi ^((~Bo)&  Bu ); \
        Aso0 =   Bo ^((~Bu)&  Ba ); \
        Abu1 =   Bu ^((~Ba)&  Be ); \
        Be = ROL32((Aga1^Da1), 18); \
        Bi = ROL32((Ake1^De1),  5); \
        Bo = ROL32((Ami0^Di0),  7); \
        Bu = ROL32((Aso1^Do1), 28); \
        Ba = ROL32((Abu0^Du0), 13); \
        Aga1 =   Ba ^((~Be)&  Bi ); \
        Ake1 =   Be ^((~Bi)&  Bo ); \
        Ami0 =   Bi ^((~Bo)&  Bu ); \
        Aso1 =   Bo ^((~Bu)&  Ba ); \
        Abu0 =   Bu ^((~Ba)&  Be ); \
        Bo = ROL32((Ama1^Da1), 21); \
        Bu = ROL32((Ase0^De0),  1); \
        Ba = ROL32((Abi0^Di0), 31); \
        Be = ROL32((Ago1^Do1), 28); \
        Bi = ROL32((Aku1^Du1), 20); \
        Ama1 =   Ba ^((~Be)&  Bi ); \
        Ase0 =   Be ^((~Bi)&  Bo ); \
        Abi0 =   Bi ^((~Bo)&  Bu ); \
        Ago1 =   Bo ^((~Bu)&  Ba ); \
        Aku1 =   Bu ^((~Ba)&  Be ); \
        Bo = ROL32((Ama0^Da0), 20); \
        Bu = ROL32((Ase1^De1),  1); \
        Ba = ROL32((Abi1^Di1), 31); \
        Be = ROL32((Ago0^Do0), 27); \
        Bi = ROL32((Aku0^Du0), 19); \
        Ama0 =   Ba ^((~Be)&  Bi ); \
        Ase1 =   Be ^((~Bi)&  Bo ); \
        Abi1 =   Bi ^((~Bo)&  Bu ); \
        Ago0 =   Bo ^((~Bu)&  Ba ); \
        Aku0 =   Bu ^((~Ba)&  Be )
594 
/*
 * KeccakRound1(): one round, applied in place to the working variables of the
 * expansion site.
 *
 * The macro takes no arguments. It reads and writes identifiers that must
 * already be declared where it is expanded:
 *   - the fifty state words A{b,g,k,m,s}{a,e,i,o,u}{0,1};
 *   - the scratch words Ba, Be, Bi, Bo, Bu, Cw, Cx, Cy, Cz and
 *     Da0, Da1, De0, De1, Di0, Di1, Do0, Do1, Du0, Du1;
 *   - pRoundConstants, dereferenced and post-incremented twice.
 *
 * Structure:
 *   1. Twenty statements XOR five state words at a time into temporaries and
 *      combine them (some rotated left by 1) into the ten D values.
 *   2. Ten groups of ten statements. Each group loads Ba..Bu from five state
 *      words XORed with D values (most of them rotated with ROL32), then
 *      stores B ^ ((~B') & B'') back into the same five state words it read.
 *      The first two groups additionally XOR one word from pRoundConstants
 *      into Aba0 and Aba1 respectively.
 * The statement pattern is the same as in KeccakRound0, but the state words
 * are grouped differently. Presumably this accounts for where the previous
 * in-place round left each word - confirm against the round sequencing code.
 *
 * The expansion already ends with a semicolon.
 */
#define KeccakRound1() \
        /* Step 1: five-word XOR sums and the D values derived from them */ \
        Cx = Asu0^Agu0^Amu0^Abu1^Aku1; \
        Du1 = Age1^Ame0^Abe0^Ake1^Ase1; \
        Da0 = Cx^ROL32(Du1, 1); \
        Cz = Asu1^Agu1^Amu1^Abu0^Aku0; \
        Du0 = Age0^Ame1^Abe1^Ake0^Ase0; \
        Da1 = Cz^Du0; \
        Cw = Aki1^Asi1^Agi0^Ami1^Abi0; \
        Do0 = Cw^ROL32(Cz, 1); \
        Cy = Aki0^Asi0^Agi1^Ami0^Abi1; \
        Do1 = Cy^Cx; \
        Cx = Aba0^Aka1^Asa0^Aga0^Ama1; \
        De0 = Cx^ROL32(Cy, 1); \
        Cz = Aba1^Aka0^Asa1^Aga1^Ama0; \
        De1 = Cz^Cw; \
        Cy = Amo0^Abo1^Ako0^Aso1^Ago0; \
        Di0 = Du0^ROL32(Cy, 1); \
        Cw = Amo1^Abo0^Ako1^Aso0^Ago1; \
        Di1 = Du1^Cw; \
        Du0 = Cw^ROL32(Cz, 1); \
        Du1 = Cy^Cx; \
\
        /* Step 2: ten groups; the first two also consume a round-constant word */ \
        Ba = (Aba0^Da0); \
        Be = ROL32((Ame1^De0), 22); \
        Bi = ROL32((Agi1^Di1), 22); \
        Bo = ROL32((Aso1^Do1), 11); \
        Bu = ROL32((Aku1^Du0),  7); \
        Aba0 =   Ba ^((~Be)&  Bi ); \
        Aba0 ^= *(pRoundConstants++); \
        Ame1 =   Be ^((~Bi)&  Bo ); \
        Agi1 =   Bi ^((~Bo)&  Bu ); \
        Aso1 =   Bo ^((~Bu)&  Ba ); \
        Aku1 =   Bu ^((~Ba)&  Be ); \
        Ba = (Aba1^Da1); \
        Be = ROL32((Ame0^De1), 22); \
        Bi = ROL32((Agi0^Di0), 21); \
        Bo = ROL32((Aso0^Do0), 10); \
        Bu = ROL32((Aku0^Du1),  7); \
        Aba1 =   Ba ^((~Be)&  Bi ); \
        Aba1 ^= *(pRoundConstants++); \
        Ame0 =   Be ^((~Bi)&  Bo ); \
        Agi0 =   Bi ^((~Bo)&  Bu ); \
        Aso0 =   Bo ^((~Bu)&  Ba ); \
        Aku0 =   Bu ^((~Ba)&  Be ); \
        Bi = ROL32((Asa1^Da1),  2); \
        Bo = ROL32((Ake1^De1), 23); \
        Bu = ROL32((Abi1^Di1), 31); \
        Ba = ROL32((Amo1^Do0), 14); \
        Be = ROL32((Agu0^Du0), 10); \
        Asa1 =   Ba ^((~Be)&  Bi ); \
        Ake1 =   Be ^((~Bi)&  Bo ); \
        Abi1 =   Bi ^((~Bo)&  Bu ); \
        Amo1 =   Bo ^((~Bu)&  Ba ); \
        Agu0 =   Bu ^((~Ba)&  Be ); \
        Bi = ROL32((Asa0^Da0),  1); \
        Bo = ROL32((Ake0^De0), 22); \
        Bu = ROL32((Abi0^Di0), 30); \
        Ba = ROL32((Amo0^Do1), 14); \
        Be = ROL32((Agu1^Du1), 10); \
        Asa0 =   Ba ^((~Be)&  Bi ); \
        Ake0 =   Be ^((~Bi)&  Bo ); \
        Abi0 =   Bi ^((~Bo)&  Bu ); \
        Amo0 =   Bo ^((~Bu)&  Ba ); \
        Agu1 =   Bu ^((~Ba)&  Be ); \
        Bu = ROL32((Ama1^Da0),  9); \
        Ba = ROL32((Age1^De1),  1); \
        Be = ROL32((Asi1^Di0),  3); \
        Bi = ROL32((Ako0^Do1), 13); \
        Bo = ROL32((Abu1^Du0),  4); \
        Ama1 =   Ba ^((~Be)&  Bi ); \
        Age1 =   Be ^((~Bi)&  Bo ); \
        Asi1 =   Bi ^((~Bo)&  Bu ); \
        Ako0 =   Bo ^((~Bu)&  Ba ); \
        Abu1 =   Bu ^((~Ba)&  Be ); \
        Bu = ROL32((Ama0^Da1),  9); \
        Ba = (Age0^De0); \
        Be = ROL32((Asi0^Di1),  3); \
        Bi = ROL32((Ako1^Do0), 12); \
        Bo = ROL32((Abu0^Du1),  4); \
        Ama0 =   Ba ^((~Be)&  Bi ); \
        Age0 =   Be ^((~Bi)&  Bo ); \
        Asi0 =   Bi ^((~Bo)&  Bu ); \
        Ako1 =   Bo ^((~Bu)&  Ba ); \
        Abu0 =   Bu ^((~Ba)&  Be ); \
        Be = ROL32((Aka1^Da0), 18); \
        Bi = ROL32((Abe1^De0),  5); \
        Bo = ROL32((Ami0^Di1),  8); \
        Bu = ROL32((Ago1^Do0), 28); \
        Ba = ROL32((Asu1^Du1), 14); \
        Aka1 =   Ba ^((~Be)&  Bi ); \
        Abe1 =   Be ^((~Bi)&  Bo ); \
        Ami0 =   Bi ^((~Bo)&  Bu ); \
        Ago1 =   Bo ^((~Bu)&  Ba ); \
        Asu1 =   Bu ^((~Ba)&  Be ); \
        Be = ROL32((Aka0^Da1), 18); \
        Bi = ROL32((Abe0^De1),  5); \
        Bo = ROL32((Ami1^Di0),  7); \
        Bu = ROL32((Ago0^Do1), 28); \
        Ba = ROL32((Asu0^Du0), 13); \
        Aka0 =   Ba ^((~Be)&  Bi ); \
        Abe0 =   Be ^((~Bi)&  Bo ); \
        Ami1 =   Bi ^((~Bo)&  Bu ); \
        Ago0 =   Bo ^((~Bu)&  Ba ); \
        Asu0 =   Bu ^((~Ba)&  Be ); \
        Bo = ROL32((Aga1^Da1), 21); \
        Bu = ROL32((Ase0^De0),  1); \
        Ba = ROL32((Aki1^Di0), 31); \
        Be = ROL32((Abo1^Do1), 28); \
        Bi = ROL32((Amu1^Du1), 20); \
        Aga1 =   Ba ^((~Be)&  Bi ); \
        Ase0 =   Be ^((~Bi)&  Bo ); \
        Aki1 =   Bi ^((~Bo)&  Bu ); \
        Abo1 =   Bo ^((~Bu)&  Ba ); \
        Amu1 =   Bu ^((~Ba)&  Be ); \
        Bo = ROL32((Aga0^Da0), 20); \
        Bu = ROL32((Ase1^De1),  1); \
        Ba = ROL32((Aki0^Di1), 31); \
        Be = ROL32((Abo0^Do0), 27); \
        Bi = ROL32((Amu0^Du0), 19); \
        Aga0 =   Ba ^((~Be)&  Bi ); \
        Ase1 =   Be ^((~Bi)&  Bo ); \
        Aki0 =   Bi ^((~Bo)&  Bu ); \
        Abo0 =   Bo ^((~Bu)&  Ba ); \
        Amu0 =   Bu ^((~Ba)&  Be );
719 
/*
 * KeccakRound2() -- third round of the unrolled four-round sequence
 * (KeccakRound0 .. KeccakRound3) used by KeccakP1600_Permute_Nrounds().
 *
 * The macro expands to a plain statement list and expects the following
 * names to be visible at the point of expansion:
 *   - Aba0 .. Asu1     : the 50 UINT32 half-lane words of the state (lvalues);
 *   - Da0 .. Du1       : per-column correction words computed here;
 *   - Ba, Be, Bi, Bo, Bu and Cx, Cy, Cz, Cw : scratch words;
 *   - pRoundConstants  : pointer to the next round-constant word; it is
 *                        read and incremented twice (once for Aba0, once
 *                        for Aba1);
 *   - ROL32(a, n)      : 32-bit left rotation.
 *
 * Each group of five B values is read from, and written back to, the same
 * five state words, so the round is performed in place.  Which word holds
 * which lane on entry is fixed by the preceding round macro; the word
 * names used here therefore look permuted compared with KeccakRound3().
 */
#define KeccakRound2() \
        /* column parities (Cx..Cw) and the derived D words for both halves */ \
        Cx = Aku1^Agu0^Abu1^Asu1^Amu1; \
        Du1 = Ame0^Ake0^Age0^Abe0^Ase1; \
        Da0 = Cx^ROL32(Du1, 1); \
        Cz = Aku0^Agu1^Abu0^Asu0^Amu0; \
        Du0 = Ame1^Ake1^Age1^Abe1^Ase0; \
        Da1 = Cz^Du0; \
        Cw = Agi1^Abi1^Asi1^Ami0^Aki1; \
        Do0 = Cw^ROL32(Cz, 1); \
        Cy = Agi0^Abi0^Asi0^Ami1^Aki0; \
        Do1 = Cy^Cx; \
        Cx = Aba0^Asa1^Ama1^Aka1^Aga1; \
        De0 = Cx^ROL32(Cy, 1); \
        Cz = Aba1^Asa0^Ama0^Aka0^Aga0; \
        De1 = Cz^Cw; \
        Cy = Aso0^Amo0^Ako1^Ago0^Abo0; \
        Di0 = Du0^ROL32(Cy, 1); \
        Cw = Aso1^Amo1^Ako0^Ago1^Abo1; \
        Di1 = Du1^Cw; \
        Du0 = Cw^ROL32(Cz, 1); \
        Du1 = Cy^Cx; \
\
        /* ten groups follow: XOR with D, rotate, then B ^ (~B' & B'') written back in place */ \
        Ba = (Aba0^Da0); \
        Be = ROL32((Ake1^De0), 22); \
        Bi = ROL32((Asi0^Di1), 22); \
        Bo = ROL32((Ago0^Do1), 11); \
        Bu = ROL32((Amu1^Du0),  7); \
        Aba0 =   Ba ^((~Be)&  Bi ); \
        Aba0 ^= *(pRoundConstants++); \
        Ake1 =   Be ^((~Bi)&  Bo ); \
        Asi0 =   Bi ^((~Bo)&  Bu ); \
        Ago0 =   Bo ^((~Bu)&  Ba ); \
        Amu1 =   Bu ^((~Ba)&  Be ); \
        Ba = (Aba1^Da1); \
        Be = ROL32((Ake0^De1), 22); \
        Bi = ROL32((Asi1^Di0), 21); \
        Bo = ROL32((Ago1^Do0), 10); \
        Bu = ROL32((Amu0^Du1),  7); \
        Aba1 =   Ba ^((~Be)&  Bi ); \
        Aba1 ^= *(pRoundConstants++); \
        Ake0 =   Be ^((~Bi)&  Bo ); \
        Asi1 =   Bi ^((~Bo)&  Bu ); \
        Ago1 =   Bo ^((~Bu)&  Ba ); \
        Amu0 =   Bu ^((~Ba)&  Be ); \
        Bi = ROL32((Ama0^Da1),  2); \
        Bo = ROL32((Abe0^De1), 23); \
        Bu = ROL32((Aki0^Di1), 31); \
        Ba = ROL32((Aso1^Do0), 14); \
        Be = ROL32((Agu0^Du0), 10); \
        Ama0 =   Ba ^((~Be)&  Bi ); \
        Abe0 =   Be ^((~Bi)&  Bo ); \
        Aki0 =   Bi ^((~Bo)&  Bu ); \
        Aso1 =   Bo ^((~Bu)&  Ba ); \
        Agu0 =   Bu ^((~Ba)&  Be ); \
        Bi = ROL32((Ama1^Da0),  1); \
        Bo = ROL32((Abe1^De0), 22); \
        Bu = ROL32((Aki1^Di0), 30); \
        Ba = ROL32((Aso0^Do1), 14); \
        Be = ROL32((Agu1^Du1), 10); \
        Ama1 =   Ba ^((~Be)&  Bi ); \
        Abe1 =   Be ^((~Bi)&  Bo ); \
        Aki1 =   Bi ^((~Bo)&  Bu ); \
        Aso0 =   Bo ^((~Bu)&  Ba ); \
        Agu1 =   Bu ^((~Ba)&  Be ); \
        Bu = ROL32((Aga1^Da0),  9); \
        Ba = ROL32((Ame0^De1),  1); \
        Be = ROL32((Abi1^Di0),  3); \
        Bi = ROL32((Ako1^Do1), 13); \
        Bo = ROL32((Asu1^Du0),  4); \
        Aga1 =   Ba ^((~Be)&  Bi ); \
        Ame0 =   Be ^((~Bi)&  Bo ); \
        Abi1 =   Bi ^((~Bo)&  Bu ); \
        Ako1 =   Bo ^((~Bu)&  Ba ); \
        Asu1 =   Bu ^((~Ba)&  Be ); \
        Bu = ROL32((Aga0^Da1),  9); \
        Ba = (Ame1^De0); \
        Be = ROL32((Abi0^Di1),  3); \
        Bi = ROL32((Ako0^Do0), 12); \
        Bo = ROL32((Asu0^Du1),  4); \
        Aga0 =   Ba ^((~Be)&  Bi ); \
        Ame1 =   Be ^((~Bi)&  Bo ); \
        Abi0 =   Bi ^((~Bo)&  Bu ); \
        Ako0 =   Bo ^((~Bu)&  Ba ); \
        Asu0 =   Bu ^((~Ba)&  Be ); \
        Be = ROL32((Asa1^Da0), 18); \
        Bi = ROL32((Age1^De0),  5); \
        Bo = ROL32((Ami1^Di1),  8); \
        Bu = ROL32((Abo1^Do0), 28); \
        Ba = ROL32((Aku0^Du1), 14); \
        Asa1 =   Ba ^((~Be)&  Bi ); \
        Age1 =   Be ^((~Bi)&  Bo ); \
        Ami1 =   Bi ^((~Bo)&  Bu ); \
        Abo1 =   Bo ^((~Bu)&  Ba ); \
        Aku0 =   Bu ^((~Ba)&  Be ); \
        Be = ROL32((Asa0^Da1), 18); \
        Bi = ROL32((Age0^De1),  5); \
        Bo = ROL32((Ami0^Di0),  7); \
        Bu = ROL32((Abo0^Do1), 28); \
        Ba = ROL32((Aku1^Du0), 13); \
        Asa0 =   Ba ^((~Be)&  Bi ); \
        Age0 =   Be ^((~Bi)&  Bo ); \
        Ami0 =   Bi ^((~Bo)&  Bu ); \
        Abo0 =   Bo ^((~Bu)&  Ba ); \
        Aku1 =   Bu ^((~Ba)&  Be ); \
        Bo = ROL32((Aka0^Da1), 21); \
        Bu = ROL32((Ase0^De0),  1); \
        Ba = ROL32((Agi1^Di0), 31); \
        Be = ROL32((Amo0^Do1), 28); \
        Bi = ROL32((Abu0^Du1), 20); \
        Aka0 =   Ba ^((~Be)&  Bi ); \
        Ase0 =   Be ^((~Bi)&  Bo ); \
        Agi1 =   Bi ^((~Bo)&  Bu ); \
        Amo0 =   Bo ^((~Bu)&  Ba ); \
        Abu0 =   Bu ^((~Ba)&  Be ); \
        Bo = ROL32((Aka1^Da0), 20); \
        Bu = ROL32((Ase1^De1),  1); \
        Ba = ROL32((Agi0^Di1), 31); \
        Be = ROL32((Amo1^Do0), 27); \
        Bi = ROL32((Abu1^Du0), 19); \
        Aka1 =   Ba ^((~Be)&  Bi ); \
        Ase1 =   Be ^((~Bi)&  Bo ); \
        Agi0 =   Bi ^((~Bo)&  Bu ); \
        Amo1 =   Bo ^((~Bu)&  Ba ); \
        Abu1 =   Bu ^((~Ba)&  Be );
844 
/*
 * KeccakRound3() -- fourth and last round of the unrolled four-round
 * sequence (KeccakRound0 .. KeccakRound3) used by
 * KeccakP1600_Permute_Nrounds().
 *
 * The macro expands to a plain statement list and expects the following
 * names to be visible at the point of expansion:
 *   - Aba0 .. Asu1     : the 50 UINT32 half-lane words of the state (lvalues);
 *   - Da0 .. Du1       : per-column correction words computed here;
 *   - Ba, Be, Bi, Bo, Bu and Cx, Cy, Cz, Cw : scratch words;
 *   - pRoundConstants  : pointer to the next round-constant word; it is
 *                        read and incremented twice (once for Aba0, once
 *                        for Aba1);
 *   - ROL32(a, n)      : 32-bit left rotation.
 *
 * Each group of five B values is read from, and written back to, the same
 * five state words, so the round is performed in place.  In this round
 * every group works on one row of names (Ab*, Ag*, Ak*, Am*, As*).
 */
#define KeccakRound3() \
        /* column parities (Cx..Cw) and the derived D words for both halves */ \
        Cx = Amu1^Agu0^Asu1^Aku0^Abu0; \
        Du1 = Ake0^Abe1^Ame1^Age0^Ase1; \
        Da0 = Cx^ROL32(Du1, 1); \
        Cz = Amu0^Agu1^Asu0^Aku1^Abu1; \
        Du0 = Ake1^Abe0^Ame0^Age1^Ase0; \
        Da1 = Cz^Du0; \
        Cw = Asi0^Aki0^Abi1^Ami1^Agi1; \
        Do0 = Cw^ROL32(Cz, 1); \
        Cy = Asi1^Aki1^Abi0^Ami0^Agi0; \
        Do1 = Cy^Cx; \
        Cx = Aba0^Ama0^Aga1^Asa1^Aka0; \
        De0 = Cx^ROL32(Cy, 1); \
        Cz = Aba1^Ama1^Aga0^Asa0^Aka1; \
        De1 = Cz^Cw; \
        Cy = Ago1^Aso0^Ako0^Abo0^Amo1; \
        Di0 = Du0^ROL32(Cy, 1); \
        Cw = Ago0^Aso1^Ako1^Abo1^Amo0; \
        Di1 = Du1^Cw; \
        Du0 = Cw^ROL32(Cz, 1); \
        Du1 = Cy^Cx; \
\
        /* ten groups follow: XOR with D, rotate, then B ^ (~B' & B'') written back in place */ \
        Ba = (Aba0^Da0); \
        Be = ROL32((Abe0^De0), 22); \
        Bi = ROL32((Abi0^Di1), 22); \
        Bo = ROL32((Abo0^Do1), 11); \
        Bu = ROL32((Abu0^Du0),  7); \
        Aba0 =   Ba ^((~Be)&  Bi ); \
        Aba0 ^= *(pRoundConstants++); \
        Abe0 =   Be ^((~Bi)&  Bo ); \
        Abi0 =   Bi ^((~Bo)&  Bu ); \
        Abo0 =   Bo ^((~Bu)&  Ba ); \
        Abu0 =   Bu ^((~Ba)&  Be ); \
        Ba = (Aba1^Da1); \
        Be = ROL32((Abe1^De1), 22); \
        Bi = ROL32((Abi1^Di0), 21); \
        Bo = ROL32((Abo1^Do0), 10); \
        Bu = ROL32((Abu1^Du1),  7); \
        Aba1 =   Ba ^((~Be)&  Bi ); \
        Aba1 ^= *(pRoundConstants++); \
        Abe1 =   Be ^((~Bi)&  Bo ); \
        Abi1 =   Bi ^((~Bo)&  Bu ); \
        Abo1 =   Bo ^((~Bu)&  Ba ); \
        Abu1 =   Bu ^((~Ba)&  Be ); \
        Bi = ROL32((Aga0^Da1),  2); \
        Bo = ROL32((Age0^De1), 23); \
        Bu = ROL32((Agi0^Di1), 31); \
        Ba = ROL32((Ago0^Do0), 14); \
        Be = ROL32((Agu0^Du0), 10); \
        Aga0 =   Ba ^((~Be)&  Bi ); \
        Age0 =   Be ^((~Bi)&  Bo ); \
        Agi0 =   Bi ^((~Bo)&  Bu ); \
        Ago0 =   Bo ^((~Bu)&  Ba ); \
        Agu0 =   Bu ^((~Ba)&  Be ); \
        Bi = ROL32((Aga1^Da0),  1); \
        Bo = ROL32((Age1^De0), 22); \
        Bu = ROL32((Agi1^Di0), 30); \
        Ba = ROL32((Ago1^Do1), 14); \
        Be = ROL32((Agu1^Du1), 10); \
        Aga1 =   Ba ^((~Be)&  Bi ); \
        Age1 =   Be ^((~Bi)&  Bo ); \
        Agi1 =   Bi ^((~Bo)&  Bu ); \
        Ago1 =   Bo ^((~Bu)&  Ba ); \
        Agu1 =   Bu ^((~Ba)&  Be ); \
        Bu = ROL32((Aka0^Da0),  9); \
        Ba = ROL32((Ake0^De1),  1); \
        Be = ROL32((Aki0^Di0),  3); \
        Bi = ROL32((Ako0^Do1), 13); \
        Bo = ROL32((Aku0^Du0),  4); \
        Aka0 =   Ba ^((~Be)&  Bi ); \
        Ake0 =   Be ^((~Bi)&  Bo ); \
        Aki0 =   Bi ^((~Bo)&  Bu ); \
        Ako0 =   Bo ^((~Bu)&  Ba ); \
        Aku0 =   Bu ^((~Ba)&  Be ); \
        Bu = ROL32((Aka1^Da1),  9); \
        Ba = (Ake1^De0); \
        Be = ROL32((Aki1^Di1),  3); \
        Bi = ROL32((Ako1^Do0), 12); \
        Bo = ROL32((Aku1^Du1),  4); \
        Aka1 =   Ba ^((~Be)&  Bi ); \
        Ake1 =   Be ^((~Bi)&  Bo ); \
        Aki1 =   Bi ^((~Bo)&  Bu ); \
        Ako1 =   Bo ^((~Bu)&  Ba ); \
        Aku1 =   Bu ^((~Ba)&  Be ); \
        Be = ROL32((Ama0^Da0), 18); \
        Bi = ROL32((Ame0^De0),  5); \
        Bo = ROL32((Ami0^Di1),  8); \
        Bu = ROL32((Amo0^Do0), 28); \
        Ba = ROL32((Amu0^Du1), 14); \
        Ama0 =   Ba ^((~Be)&  Bi ); \
        Ame0 =   Be ^((~Bi)&  Bo ); \
        Ami0 =   Bi ^((~Bo)&  Bu ); \
        Amo0 =   Bo ^((~Bu)&  Ba ); \
        Amu0 =   Bu ^((~Ba)&  Be ); \
        Be = ROL32((Ama1^Da1), 18); \
        Bi = ROL32((Ame1^De1),  5); \
        Bo = ROL32((Ami1^Di0),  7); \
        Bu = ROL32((Amo1^Do1), 28); \
        Ba = ROL32((Amu1^Du0), 13); \
        Ama1 =   Ba ^((~Be)&  Bi ); \
        Ame1 =   Be ^((~Bi)&  Bo ); \
        Ami1 =   Bi ^((~Bo)&  Bu ); \
        Amo1 =   Bo ^((~Bu)&  Ba ); \
        Amu1 =   Bu ^((~Ba)&  Be ); \
        Bo = ROL32((Asa0^Da1), 21); \
        Bu = ROL32((Ase0^De0),  1); \
        Ba = ROL32((Asi0^Di0), 31); \
        Be = ROL32((Aso0^Do1), 28); \
        Bi = ROL32((Asu0^Du1), 20); \
        Asa0 =   Ba ^((~Be)&  Bi ); \
        Ase0 =   Be ^((~Bi)&  Bo ); \
        Asi0 =   Bi ^((~Bo)&  Bu ); \
        Aso0 =   Bo ^((~Bu)&  Ba ); \
        Asu0 =   Bu ^((~Ba)&  Be ); \
        Bo = ROL32((Asa1^Da0), 20); \
        Bu = ROL32((Ase1^De1),  1); \
        Ba = ROL32((Asi1^Di1), 31); \
        Be = ROL32((Aso1^Do0), 27); \
        Bi = ROL32((Asu1^Du0), 19); \
        Asa1 =   Ba ^((~Be)&  Bi ); \
        Ase1 =   Be ^((~Bi)&  Bo ); \
        Asi1 =   Bi ^((~Bo)&  Bu ); \
        Aso1 =   Bo ^((~Bu)&  Ba ); \
        Asu1 =   Bu ^((~Ba)&  Be );
969 
KeccakP1600_Permute_Nrounds(void * state,unsigned int nRounds)970 void KeccakP1600_Permute_Nrounds(void *state, unsigned int nRounds)
971 {
972     UINT32 Da0, De0, Di0, Do0, Du0;
973     UINT32 Da1, De1, Di1, Do1, Du1;
974     UINT32 Ba, Be, Bi, Bo, Bu;
975     UINT32 Cx, Cy, Cz, Cw;
976     const UINT32 *pRoundConstants = KeccakF1600RoundConstants_int2+(24-nRounds)*2;
977     UINT32 *stateAsHalfLanes = (UINT32*)state;
978     #define Aba0 stateAsHalfLanes[ 0]
979     #define Aba1 stateAsHalfLanes[ 1]
980     #define Abe0 stateAsHalfLanes[ 2]
981     #define Abe1 stateAsHalfLanes[ 3]
982     #define Abi0 stateAsHalfLanes[ 4]
983     #define Abi1 stateAsHalfLanes[ 5]
984     #define Abo0 stateAsHalfLanes[ 6]
985     #define Abo1 stateAsHalfLanes[ 7]
986     #define Abu0 stateAsHalfLanes[ 8]
987     #define Abu1 stateAsHalfLanes[ 9]
988     #define Aga0 stateAsHalfLanes[10]
989     #define Aga1 stateAsHalfLanes[11]
990     #define Age0 stateAsHalfLanes[12]
991     #define Age1 stateAsHalfLanes[13]
992     #define Agi0 stateAsHalfLanes[14]
993     #define Agi1 stateAsHalfLanes[15]
994     #define Ago0 stateAsHalfLanes[16]
995     #define Ago1 stateAsHalfLanes[17]
996     #define Agu0 stateAsHalfLanes[18]
997     #define Agu1 stateAsHalfLanes[19]
998     #define Aka0 stateAsHalfLanes[20]
999     #define Aka1 stateAsHalfLanes[21]
1000     #define Ake0 stateAsHalfLanes[22]
1001     #define Ake1 stateAsHalfLanes[23]
1002     #define Aki0 stateAsHalfLanes[24]
1003     #define Aki1 stateAsHalfLanes[25]
1004     #define Ako0 stateAsHalfLanes[26]
1005     #define Ako1 stateAsHalfLanes[27]
1006     #define Aku0 stateAsHalfLanes[28]
1007     #define Aku1 stateAsHalfLanes[29]
1008     #define Ama0 stateAsHalfLanes[30]
1009     #define Ama1 stateAsHalfLanes[31]
1010     #define Ame0 stateAsHalfLanes[32]
1011     #define Ame1 stateAsHalfLanes[33]
1012     #define Ami0 stateAsHalfLanes[34]
1013     #define Ami1 stateAsHalfLanes[35]
1014     #define Amo0 stateAsHalfLanes[36]
1015     #define Amo1 stateAsHalfLanes[37]
1016     #define Amu0 stateAsHalfLanes[38]
1017     #define Amu1 stateAsHalfLanes[39]
1018     #define Asa0 stateAsHalfLanes[40]
1019     #define Asa1 stateAsHalfLanes[41]
1020     #define Ase0 stateAsHalfLanes[42]
1021     #define Ase1 stateAsHalfLanes[43]
1022     #define Asi0 stateAsHalfLanes[44]
1023     #define Asi1 stateAsHalfLanes[45]
1024     #define Aso0 stateAsHalfLanes[46]
1025     #define Aso1 stateAsHalfLanes[47]
1026     #define Asu0 stateAsHalfLanes[48]
1027     #define Asu1 stateAsHalfLanes[49]
1028 
1029     nRounds &= 3;
1030     switch ( nRounds )
1031     {
1032         #define I0 Ba
1033         #define I1 Be
1034         #define T0 Bi
1035         #define T1 Bo
1036         #define SwapPI13( in0,in1,in2,in3,eo0,eo1,eo2,eo3 ) \
1037             I0 = (in0)[0]; I1 = (in0)[1];       \
1038             T0 = (in1)[0]; T1 = (in1)[1];       \
1039             (in0)[eo0] = T0; (in0)[eo0^1] = T1; \
1040             T0 = (in2)[0]; T1 = (in2)[1];       \
1041             (in1)[eo1] = T0; (in1)[eo1^1] = T1; \
1042             T0 = (in3)[0]; T1 = (in3)[1];       \
1043             (in2)[eo2] = T0; (in2)[eo2^1] = T1; \
1044             (in3)[eo3] = I0; (in3)[eo3^1] = I1
1045         #define SwapPI2( in0,in1,in2,in3 ) \
1046             I0 = (in0)[0]; I1 = (in0)[1]; \
1047             T0 = (in1)[0]; T1 = (in1)[1]; \
1048             (in0)[1] = T0; (in0)[0] = T1; \
1049             (in1)[1] = I0; (in1)[0] = I1; \
1050             I0 = (in2)[0]; I1 = (in2)[1]; \
1051             T0 = (in3)[0]; T1 = (in3)[1]; \
1052             (in2)[1] = T0; (in2)[0] = T1; \
1053             (in3)[1] = I0; (in3)[0] = I1
1054         #define SwapEO( even,odd ) T0 = even; even = odd; odd = T0
1055 
1056         case 1:
1057             SwapPI13( &Aga0, &Aka0, &Asa0, &Ama0, 1, 0, 1, 0 );
1058             SwapPI13( &Abe0, &Age0, &Ame0, &Ake0, 0, 1, 0, 1 );
1059             SwapPI13( &Abi0, &Aki0, &Agi0, &Asi0, 1, 0, 1, 0 );
1060             SwapEO( Ami0, Ami1 );
1061             SwapPI13( &Abo0, &Amo0, &Aso0, &Ago0, 1, 0, 1, 0 );
1062             SwapEO( Ako0, Ako1 );
1063             SwapPI13( &Abu0, &Asu0, &Aku0, &Amu0, 0, 1, 0, 1 );
1064             break;
1065 
1066         case 2:
1067             SwapPI2( &Aga0, &Asa0, &Aka0, &Ama0 );
1068             SwapPI2( &Abe0, &Ame0, &Age0, &Ake0 );
1069             SwapPI2( &Abi0, &Agi0, &Aki0, &Asi0 );
1070             SwapPI2( &Abo0, &Aso0, &Ago0, &Amo0 );
1071             SwapPI2( &Abu0, &Aku0, &Amu0, &Asu0 );
1072             break;
1073 
1074         case 3:
1075             SwapPI13( &Aga0, &Ama0, &Asa0, &Aka0, 0, 1, 0, 1 );
1076             SwapPI13( &Abe0, &Ake0, &Ame0, &Age0, 1, 0, 1, 0 );
1077             SwapPI13( &Abi0, &Asi0, &Agi0, &Aki0, 0, 1, 0, 1 );
1078             SwapEO( Ami0, Ami1 );
1079             SwapPI13( &Abo0, &Ago0, &Aso0, &Amo0, 0, 1, 0, 1 );
1080             SwapEO( Ako0, Ako1 );
1081             SwapPI13( &Abu0, &Amu0, &Aku0, &Asu0, 1, 0, 1, 0 );
1082             break;
1083         #undef I0
1084         #undef I1
1085         #undef T0
1086         #undef T1
1087         #undef SwapPI13
1088         #undef SwapPI2
1089         #undef SwapEO
1090     }
1091 
1092     do
1093     {
1094         /* Code for 4 rounds, using factor 2 interleaving, 64-bit lanes mapped to 32-bit words */
1095         switch ( nRounds )
1096         {
1097             case 0: KeccakRound0(); /* fall through */
1098             case 3: KeccakRound1();
1099             case 2: KeccakRound2();
1100             case 1: KeccakRound3();
1101         }
1102         nRounds = 0;
1103     }
1104     while ( *pRoundConstants != 0xFF );
1105 
1106     #undef Aba0
1107     #undef Aba1
1108     #undef Abe0
1109     #undef Abe1
1110     #undef Abi0
1111     #undef Abi1
1112     #undef Abo0
1113     #undef Abo1
1114     #undef Abu0
1115     #undef Abu1
1116     #undef Aga0
1117     #undef Aga1
1118     #undef Age0
1119     #undef Age1
1120     #undef Agi0
1121     #undef Agi1
1122     #undef Ago0
1123     #undef Ago1
1124     #undef Agu0
1125     #undef Agu1
1126     #undef Aka0
1127     #undef Aka1
1128     #undef Ake0
1129     #undef Ake1
1130     #undef Aki0
1131     #undef Aki1
1132     #undef Ako0
1133     #undef Ako1
1134     #undef Aku0
1135     #undef Aku1
1136     #undef Ama0
1137     #undef Ama1
1138     #undef Ame0
1139     #undef Ame1
1140     #undef Ami0
1141     #undef Ami1
1142     #undef Amo0
1143     #undef Amo1
1144     #undef Amu0
1145     #undef Amu1
1146     #undef Asa0
1147     #undef Asa1
1148     #undef Ase0
1149     #undef Ase1
1150     #undef Asi0
1151     #undef Asi1
1152     #undef Aso0
1153     #undef Aso1
1154     #undef Asu0
1155     #undef Asu1
1156 }
1157 
1158 /* ---------------------------------------------------------------- */
1159 
/*
 * Applies 12 rounds to the state in place by delegating to
 * KeccakP1600_Permute_Nrounds(); state is passed through unchanged.
 */
void KeccakP1600_Permute_12rounds(void *state)
{
    KeccakP1600_Permute_Nrounds(state, 12);
}
1164 
1165 /* ---------------------------------------------------------------- */
1166 
/*
 * Applies all 24 rounds to the state in place by delegating to
 * KeccakP1600_Permute_Nrounds(); state is passed through unchanged.
 */
void KeccakP1600_Permute_24rounds(void *state)
{
    KeccakP1600_Permute_Nrounds(state, 24);
}
1171