1 /*
2 Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
3 Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
4 denoted as "the implementer".
5 
6 For more information, feedback or questions, please refer to our websites:
7 http://keccak.noekeon.org/
8 http://keyak.noekeon.org/
9 http://ketje.noekeon.org/
10 
11 To the extent possible under law, the implementer has waived all copyright
12 and related or neighboring rights to the source code in this file.
13 http://creativecommons.org/publicdomain/zero/1.0/
14 */
15 
16 #include    <string.h>
17 #include "brg_endian.h"
18 #include "KeccakP-1600-SnP.h"
19 #include "SnP-Relaned.h"
20 
typedef unsigned char UINT8;
typedef unsigned int UINT32;
/* WARNING: on 8-bit and 16-bit platforms, this should be replaced by: */
/* typedef unsigned long       UINT32; */

/*
 * Rotates the 32-bit value a to the left by offset bit positions.
 * The argument a is fully parenthesised before the cast, so a compound
 * expression is converted to UINT32 as a whole.
 * offset must be in the range 1..31: an offset of 0 would shift right by 32,
 * which is undefined for a 32-bit operand. a is evaluated twice, so it must
 * not have side effects.
 */
#define ROL32(a, offset) ((((UINT32)(a)) << (offset)) ^ (((UINT32)(a)) >> (32-(offset))))
27 
/*
 * Conversion of a lane, given as two 32-bit words (low, high), to the
 * bit-interleaved form: one word gathering the bits at even positions of the
 * lane and one word gathering the bits at odd positions.
 *
 * Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
 */

/*
 * Leaves in temp0 (resp. temp1) the word low (resp. high) with its bits
 * regrouped: bits from even positions packed into the lower 16 bits, bits
 * from odd positions packed into the upper 16 bits. temp is scratch.
 * low and high are each evaluated exactly once, in that order.
 * The expansion is a sequence of statements ending with a semicolon.
 */
#define prepareToBitInterleaving(low, high, temp, temp0, temp1) \
        temp0 = (low); \
        temp = (temp0 ^ (temp0 >>  1)) & 0x22222222UL; \
        temp0 = temp0 ^ temp ^ (temp <<  1); \
        temp = (temp0 ^ (temp0 >>  2)) & 0x0C0C0C0CUL; \
        temp0 = temp0 ^ temp ^ (temp <<  2); \
        temp = (temp0 ^ (temp0 >>  4)) & 0x00F000F0UL; \
        temp0 = temp0 ^ temp ^ (temp <<  4); \
        temp = (temp0 ^ (temp0 >>  8)) & 0x0000FF00UL; \
        temp0 = temp0 ^ temp ^ (temp <<  8); \
        temp1 = (high); \
        temp = (temp1 ^ (temp1 >>  1)) & 0x22222222UL; \
        temp1 = temp1 ^ temp ^ (temp <<  1); \
        temp = (temp1 ^ (temp1 >>  2)) & 0x0C0C0C0CUL; \
        temp1 = temp1 ^ temp ^ (temp <<  2); \
        temp = (temp1 ^ (temp1 >>  4)) & 0x00F000F0UL; \
        temp1 = temp1 ^ temp ^ (temp <<  4); \
        temp = (temp1 ^ (temp1 >>  8)) & 0x0000FF00UL; \
        temp1 = temp1 ^ temp ^ (temp <<  8);

/*
 * The three macros below interleave (low, high) and then combine the result
 * with the destination words: even receives the lower halves of temp0/temp1,
 * odd receives their upper halves. even and odd are each evaluated exactly
 * once (even first), which callers passing *(p++) rely on.
 */

/* even/odd ^= interleaved(low, high) */
#define toBitInterleavingAndXOR(low, high, even, odd, temp, temp0, temp1) \
        prepareToBitInterleaving(low, high, temp, temp0, temp1) \
        (even) ^= (temp0 & 0x0000FFFF) | (temp1 << 16); \
        (odd) ^= (temp0 >> 16) | (temp1 & 0xFFFF0000);

/* even/odd &= interleaved(low, high) */
#define toBitInterleavingAndAND(low, high, even, odd, temp, temp0, temp1) \
        prepareToBitInterleaving(low, high, temp, temp0, temp1) \
        (even) &= (temp0 & 0x0000FFFF) | (temp1 << 16); \
        (odd) &= (temp0 >> 16) | (temp1 & 0xFFFF0000);

/* even/odd = interleaved(low, high) */
#define toBitInterleavingAndSet(low, high, even, odd, temp, temp0, temp1) \
        prepareToBitInterleaving(low, high, temp, temp0, temp1) \
        (even) = (temp0 & 0x0000FFFF) | (temp1 << 16); \
        (odd) = (temp0 >> 16) | (temp1 & 0xFFFF0000);
55 
/*
 * Conversion from the bit-interleaved form (even word, odd word) back to a
 * lane expressed as two ordinary 32-bit words (low, high).
 *
 * Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
 */

/*
 * Leaves the low word of the lane in temp0 and the high word in temp1.
 * temp is scratch. even and odd are each evaluated exactly once, in that
 * order. The expansion is a sequence of statements ending with a semicolon.
 */
#define prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \
        temp0 = (even); \
        temp1 = (odd); \
        temp = (temp0 & 0x0000FFFF) | (temp1 << 16); \
        temp1 = (temp0 >> 16) | (temp1 & 0xFFFF0000); \
        temp0 = temp; \
        temp = (temp0 ^ (temp0 >>  8)) & 0x0000FF00UL; \
        temp0 = temp0 ^ temp ^ (temp <<  8); \
        temp = (temp0 ^ (temp0 >>  4)) & 0x00F000F0UL; \
        temp0 = temp0 ^ temp ^ (temp <<  4); \
        temp = (temp0 ^ (temp0 >>  2)) & 0x0C0C0C0CUL; \
        temp0 = temp0 ^ temp ^ (temp <<  2); \
        temp = (temp0 ^ (temp0 >>  1)) & 0x22222222UL; \
        temp0 = temp0 ^ temp ^ (temp <<  1); \
        temp = (temp1 ^ (temp1 >>  8)) & 0x0000FF00UL; \
        temp1 = temp1 ^ temp ^ (temp <<  8); \
        temp = (temp1 ^ (temp1 >>  4)) & 0x00F000F0UL; \
        temp1 = temp1 ^ temp ^ (temp <<  4); \
        temp = (temp1 ^ (temp1 >>  2)) & 0x0C0C0C0CUL; \
        temp1 = temp1 ^ temp ^ (temp <<  2); \
        temp = (temp1 ^ (temp1 >>  1)) & 0x22222222UL; \
        temp1 = temp1 ^ temp ^ (temp <<  1);

/* low/high = de-interleaved(even, odd); low is assigned before high. */
#define fromBitInterleaving(even, odd, low, high, temp, temp0, temp1) \
        prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \
        (low) = temp0; \
        (high) = temp1;

/*
 * lowOut/highOut = (lowIn, highIn) XOR de-interleaved(even, odd).
 * Evaluation order of the caller-supplied operands: lowOut, lowIn, then
 * highOut, highIn are each evaluated once.
 */
#define fromBitInterleavingAndXOR(even, odd, lowIn, highIn, lowOut, highOut, temp, temp0, temp1) \
        prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \
        (lowOut) = (lowIn) ^ temp0; \
        (highOut) = (highIn) ^ temp1;
81 
KeccakP1600_SetBytesInLaneToZero(void * state,unsigned int lanePosition,unsigned int offset,unsigned int length)82 void KeccakP1600_SetBytesInLaneToZero(void *state, unsigned int lanePosition, unsigned int offset, unsigned int length)
83 {
84     UINT8 laneAsBytes[8];
85     UINT32 low, high;
86     UINT32 temp, temp0, temp1;
87     UINT32 *stateAsHalfLanes = (UINT32*)state;
88 
89     memset(laneAsBytes, 0xFF, offset);
90     memset(laneAsBytes+offset, 0x00, length);
91     memset(laneAsBytes+offset+length, 0xFF, 8-offset-length);
92 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
93     low = *((UINT32*)(laneAsBytes+0));
94     high = *((UINT32*)(laneAsBytes+4));
95 #else
96     low = laneAsBytes[0]
97         | ((UINT32)(laneAsBytes[1]) << 8)
98         | ((UINT32)(laneAsBytes[2]) << 16)
99         | ((UINT32)(laneAsBytes[3]) << 24);
100     high = laneAsBytes[4]
101         | ((UINT32)(laneAsBytes[5]) << 8)
102         | ((UINT32)(laneAsBytes[6]) << 16)
103         | ((UINT32)(laneAsBytes[7]) << 24);
104 #endif
105     toBitInterleavingAndAND(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
106 }
107 
108 /* ---------------------------------------------------------------- */
109 
/*
 * Puts the state in its initial condition: all 200 bytes pointed to by
 * state are set to zero.
 */
void KeccakP1600_Initialize(void *state)
{
    /* 25 lanes of 8 bytes each. */
    const size_t stateSizeInBytes = 25 * 8;

    memset(state, 0, stateSizeInBytes);
}
114 
115 /* ---------------------------------------------------------------- */
116 
KeccakP1600_AddByte(void * state,unsigned char byte,unsigned int offset)117 void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset)
118 {
119     unsigned int lanePosition = offset/8;
120     unsigned int offsetInLane = offset%8;
121     UINT32 low, high;
122     UINT32 temp, temp0, temp1;
123     UINT32 *stateAsHalfLanes = (UINT32*)state;
124 
125     if (offsetInLane < 4) {
126         low = (UINT32)byte << (offsetInLane*8);
127         high = 0;
128     }
129     else {
130         low = 0;
131         high = (UINT32)byte << ((offsetInLane-4)*8);
132     }
133     toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
134 }
135 
136 /* ---------------------------------------------------------------- */
137 
KeccakP1600_AddBytesInLane(void * state,unsigned int lanePosition,const unsigned char * data,unsigned int offset,unsigned int length)138 void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
139 {
140     UINT8 laneAsBytes[8];
141     UINT32 low, high;
142     UINT32 temp, temp0, temp1;
143     UINT32 *stateAsHalfLanes = (UINT32*)state;
144 
145     memset(laneAsBytes, 0, 8);
146     memcpy(laneAsBytes+offset, data, length);
147 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
148     low = *((UINT32*)(laneAsBytes+0));
149     high = *((UINT32*)(laneAsBytes+4));
150 #else
151     low = laneAsBytes[0]
152         | ((UINT32)(laneAsBytes[1]) << 8)
153         | ((UINT32)(laneAsBytes[2]) << 16)
154         | ((UINT32)(laneAsBytes[3]) << 24);
155     high = laneAsBytes[4]
156         | ((UINT32)(laneAsBytes[5]) << 8)
157         | ((UINT32)(laneAsBytes[6]) << 16)
158         | ((UINT32)(laneAsBytes[7]) << 24);
159 #endif
160     toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
161 }
162 
163 /* ---------------------------------------------------------------- */
164 
KeccakP1600_AddLanes(void * state,const unsigned char * data,unsigned int laneCount)165 void KeccakP1600_AddLanes(void *state, const unsigned char *data, unsigned int laneCount)
166 {
167 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
168     const UINT32 * pI = (const UINT32 *)data;
169     UINT32 * pS = (UINT32*)state;
170     UINT32 t, x0, x1;
171     int i;
172     for (i = laneCount-1; i >= 0; --i) {
173 #ifdef NO_MISALIGNED_ACCESSES
174         UINT32 low;
175         UINT32 high;
176         memcpy(&low, pI++, 4);
177         memcpy(&high, pI++, 4);
178         toBitInterleavingAndXOR(low, high, *(pS++), *(pS++), t, x0, x1);
179 #else
180         toBitInterleavingAndXOR(*(pI++), *(pI++), *(pS++), *(pS++), t, x0, x1)
181 #endif
182     }
183 #else
184     unsigned int lanePosition;
185     for(lanePosition=0; lanePosition<laneCount; lanePosition++) {
186         UINT8 laneAsBytes[8];
187         memcpy(laneAsBytes, data+lanePosition*8, 8);
188         UINT32 low = laneAsBytes[0]
189             | ((UINT32)(laneAsBytes[1]) << 8)
190             | ((UINT32)(laneAsBytes[2]) << 16)
191             | ((UINT32)(laneAsBytes[3]) << 24);
192         UINT32 high = laneAsBytes[4]
193             | ((UINT32)(laneAsBytes[5]) << 8)
194             | ((UINT32)(laneAsBytes[6]) << 16)
195             | ((UINT32)(laneAsBytes[7]) << 24);
196         UINT32 even, odd, temp, temp0, temp1;
197         UINT32 *stateAsHalfLanes = (UINT32*)state;
198         toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
199     }
200 #endif
201 }
202 
203 /* ---------------------------------------------------------------- */
204 
KeccakP1600_AddBytes(void * state,const unsigned char * data,unsigned int offset,unsigned int length)205 void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
206 {
207     SnP_AddBytes(state, data, offset, length, KeccakP1600_AddLanes, KeccakP1600_AddBytesInLane, 8);
208 }
209 
210 /* ---------------------------------------------------------------- */
211 
/*
 * Replaces `length` bytes of lane `lanePosition`, starting at byte `offset`
 * inside the lane, with the bytes from `data`.
 */
void KeccakP1600_OverwriteBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
{
    /* Clear the target bytes first ... */
    KeccakP1600_SetBytesInLaneToZero(
        state, lanePosition, offset, length);
    /* ... so that XORing the new bytes in amounts to storing them. */
    KeccakP1600_AddBytesInLane(
        state, lanePosition, data, offset, length);
}
217 
218 /* ---------------------------------------------------------------- */
219 
KeccakP1600_OverwriteLanes(void * state,const unsigned char * data,unsigned int laneCount)220 void KeccakP1600_OverwriteLanes(void *state, const unsigned char *data, unsigned int laneCount)
221 {
222 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
223     const UINT32 * pI = (const UINT32 *)data;
224     UINT32 * pS = (UINT32 *)state;
225     UINT32 t, x0, x1;
226     int i;
227     for (i = laneCount-1; i >= 0; --i) {
228 #ifdef NO_MISALIGNED_ACCESSES
229         UINT32 low;
230         UINT32 high;
231         memcpy(&low, pI++, 4);
232         memcpy(&high, pI++, 4);
233         toBitInterleavingAndSet(low, high, *(pS++), *(pS++), t, x0, x1);
234 #else
235         toBitInterleavingAndSet(*(pI++), *(pI++), *(pS++), *(pS++), t, x0, x1)
236 #endif
237     }
238 #else
239     unsigned int lanePosition;
240     for(lanePosition=0; lanePosition<laneCount; lanePosition++) {
241         UINT8 laneAsBytes[8];
242         memcpy(laneAsBytes, data+lanePosition*8, 8);
243         UINT32 low = laneAsBytes[0]
244             | ((UINT32)(laneAsBytes[1]) << 8)
245             | ((UINT32)(laneAsBytes[2]) << 16)
246             | ((UINT32)(laneAsBytes[3]) << 24);
247         UINT32 high = laneAsBytes[4]
248             | ((UINT32)(laneAsBytes[5]) << 8)
249             | ((UINT32)(laneAsBytes[6]) << 16)
250             | ((UINT32)(laneAsBytes[7]) << 24);
251         UINT32 even, odd, temp, temp0, temp1;
252         UINT32 *stateAsHalfLanes = (UINT32*)state;
253         toBitInterleavingAndSet(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
254     }
255 #endif
256 }
257 
258 /* ---------------------------------------------------------------- */
259 
KeccakP1600_OverwriteBytes(void * state,const unsigned char * data,unsigned int offset,unsigned int length)260 void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
261 {
262     SnP_OverwriteBytes(state, data, offset, length, KeccakP1600_OverwriteLanes, KeccakP1600_OverwriteBytesInLane, 8);
263 }
264 
265 /* ---------------------------------------------------------------- */
266 
KeccakP1600_OverwriteWithZeroes(void * state,unsigned int byteCount)267 void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount)
268 {
269     UINT32 *stateAsHalfLanes = (UINT32*)state;
270     unsigned int i;
271 
272     for(i=0; i<byteCount/8; i++) {
273         stateAsHalfLanes[i*2+0] = 0;
274         stateAsHalfLanes[i*2+1] = 0;
275     }
276     if (byteCount%8 != 0)
277         KeccakP1600_SetBytesInLaneToZero(state, byteCount/8, 0, byteCount%8);
278 }
279 
280 /* ---------------------------------------------------------------- */
281 
KeccakP1600_ExtractBytesInLane(const void * state,unsigned int lanePosition,unsigned char * data,unsigned int offset,unsigned int length)282 void KeccakP1600_ExtractBytesInLane(const void *state, unsigned int lanePosition, unsigned char *data, unsigned int offset, unsigned int length)
283 {
284     UINT32 *stateAsHalfLanes = (UINT32*)state;
285     UINT32 low, high, temp, temp0, temp1;
286     UINT8 laneAsBytes[8];
287 
288     fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1);
289 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
290     *((UINT32*)(laneAsBytes+0)) = low;
291     *((UINT32*)(laneAsBytes+4)) = high;
292 #else
293     laneAsBytes[0] = low & 0xFF;
294     laneAsBytes[1] = (low >> 8) & 0xFF;
295     laneAsBytes[2] = (low >> 16) & 0xFF;
296     laneAsBytes[3] = (low >> 24) & 0xFF;
297     laneAsBytes[4] = high & 0xFF;
298     laneAsBytes[5] = (high >> 8) & 0xFF;
299     laneAsBytes[6] = (high >> 16) & 0xFF;
300     laneAsBytes[7] = (high >> 24) & 0xFF;
301 #endif
302     memcpy(data, laneAsBytes+offset, length);
303 }
304 
305 /* ---------------------------------------------------------------- */
306 
KeccakP1600_ExtractLanes(const void * state,unsigned char * data,unsigned int laneCount)307 void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount)
308 {
309 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
310     UINT32 * pI = (UINT32 *)data;
311     const UINT32 * pS = ( const UINT32 *)state;
312     UINT32 t, x0, x1;
313     int i;
314     for (i = laneCount-1; i >= 0; --i) {
315 #ifdef NO_MISALIGNED_ACCESSES
316         UINT32 low;
317         UINT32 high;
318         fromBitInterleaving(*(pS++), *(pS++), low, high, t, x0, x1);
319         memcpy(pI++, &low, 4);
320         memcpy(pI++, &high, 4);
321 #else
322         fromBitInterleaving(*(pS++), *(pS++), *(pI++), *(pI++), t, x0, x1)
323 #endif
324     }
325 #else
326     unsigned int lanePosition;
327     for(lanePosition=0; lanePosition<laneCount; lanePosition++) {
328         UINT32 *stateAsHalfLanes = (UINT32*)state;
329         UINT32 low, high, temp, temp0, temp1;
330         fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1);
331         UINT8 laneAsBytes[8];
332         laneAsBytes[0] = low & 0xFF;
333         laneAsBytes[1] = (low >> 8) & 0xFF;
334         laneAsBytes[2] = (low >> 16) & 0xFF;
335         laneAsBytes[3] = (low >> 24) & 0xFF;
336         laneAsBytes[4] = high & 0xFF;
337         laneAsBytes[5] = (high >> 8) & 0xFF;
338         laneAsBytes[6] = (high >> 16) & 0xFF;
339         laneAsBytes[7] = (high >> 24) & 0xFF;
340         memcpy(data+lanePosition*8, laneAsBytes, 8);
341     }
342 #endif
343 }
344 
345 /* ---------------------------------------------------------------- */
346 
KeccakP1600_ExtractBytes(const void * state,unsigned char * data,unsigned int offset,unsigned int length)347 void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
348 {
349     SnP_ExtractBytes(state, data, offset, length, KeccakP1600_ExtractLanes, KeccakP1600_ExtractBytesInLane, 8);
350 }
351 
352 /* ---------------------------------------------------------------- */
353 
KeccakP1600_ExtractAndAddBytesInLane(const void * state,unsigned int lanePosition,const unsigned char * input,unsigned char * output,unsigned int offset,unsigned int length)354 void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
355 {
356     UINT32 *stateAsHalfLanes = (UINT32*)state;
357     UINT32 low, high, temp, temp0, temp1;
358     UINT8 laneAsBytes[8];
359     unsigned int i;
360 
361     fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1);
362 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
363     *((UINT32*)(laneAsBytes+0)) = low;
364     *((UINT32*)(laneAsBytes+4)) = high;
365 #else
366     laneAsBytes[0] = low & 0xFF;
367     laneAsBytes[1] = (low >> 8) & 0xFF;
368     laneAsBytes[2] = (low >> 16) & 0xFF;
369     laneAsBytes[3] = (low >> 24) & 0xFF;
370     laneAsBytes[4] = high & 0xFF;
371     laneAsBytes[5] = (high >> 8) & 0xFF;
372     laneAsBytes[6] = (high >> 16) & 0xFF;
373     laneAsBytes[7] = (high >> 24) & 0xFF;
374 #endif
375     for(i=0; i<length; i++)
376         output[i] = input[i] ^ laneAsBytes[offset+i];
377 }
378 
379 /* ---------------------------------------------------------------- */
380 
KeccakP1600_ExtractAndAddLanes(const void * state,const unsigned char * input,unsigned char * output,unsigned int laneCount)381 void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *input, unsigned char *output, unsigned int laneCount)
382 {
383 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
384     const UINT32 * pI = (const UINT32 *)input;
385     UINT32 * pO = (UINT32 *)output;
386     const UINT32 * pS = (const UINT32 *)state;
387     UINT32 t, x0, x1;
388     int i;
389     for (i = laneCount-1; i >= 0; --i) {
390 #ifdef NO_MISALIGNED_ACCESSES
391         UINT32 low;
392         UINT32 high;
393         fromBitInterleaving(*(pS++), *(pS++), low, high, t, x0, x1);
394         *(pO++) = *(pI++) ^ low;
395         *(pO++) = *(pI++) ^ high;
396 #else
397         fromBitInterleavingAndXOR(*(pS++), *(pS++), *(pI++), *(pI++), *(pO++), *(pO++), t, x0, x1)
398 #endif
399     }
400 #else
401     unsigned int lanePosition;
402     for(lanePosition=0; lanePosition<laneCount; lanePosition++) {
403         UINT32 *stateAsHalfLanes = (UINT32*)state;
404         UINT32 low, high, temp, temp0, temp1;
405         fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1);
406         UINT8 laneAsBytes[8];
407         laneAsBytes[0] = low & 0xFF;
408         laneAsBytes[1] = (low >> 8) & 0xFF;
409         laneAsBytes[2] = (low >> 16) & 0xFF;
410         laneAsBytes[3] = (low >> 24) & 0xFF;
411         laneAsBytes[4] = high & 0xFF;
412         laneAsBytes[5] = (high >> 8) & 0xFF;
413         laneAsBytes[6] = (high >> 16) & 0xFF;
414         laneAsBytes[7] = (high >> 24) & 0xFF;
415         ((UINT32*)(output+lanePosition*8))[0] = ((UINT32*)(input+lanePosition*8))[0] ^ (*(const UINT32*)(laneAsBytes+0));
416         ((UINT32*)(output+lanePosition*8))[1] = ((UINT32*)(input+lanePosition*8))[0] ^ (*(const UINT32*)(laneAsBytes+4));
417     }
418 #endif
419 }
420 /* ---------------------------------------------------------------- */
421 
KeccakP1600_ExtractAndAddBytes(const void * state,const unsigned char * input,unsigned char * output,unsigned int offset,unsigned int length)422 void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
423 {
424     SnP_ExtractAndAddBytes(state, input, output, offset, length, KeccakP1600_ExtractAndAddLanes, KeccakP1600_ExtractAndAddBytesInLane, 8);
425 }
426 
427 /* ---------------------------------------------------------------- */
428 
429 static const UINT32 KeccakF1600RoundConstants_int2[2*24+1] =
430 {
431     0x00000001UL,    0x00000000UL,
432     0x00000000UL,    0x00000089UL,
433     0x00000000UL,    0x8000008bUL,
434     0x00000000UL,    0x80008080UL,
435     0x00000001UL,    0x0000008bUL,
436     0x00000001UL,    0x00008000UL,
437     0x00000001UL,    0x80008088UL,
438     0x00000001UL,    0x80000082UL,
439     0x00000000UL,    0x0000000bUL,
440     0x00000000UL,    0x0000000aUL,
441     0x00000001UL,    0x00008082UL,
442     0x00000000UL,    0x00008003UL,
443     0x00000001UL,    0x0000808bUL,
444     0x00000001UL,    0x8000000bUL,
445     0x00000001UL,    0x8000008aUL,
446     0x00000001UL,    0x80000081UL,
447     0x00000000UL,    0x80000081UL,
448     0x00000000UL,    0x80000008UL,
449     0x00000000UL,    0x00000083UL,
450     0x00000000UL,    0x80008003UL,
451     0x00000001UL,    0x80008088UL,
452     0x00000000UL,    0x80000088UL,
453     0x00000001UL,    0x00008000UL,
454     0x00000000UL,    0x80008082UL,
455     0x000000FFUL
456 };
457 
/*
 * KeccakRound0: one round applied to the state held in the 32-bit variables
 * A<row><column>0 and A<row><column>1 (rows b,g,k,m,s; columns a,e,i,o,u;
 * two words per lane).
 *
 * The macro declares nothing. Where it is expanded, the following must be in
 * scope: the fifty A* words, the temporaries Cx, Cy, Cz, Cw, the ten words
 * Da0..Du1, the five words Ba..Bu, and the pointer pRoundConstants.
 *
 * Structure:
 *  - First part (before the empty continuation line): the five words of each
 *    column are XORed together, and these parities are combined, one of each
 *    pair going through ROL32(., 1) where needed, into the ten D words
 *    (the theta step on split lanes).
 *  - Second part: ten groups of five words. Each group XORs the matching D
 *    word into five A words, rotates them with ROL32 (rho, with the lane
 *    selection of pi), then writes B ^ ((~B') & B'') (chi) back into the very
 *    same five A variables it has just read. Because of this in-place
 *    write-back, a given variable does not hold the same lane after the
 *    round as before it.
 *  - Two words are read from pRoundConstants with post-increment and XORed
 *    into Aba0 and Aba1 (iota).
 *
 * The expansion does NOT end with a semicolon; the user of the macro has to
 * supply it.
 */
#define KeccakRound0() \
        Cx = Abu0^Agu0^Aku0^Amu0^Asu0; \
        Du1 = Abe1^Age1^Ake1^Ame1^Ase1; \
        Da0 = Cx^ROL32(Du1, 1); \
        Cz = Abu1^Agu1^Aku1^Amu1^Asu1; \
        Du0 = Abe0^Age0^Ake0^Ame0^Ase0; \
        Da1 = Cz^Du0; \
        Cw = Abi0^Agi0^Aki0^Ami0^Asi0; \
        Do0 = Cw^ROL32(Cz, 1); \
        Cy = Abi1^Agi1^Aki1^Ami1^Asi1; \
        Do1 = Cy^Cx; \
        Cx = Aba0^Aga0^Aka0^Ama0^Asa0; \
        De0 = Cx^ROL32(Cy, 1); \
        Cz = Aba1^Aga1^Aka1^Ama1^Asa1; \
        De1 = Cz^Cw; \
        Cy = Abo1^Ago1^Ako1^Amo1^Aso1; \
        Di0 = Du0^ROL32(Cy, 1); \
        Cw = Abo0^Ago0^Ako0^Amo0^Aso0; \
        Di1 = Du1^Cw; \
        Du0 = Cw^ROL32(Cz, 1); \
        Du1 = Cy^Cx; \
\
        Ba = (Aba0^Da0); \
        Be = ROL32((Age0^De0), 22); \
        Bi = ROL32((Aki1^Di1), 22); \
        Bo = ROL32((Amo1^Do1), 11); \
        Bu = ROL32((Asu0^Du0),  7); \
        Aba0 =   Ba ^((~Be)&  Bi ); \
        Aba0 ^= *(pRoundConstants++); \
        Age0 =   Be ^((~Bi)&  Bo ); \
        Aki1 =   Bi ^((~Bo)&  Bu ); \
        Amo1 =   Bo ^((~Bu)&  Ba ); \
        Asu0 =   Bu ^((~Ba)&  Be ); \
        Ba = (Aba1^Da1); \
        Be = ROL32((Age1^De1), 22); \
        Bi = ROL32((Aki0^Di0), 21); \
        Bo = ROL32((Amo0^Do0), 10); \
        Bu = ROL32((Asu1^Du1),  7); \
        Aba1 =   Ba ^((~Be)&  Bi ); \
        Aba1 ^= *(pRoundConstants++); \
        Age1 =   Be ^((~Bi)&  Bo ); \
        Aki0 =   Bi ^((~Bo)&  Bu ); \
        Amo0 =   Bo ^((~Bu)&  Ba ); \
        Asu1 =   Bu ^((~Ba)&  Be ); \
        Bi = ROL32((Aka1^Da1),  2); \
        Bo = ROL32((Ame1^De1), 23); \
        Bu = ROL32((Asi1^Di1), 31); \
        Ba = ROL32((Abo0^Do0), 14); \
        Be = ROL32((Agu0^Du0), 10); \
        Aka1 =   Ba ^((~Be)&  Bi ); \
        Ame1 =   Be ^((~Bi)&  Bo ); \
        Asi1 =   Bi ^((~Bo)&  Bu ); \
        Abo0 =   Bo ^((~Bu)&  Ba ); \
        Agu0 =   Bu ^((~Ba)&  Be ); \
        Bi = ROL32((Aka0^Da0),  1); \
        Bo = ROL32((Ame0^De0), 22); \
        Bu = ROL32((Asi0^Di0), 30); \
        Ba = ROL32((Abo1^Do1), 14); \
        Be = ROL32((Agu1^Du1), 10); \
        Aka0 =   Ba ^((~Be)&  Bi ); \
        Ame0 =   Be ^((~Bi)&  Bo ); \
        Asi0 =   Bi ^((~Bo)&  Bu ); \
        Abo1 =   Bo ^((~Bu)&  Ba ); \
        Agu1 =   Bu ^((~Ba)&  Be ); \
        Bu = ROL32((Asa0^Da0),  9); \
        Ba = ROL32((Abe1^De1),  1); \
        Be = ROL32((Agi0^Di0),  3); \
        Bi = ROL32((Ako1^Do1), 13); \
        Bo = ROL32((Amu0^Du0),  4); \
        Asa0 =   Ba ^((~Be)&  Bi ); \
        Abe1 =   Be ^((~Bi)&  Bo ); \
        Agi0 =   Bi ^((~Bo)&  Bu ); \
        Ako1 =   Bo ^((~Bu)&  Ba ); \
        Amu0 =   Bu ^((~Ba)&  Be ); \
        Bu = ROL32((Asa1^Da1),  9); \
        Ba = (Abe0^De0); \
        Be = ROL32((Agi1^Di1),  3); \
        Bi = ROL32((Ako0^Do0), 12); \
        Bo = ROL32((Amu1^Du1),  4); \
        Asa1 =   Ba ^((~Be)&  Bi ); \
        Abe0 =   Be ^((~Bi)&  Bo ); \
        Agi1 =   Bi ^((~Bo)&  Bu ); \
        Ako0 =   Bo ^((~Bu)&  Ba ); \
        Amu1 =   Bu ^((~Ba)&  Be ); \
        Be = ROL32((Aga0^Da0), 18); \
        Bi = ROL32((Ake0^De0),  5); \
        Bo = ROL32((Ami1^Di1),  8); \
        Bu = ROL32((Aso0^Do0), 28); \
        Ba = ROL32((Abu1^Du1), 14); \
        Aga0 =   Ba ^((~Be)&  Bi ); \
        Ake0 =   Be ^((~Bi)&  Bo ); \
        Ami1 =   Bi ^((~Bo)&  Bu ); \
        Aso0 =   Bo ^((~Bu)&  Ba ); \
        Abu1 =   Bu ^((~Ba)&  Be ); \
        Be = ROL32((Aga1^Da1), 18); \
        Bi = ROL32((Ake1^De1),  5); \
        Bo = ROL32((Ami0^Di0),  7); \
        Bu = ROL32((Aso1^Do1), 28); \
        Ba = ROL32((Abu0^Du0), 13); \
        Aga1 =   Ba ^((~Be)&  Bi ); \
        Ake1 =   Be ^((~Bi)&  Bo ); \
        Ami0 =   Bi ^((~Bo)&  Bu ); \
        Aso1 =   Bo ^((~Bu)&  Ba ); \
        Abu0 =   Bu ^((~Ba)&  Be ); \
        Bo = ROL32((Ama1^Da1), 21); \
        Bu = ROL32((Ase0^De0),  1); \
        Ba = ROL32((Abi0^Di0), 31); \
        Be = ROL32((Ago1^Do1), 28); \
        Bi = ROL32((Aku1^Du1), 20); \
        Ama1 =   Ba ^((~Be)&  Bi ); \
        Ase0 =   Be ^((~Bi)&  Bo ); \
        Abi0 =   Bi ^((~Bo)&  Bu ); \
        Ago1 =   Bo ^((~Bu)&  Ba ); \
        Aku1 =   Bu ^((~Ba)&  Be ); \
        Bo = ROL32((Ama0^Da0), 20); \
        Bu = ROL32((Ase1^De1),  1); \
        Ba = ROL32((Abi1^Di1), 31); \
        Be = ROL32((Ago0^Do0), 27); \
        Bi = ROL32((Aku0^Du0), 19); \
        Ama0 =   Ba ^((~Be)&  Bi ); \
        Ase1 =   Be ^((~Bi)&  Bo ); \
        Abi1 =   Bi ^((~Bo)&  Bu ); \
        Ago0 =   Bo ^((~Bu)&  Ba ); \
        Aku0 =   Bu ^((~Ba)&  Be )
582 
/*
 * KeccakRound1: same sequence of operations as KeccakRound0 (column
 * parities and D words, then ten groups of XOR-with-D / ROL32 /
 * B ^ ((~B') & B'') written back in place, with two words taken from
 * pRoundConstants and XORed into Aba0 and Aba1), using the same rotation
 * amounts but different A operands.
 *
 * The operand names differ because KeccakRound0 writes each result back
 * into the variables it read from, so the lanes are found in other
 * variables when this macro runs; e.g. the first column parity here reads
 * Asu0, Agu0, Amu0, Abu1, Aku1, which are the variables into which
 * KeccakRound0 stored the "u" outputs of its five even-word groups.
 *
 * The macro declares nothing: the fifty A* words, Cx, Cy, Cz, Cw, Da0..Du1,
 * Ba..Bu and pRoundConstants must be in scope where it is expanded.
 *
 * Unlike KeccakRound0, the expansion ends with a semicolon.
 */
#define KeccakRound1() \
        Cx = Asu0^Agu0^Amu0^Abu1^Aku1; \
        Du1 = Age1^Ame0^Abe0^Ake1^Ase1; \
        Da0 = Cx^ROL32(Du1, 1); \
        Cz = Asu1^Agu1^Amu1^Abu0^Aku0; \
        Du0 = Age0^Ame1^Abe1^Ake0^Ase0; \
        Da1 = Cz^Du0; \
        Cw = Aki1^Asi1^Agi0^Ami1^Abi0; \
        Do0 = Cw^ROL32(Cz, 1); \
        Cy = Aki0^Asi0^Agi1^Ami0^Abi1; \
        Do1 = Cy^Cx; \
        Cx = Aba0^Aka1^Asa0^Aga0^Ama1; \
        De0 = Cx^ROL32(Cy, 1); \
        Cz = Aba1^Aka0^Asa1^Aga1^Ama0; \
        De1 = Cz^Cw; \
        Cy = Amo0^Abo1^Ako0^Aso1^Ago0; \
        Di0 = Du0^ROL32(Cy, 1); \
        Cw = Amo1^Abo0^Ako1^Aso0^Ago1; \
        Di1 = Du1^Cw; \
        Du0 = Cw^ROL32(Cz, 1); \
        Du1 = Cy^Cx; \
\
        Ba = (Aba0^Da0); \
        Be = ROL32((Ame1^De0), 22); \
        Bi = ROL32((Agi1^Di1), 22); \
        Bo = ROL32((Aso1^Do1), 11); \
        Bu = ROL32((Aku1^Du0),  7); \
        Aba0 =   Ba ^((~Be)&  Bi ); \
        Aba0 ^= *(pRoundConstants++); \
        Ame1 =   Be ^((~Bi)&  Bo ); \
        Agi1 =   Bi ^((~Bo)&  Bu ); \
        Aso1 =   Bo ^((~Bu)&  Ba ); \
        Aku1 =   Bu ^((~Ba)&  Be ); \
        Ba = (Aba1^Da1); \
        Be = ROL32((Ame0^De1), 22); \
        Bi = ROL32((Agi0^Di0), 21); \
        Bo = ROL32((Aso0^Do0), 10); \
        Bu = ROL32((Aku0^Du1),  7); \
        Aba1 =   Ba ^((~Be)&  Bi ); \
        Aba1 ^= *(pRoundConstants++); \
        Ame0 =   Be ^((~Bi)&  Bo ); \
        Agi0 =   Bi ^((~Bo)&  Bu ); \
        Aso0 =   Bo ^((~Bu)&  Ba ); \
        Aku0 =   Bu ^((~Ba)&  Be ); \
        Bi = ROL32((Asa1^Da1),  2); \
        Bo = ROL32((Ake1^De1), 23); \
        Bu = ROL32((Abi1^Di1), 31); \
        Ba = ROL32((Amo1^Do0), 14); \
        Be = ROL32((Agu0^Du0), 10); \
        Asa1 =   Ba ^((~Be)&  Bi ); \
        Ake1 =   Be ^((~Bi)&  Bo ); \
        Abi1 =   Bi ^((~Bo)&  Bu ); \
        Amo1 =   Bo ^((~Bu)&  Ba ); \
        Agu0 =   Bu ^((~Ba)&  Be ); \
        Bi = ROL32((Asa0^Da0),  1); \
        Bo = ROL32((Ake0^De0), 22); \
        Bu = ROL32((Abi0^Di0), 30); \
        Ba = ROL32((Amo0^Do1), 14); \
        Be = ROL32((Agu1^Du1), 10); \
        Asa0 =   Ba ^((~Be)&  Bi ); \
        Ake0 =   Be ^((~Bi)&  Bo ); \
        Abi0 =   Bi ^((~Bo)&  Bu ); \
        Amo0 =   Bo ^((~Bu)&  Ba ); \
        Agu1 =   Bu ^((~Ba)&  Be ); \
        Bu = ROL32((Ama1^Da0),  9); \
        Ba = ROL32((Age1^De1),  1); \
        Be = ROL32((Asi1^Di0),  3); \
        Bi = ROL32((Ako0^Do1), 13); \
        Bo = ROL32((Abu1^Du0),  4); \
        Ama1 =   Ba ^((~Be)&  Bi ); \
        Age1 =   Be ^((~Bi)&  Bo ); \
        Asi1 =   Bi ^((~Bo)&  Bu ); \
        Ako0 =   Bo ^((~Bu)&  Ba ); \
        Abu1 =   Bu ^((~Ba)&  Be ); \
        Bu = ROL32((Ama0^Da1),  9); \
        Ba = (Age0^De0); \
        Be = ROL32((Asi0^Di1),  3); \
        Bi = ROL32((Ako1^Do0), 12); \
        Bo = ROL32((Abu0^Du1),  4); \
        Ama0 =   Ba ^((~Be)&  Bi ); \
        Age0 =   Be ^((~Bi)&  Bo ); \
        Asi0 =   Bi ^((~Bo)&  Bu ); \
        Ako1 =   Bo ^((~Bu)&  Ba ); \
        Abu0 =   Bu ^((~Ba)&  Be ); \
        Be = ROL32((Aka1^Da0), 18); \
        Bi = ROL32((Abe1^De0),  5); \
        Bo = ROL32((Ami0^Di1),  8); \
        Bu = ROL32((Ago1^Do0), 28); \
        Ba = ROL32((Asu1^Du1), 14); \
        Aka1 =   Ba ^((~Be)&  Bi ); \
        Abe1 =   Be ^((~Bi)&  Bo ); \
        Ami0 =   Bi ^((~Bo)&  Bu ); \
        Ago1 =   Bo ^((~Bu)&  Ba ); \
        Asu1 =   Bu ^((~Ba)&  Be ); \
        Be = ROL32((Aka0^Da1), 18); \
        Bi = ROL32((Abe0^De1),  5); \
        Bo = ROL32((Ami1^Di0),  7); \
        Bu = ROL32((Ago0^Do1), 28); \
        Ba = ROL32((Asu0^Du0), 13); \
        Aka0 =   Ba ^((~Be)&  Bi ); \
        Abe0 =   Be ^((~Bi)&  Bo ); \
        Ami1 =   Bi ^((~Bo)&  Bu ); \
        Ago0 =   Bo ^((~Bu)&  Ba ); \
        Asu0 =   Bu ^((~Ba)&  Be ); \
        Bo = ROL32((Aga1^Da1), 21); \
        Bu = ROL32((Ase0^De0),  1); \
        Ba = ROL32((Aki1^Di0), 31); \
        Be = ROL32((Abo1^Do1), 28); \
        Bi = ROL32((Amu1^Du1), 20); \
        Aga1 =   Ba ^((~Be)&  Bi ); \
        Ase0 =   Be ^((~Bi)&  Bo ); \
        Aki1 =   Bi ^((~Bo)&  Bu ); \
        Abo1 =   Bo ^((~Bu)&  Ba ); \
        Amu1 =   Bu ^((~Ba)&  Be ); \
        Bo = ROL32((Aga0^Da0), 20); \
        Bu = ROL32((Ase1^De1),  1); \
        Ba = ROL32((Aki0^Di1), 31); \
        Be = ROL32((Abo0^Do0), 27); \
        Bi = ROL32((Amu0^Du0), 19); \
        Aga0 =   Ba ^((~Be)&  Bi ); \
        Ase1 =   Be ^((~Bi)&  Bo ); \
        Aki0 =   Bi ^((~Bo)&  Bu ); \
        Abo0 =   Bo ^((~Bu)&  Ba ); \
        Amu0 =   Bu ^((~Ba)&  Be );
707 
/*
 * KeccakRound2() - third of the four unrolled round bodies used by the
 * permutation loop (the loop runs KeccakRound0..KeccakRound3 in sequence).
 *
 * The macro takes no arguments; it expands in a scope that provides the
 * half-lane names Aba0..Asu1, the temporaries Cx/Cy/Cz/Cw, Da0..Du1 and
 * Ba..Bu, and the cursor pRoundConstants.
 *
 * What the expansion does:
 *   1. XORs five half-lane words into each parity word and combines the
 *      parities (one of each pair rotated left by 1) into ten D words.
 *   2. For ten groups of five words: B = ROL32(A ^ D, r), then
 *      A = B ^ (~B' & B'') using the next two B words cyclically.
 *   3. XORs one word from pRoundConstants into Aba0 and one into Aba1,
 *      advancing the cursor by two words in total.
 * These correspond to the theta, rho, chi and iota steps of a Keccak round.
 * NOTE(review): the groups here read and write words with mixed row
 * prefixes (e.g. Aba/Ake/Asi/Ago/Amu); presumably this name permutation is
 * how the lane relocation (pi) is realised in place - confirm against the
 * other round macros.
 */
#define KeccakRound2() \
        /* Parity words and D words. Du1 and Du0 are first used as scratch */ \
        /* parity accumulators and only get their final D values at the    */ \
        /* end of this section.                                            */ \
        Cx = Aku1^Agu0^Abu1^Asu1^Amu1; \
        Du1 = Ame0^Ake0^Age0^Abe0^Ase1; \
        Da0 = Cx^ROL32(Du1, 1); \
        Cz = Aku0^Agu1^Abu0^Asu0^Amu0; \
        Du0 = Ame1^Ake1^Age1^Abe1^Ase0; \
        Da1 = Cz^Du0; \
        Cw = Agi1^Abi1^Asi1^Ami0^Aki1; \
        Do0 = Cw^ROL32(Cz, 1); \
        Cy = Agi0^Abi0^Asi0^Ami1^Aki0; \
        Do1 = Cy^Cx; \
        Cx = Aba0^Asa1^Ama1^Aka1^Aga1; \
        De0 = Cx^ROL32(Cy, 1); \
        Cz = Aba1^Asa0^Ama0^Aka0^Aga0; \
        De1 = Cz^Cw; \
        Cy = Aso0^Amo0^Ako1^Ago0^Abo0; \
        Di0 = Du0^ROL32(Cy, 1); \
        Cw = Aso1^Amo1^Ako0^Ago1^Abo1; \
        Di1 = Du1^Cw; \
        Du0 = Cw^ROL32(Cz, 1); \
        Du1 = Cy^Cx; \
\
        /* Two groups covering both half-lane words of Aba, Ake, Asi, Ago, */ \
        /* Amu. Each Aba word additionally absorbs one round-constant word. */ \
        Ba = (Aba0^Da0); \
        Be = ROL32((Ake1^De0), 22); \
        Bi = ROL32((Asi0^Di1), 22); \
        Bo = ROL32((Ago0^Do1), 11); \
        Bu = ROL32((Amu1^Du0),  7); \
        Aba0 =   Ba ^((~Be)&  Bi ); \
        Aba0 ^= *(pRoundConstants++); \
        Ake1 =   Be ^((~Bi)&  Bo ); \
        Asi0 =   Bi ^((~Bo)&  Bu ); \
        Ago0 =   Bo ^((~Bu)&  Ba ); \
        Amu1 =   Bu ^((~Ba)&  Be ); \
        Ba = (Aba1^Da1); \
        Be = ROL32((Ake0^De1), 22); \
        Bi = ROL32((Asi1^Di0), 21); \
        Bo = ROL32((Ago1^Do0), 10); \
        Bu = ROL32((Amu0^Du1),  7); \
        Aba1 =   Ba ^((~Be)&  Bi ); \
        Aba1 ^= *(pRoundConstants++); \
        Ake0 =   Be ^((~Bi)&  Bo ); \
        Asi1 =   Bi ^((~Bo)&  Bu ); \
        Ago1 =   Bo ^((~Bu)&  Ba ); \
        Amu0 =   Bu ^((~Ba)&  Be ); \
        /* Two groups covering both half-lane words of Ama, Abe, Aki, Aso, Agu. */ \
        Bi = ROL32((Ama0^Da1),  2); \
        Bo = ROL32((Abe0^De1), 23); \
        Bu = ROL32((Aki0^Di1), 31); \
        Ba = ROL32((Aso1^Do0), 14); \
        Be = ROL32((Agu0^Du0), 10); \
        Ama0 =   Ba ^((~Be)&  Bi ); \
        Abe0 =   Be ^((~Bi)&  Bo ); \
        Aki0 =   Bi ^((~Bo)&  Bu ); \
        Aso1 =   Bo ^((~Bu)&  Ba ); \
        Agu0 =   Bu ^((~Ba)&  Be ); \
        Bi = ROL32((Ama1^Da0),  1); \
        Bo = ROL32((Abe1^De0), 22); \
        Bu = ROL32((Aki1^Di0), 30); \
        Ba = ROL32((Aso0^Do1), 14); \
        Be = ROL32((Agu1^Du1), 10); \
        Ama1 =   Ba ^((~Be)&  Bi ); \
        Abe1 =   Be ^((~Bi)&  Bo ); \
        Aki1 =   Bi ^((~Bo)&  Bu ); \
        Aso0 =   Bo ^((~Bu)&  Ba ); \
        Agu1 =   Bu ^((~Ba)&  Be ); \
        /* Two groups covering both half-lane words of Aga, Ame, Abi, Ako, Asu. */ \
        Bu = ROL32((Aga1^Da0),  9); \
        Ba = ROL32((Ame0^De1),  1); \
        Be = ROL32((Abi1^Di0),  3); \
        Bi = ROL32((Ako1^Do1), 13); \
        Bo = ROL32((Asu1^Du0),  4); \
        Aga1 =   Ba ^((~Be)&  Bi ); \
        Ame0 =   Be ^((~Bi)&  Bo ); \
        Abi1 =   Bi ^((~Bo)&  Bu ); \
        Ako1 =   Bo ^((~Bu)&  Ba ); \
        Asu1 =   Bu ^((~Ba)&  Be ); \
        Bu = ROL32((Aga0^Da1),  9); \
        Ba = (Ame1^De0); \
        Be = ROL32((Abi0^Di1),  3); \
        Bi = ROL32((Ako0^Do0), 12); \
        Bo = ROL32((Asu0^Du1),  4); \
        Aga0 =   Ba ^((~Be)&  Bi ); \
        Ame1 =   Be ^((~Bi)&  Bo ); \
        Abi0 =   Bi ^((~Bo)&  Bu ); \
        Ako0 =   Bo ^((~Bu)&  Ba ); \
        Asu0 =   Bu ^((~Ba)&  Be ); \
        /* Two groups covering both half-lane words of Asa, Age, Ami, Abo, Aku. */ \
        Be = ROL32((Asa1^Da0), 18); \
        Bi = ROL32((Age1^De0),  5); \
        Bo = ROL32((Ami1^Di1),  8); \
        Bu = ROL32((Abo1^Do0), 28); \
        Ba = ROL32((Aku0^Du1), 14); \
        Asa1 =   Ba ^((~Be)&  Bi ); \
        Age1 =   Be ^((~Bi)&  Bo ); \
        Ami1 =   Bi ^((~Bo)&  Bu ); \
        Abo1 =   Bo ^((~Bu)&  Ba ); \
        Aku0 =   Bu ^((~Ba)&  Be ); \
        Be = ROL32((Asa0^Da1), 18); \
        Bi = ROL32((Age0^De1),  5); \
        Bo = ROL32((Ami0^Di0),  7); \
        Bu = ROL32((Abo0^Do1), 28); \
        Ba = ROL32((Aku1^Du0), 13); \
        Asa0 =   Ba ^((~Be)&  Bi ); \
        Age0 =   Be ^((~Bi)&  Bo ); \
        Ami0 =   Bi ^((~Bo)&  Bu ); \
        Abo0 =   Bo ^((~Bu)&  Ba ); \
        Aku1 =   Bu ^((~Ba)&  Be ); \
        /* Two groups covering both half-lane words of Aka, Ase, Agi, Amo, Abu. */ \
        Bo = ROL32((Aka0^Da1), 21); \
        Bu = ROL32((Ase0^De0),  1); \
        Ba = ROL32((Agi1^Di0), 31); \
        Be = ROL32((Amo0^Do1), 28); \
        Bi = ROL32((Abu0^Du1), 20); \
        Aka0 =   Ba ^((~Be)&  Bi ); \
        Ase0 =   Be ^((~Bi)&  Bo ); \
        Agi1 =   Bi ^((~Bo)&  Bu ); \
        Amo0 =   Bo ^((~Bu)&  Ba ); \
        Abu0 =   Bu ^((~Ba)&  Be ); \
        Bo = ROL32((Aka1^Da0), 20); \
        Bu = ROL32((Ase1^De1),  1); \
        Ba = ROL32((Agi0^Di1), 31); \
        Be = ROL32((Amo1^Do0), 27); \
        Bi = ROL32((Abu1^Du0), 19); \
        Aka1 =   Ba ^((~Be)&  Bi ); \
        Ase1 =   Be ^((~Bi)&  Bo ); \
        Agi0 =   Bi ^((~Bo)&  Bu ); \
        Amo1 =   Bo ^((~Bu)&  Ba ); \
        Abu1 =   Bu ^((~Ba)&  Be );
832 
/*
 * KeccakRound3() - last of the four unrolled round bodies used by the
 * permutation loop (the loop runs KeccakRound0..KeccakRound3 in sequence).
 *
 * The macro takes no arguments; it expands in a scope that provides the
 * half-lane names Aba0..Asu1, the temporaries Cx/Cy/Cz/Cw, Da0..Du1 and
 * Ba..Bu, and the cursor pRoundConstants.
 *
 * What the expansion does:
 *   1. XORs five half-lane words into each parity word and combines the
 *      parities (one of each pair rotated left by 1) into ten D words.
 *   2. For ten groups of five words: B = ROL32(A ^ D, r), then
 *      A = B ^ (~B' & B'') using the next two B words cyclically.
 *   3. XORs one word from pRoundConstants into Aba0 and one into Aba1,
 *      advancing the cursor by two words in total.
 * These correspond to the theta, rho, chi and iota steps of a Keccak round.
 *
 * In this round every group reads and writes words that share one row
 * prefix and one half suffix (Ab*0, Ab*1, Ag*0, ...).
 * NOTE(review): presumably this means the state is back in its natural
 * naming after this round, closing the 4-round cycle - confirm against
 * KeccakRound0.
 */
#define KeccakRound3() \
        /* Parity words and D words. Du1 and Du0 are first used as scratch */ \
        /* parity accumulators and only get their final D values at the    */ \
        /* end of this section.                                            */ \
        Cx = Amu1^Agu0^Asu1^Aku0^Abu0; \
        Du1 = Ake0^Abe1^Ame1^Age0^Ase1; \
        Da0 = Cx^ROL32(Du1, 1); \
        Cz = Amu0^Agu1^Asu0^Aku1^Abu1; \
        Du0 = Ake1^Abe0^Ame0^Age1^Ase0; \
        Da1 = Cz^Du0; \
        Cw = Asi0^Aki0^Abi1^Ami1^Agi1; \
        Do0 = Cw^ROL32(Cz, 1); \
        Cy = Asi1^Aki1^Abi0^Ami0^Agi0; \
        Do1 = Cy^Cx; \
        Cx = Aba0^Ama0^Aga1^Asa1^Aka0; \
        De0 = Cx^ROL32(Cy, 1); \
        Cz = Aba1^Ama1^Aga0^Asa0^Aka1; \
        De1 = Cz^Cw; \
        Cy = Ago1^Aso0^Ako0^Abo0^Amo1; \
        Di0 = Du0^ROL32(Cy, 1); \
        Cw = Ago0^Aso1^Ako1^Abo1^Amo0; \
        Di1 = Du1^Cw; \
        Du0 = Cw^ROL32(Cz, 1); \
        Du1 = Cy^Cx; \
\
        /* Ab* row, suffix 0 then suffix 1. Each Aba word additionally */ \
        /* absorbs one round-constant word.                            */ \
        Ba = (Aba0^Da0); \
        Be = ROL32((Abe0^De0), 22); \
        Bi = ROL32((Abi0^Di1), 22); \
        Bo = ROL32((Abo0^Do1), 11); \
        Bu = ROL32((Abu0^Du0),  7); \
        Aba0 =   Ba ^((~Be)&  Bi ); \
        Aba0 ^= *(pRoundConstants++); \
        Abe0 =   Be ^((~Bi)&  Bo ); \
        Abi0 =   Bi ^((~Bo)&  Bu ); \
        Abo0 =   Bo ^((~Bu)&  Ba ); \
        Abu0 =   Bu ^((~Ba)&  Be ); \
        Ba = (Aba1^Da1); \
        Be = ROL32((Abe1^De1), 22); \
        Bi = ROL32((Abi1^Di0), 21); \
        Bo = ROL32((Abo1^Do0), 10); \
        Bu = ROL32((Abu1^Du1),  7); \
        Aba1 =   Ba ^((~Be)&  Bi ); \
        Aba1 ^= *(pRoundConstants++); \
        Abe1 =   Be ^((~Bi)&  Bo ); \
        Abi1 =   Bi ^((~Bo)&  Bu ); \
        Abo1 =   Bo ^((~Bu)&  Ba ); \
        Abu1 =   Bu ^((~Ba)&  Be ); \
        /* Ag* row, suffix 0 then suffix 1. */ \
        Bi = ROL32((Aga0^Da1),  2); \
        Bo = ROL32((Age0^De1), 23); \
        Bu = ROL32((Agi0^Di1), 31); \
        Ba = ROL32((Ago0^Do0), 14); \
        Be = ROL32((Agu0^Du0), 10); \
        Aga0 =   Ba ^((~Be)&  Bi ); \
        Age0 =   Be ^((~Bi)&  Bo ); \
        Agi0 =   Bi ^((~Bo)&  Bu ); \
        Ago0 =   Bo ^((~Bu)&  Ba ); \
        Agu0 =   Bu ^((~Ba)&  Be ); \
        Bi = ROL32((Aga1^Da0),  1); \
        Bo = ROL32((Age1^De0), 22); \
        Bu = ROL32((Agi1^Di0), 30); \
        Ba = ROL32((Ago1^Do1), 14); \
        Be = ROL32((Agu1^Du1), 10); \
        Aga1 =   Ba ^((~Be)&  Bi ); \
        Age1 =   Be ^((~Bi)&  Bo ); \
        Agi1 =   Bi ^((~Bo)&  Bu ); \
        Ago1 =   Bo ^((~Bu)&  Ba ); \
        Agu1 =   Bu ^((~Ba)&  Be ); \
        /* Ak* row, suffix 0 then suffix 1. */ \
        Bu = ROL32((Aka0^Da0),  9); \
        Ba = ROL32((Ake0^De1),  1); \
        Be = ROL32((Aki0^Di0),  3); \
        Bi = ROL32((Ako0^Do1), 13); \
        Bo = ROL32((Aku0^Du0),  4); \
        Aka0 =   Ba ^((~Be)&  Bi ); \
        Ake0 =   Be ^((~Bi)&  Bo ); \
        Aki0 =   Bi ^((~Bo)&  Bu ); \
        Ako0 =   Bo ^((~Bu)&  Ba ); \
        Aku0 =   Bu ^((~Ba)&  Be ); \
        Bu = ROL32((Aka1^Da1),  9); \
        Ba = (Ake1^De0); \
        Be = ROL32((Aki1^Di1),  3); \
        Bi = ROL32((Ako1^Do0), 12); \
        Bo = ROL32((Aku1^Du1),  4); \
        Aka1 =   Ba ^((~Be)&  Bi ); \
        Ake1 =   Be ^((~Bi)&  Bo ); \
        Aki1 =   Bi ^((~Bo)&  Bu ); \
        Ako1 =   Bo ^((~Bu)&  Ba ); \
        Aku1 =   Bu ^((~Ba)&  Be ); \
        /* Am* row, suffix 0 then suffix 1. */ \
        Be = ROL32((Ama0^Da0), 18); \
        Bi = ROL32((Ame0^De0),  5); \
        Bo = ROL32((Ami0^Di1),  8); \
        Bu = ROL32((Amo0^Do0), 28); \
        Ba = ROL32((Amu0^Du1), 14); \
        Ama0 =   Ba ^((~Be)&  Bi ); \
        Ame0 =   Be ^((~Bi)&  Bo ); \
        Ami0 =   Bi ^((~Bo)&  Bu ); \
        Amo0 =   Bo ^((~Bu)&  Ba ); \
        Amu0 =   Bu ^((~Ba)&  Be ); \
        Be = ROL32((Ama1^Da1), 18); \
        Bi = ROL32((Ame1^De1),  5); \
        Bo = ROL32((Ami1^Di0),  7); \
        Bu = ROL32((Amo1^Do1), 28); \
        Ba = ROL32((Amu1^Du0), 13); \
        Ama1 =   Ba ^((~Be)&  Bi ); \
        Ame1 =   Be ^((~Bi)&  Bo ); \
        Ami1 =   Bi ^((~Bo)&  Bu ); \
        Amo1 =   Bo ^((~Bu)&  Ba ); \
        Amu1 =   Bu ^((~Ba)&  Be ); \
        /* As* row, suffix 0 then suffix 1. */ \
        Bo = ROL32((Asa0^Da1), 21); \
        Bu = ROL32((Ase0^De0),  1); \
        Ba = ROL32((Asi0^Di0), 31); \
        Be = ROL32((Aso0^Do1), 28); \
        Bi = ROL32((Asu0^Du1), 20); \
        Asa0 =   Ba ^((~Be)&  Bi ); \
        Ase0 =   Be ^((~Bi)&  Bo ); \
        Asi0 =   Bi ^((~Bo)&  Bu ); \
        Aso0 =   Bo ^((~Bu)&  Ba ); \
        Asu0 =   Bu ^((~Ba)&  Be ); \
        Bo = ROL32((Asa1^Da0), 20); \
        Bu = ROL32((Ase1^De1),  1); \
        Ba = ROL32((Asi1^Di1), 31); \
        Be = ROL32((Aso1^Do0), 27); \
        Bi = ROL32((Asu1^Du0), 19); \
        Asa1 =   Ba ^((~Be)&  Bi ); \
        Ase1 =   Be ^((~Bi)&  Bo ); \
        Asi1 =   Bi ^((~Bo)&  Bu ); \
        Aso1 =   Bo ^((~Bu)&  Ba ); \
        Asu1 =   Bu ^((~Ba)&  Be );
957 
KeccakP1600_Permute_Nrounds(void * state,unsigned int nRounds)958 void KeccakP1600_Permute_Nrounds(void *state, unsigned int nRounds)
959 {
960     UINT32 Da0, De0, Di0, Do0, Du0;
961     UINT32 Da1, De1, Di1, Do1, Du1;
962     UINT32 Ba, Be, Bi, Bo, Bu;
963     UINT32 Cx, Cy, Cz, Cw;
964     const UINT32 *pRoundConstants = KeccakF1600RoundConstants_int2+(24-nRounds)*2;
965     UINT32 *stateAsHalfLanes = (UINT32*)state;
966     #define Aba0 stateAsHalfLanes[ 0]
967     #define Aba1 stateAsHalfLanes[ 1]
968     #define Abe0 stateAsHalfLanes[ 2]
969     #define Abe1 stateAsHalfLanes[ 3]
970     #define Abi0 stateAsHalfLanes[ 4]
971     #define Abi1 stateAsHalfLanes[ 5]
972     #define Abo0 stateAsHalfLanes[ 6]
973     #define Abo1 stateAsHalfLanes[ 7]
974     #define Abu0 stateAsHalfLanes[ 8]
975     #define Abu1 stateAsHalfLanes[ 9]
976     #define Aga0 stateAsHalfLanes[10]
977     #define Aga1 stateAsHalfLanes[11]
978     #define Age0 stateAsHalfLanes[12]
979     #define Age1 stateAsHalfLanes[13]
980     #define Agi0 stateAsHalfLanes[14]
981     #define Agi1 stateAsHalfLanes[15]
982     #define Ago0 stateAsHalfLanes[16]
983     #define Ago1 stateAsHalfLanes[17]
984     #define Agu0 stateAsHalfLanes[18]
985     #define Agu1 stateAsHalfLanes[19]
986     #define Aka0 stateAsHalfLanes[20]
987     #define Aka1 stateAsHalfLanes[21]
988     #define Ake0 stateAsHalfLanes[22]
989     #define Ake1 stateAsHalfLanes[23]
990     #define Aki0 stateAsHalfLanes[24]
991     #define Aki1 stateAsHalfLanes[25]
992     #define Ako0 stateAsHalfLanes[26]
993     #define Ako1 stateAsHalfLanes[27]
994     #define Aku0 stateAsHalfLanes[28]
995     #define Aku1 stateAsHalfLanes[29]
996     #define Ama0 stateAsHalfLanes[30]
997     #define Ama1 stateAsHalfLanes[31]
998     #define Ame0 stateAsHalfLanes[32]
999     #define Ame1 stateAsHalfLanes[33]
1000     #define Ami0 stateAsHalfLanes[34]
1001     #define Ami1 stateAsHalfLanes[35]
1002     #define Amo0 stateAsHalfLanes[36]
1003     #define Amo1 stateAsHalfLanes[37]
1004     #define Amu0 stateAsHalfLanes[38]
1005     #define Amu1 stateAsHalfLanes[39]
1006     #define Asa0 stateAsHalfLanes[40]
1007     #define Asa1 stateAsHalfLanes[41]
1008     #define Ase0 stateAsHalfLanes[42]
1009     #define Ase1 stateAsHalfLanes[43]
1010     #define Asi0 stateAsHalfLanes[44]
1011     #define Asi1 stateAsHalfLanes[45]
1012     #define Aso0 stateAsHalfLanes[46]
1013     #define Aso1 stateAsHalfLanes[47]
1014     #define Asu0 stateAsHalfLanes[48]
1015     #define Asu1 stateAsHalfLanes[49]
1016 
1017     nRounds &= 3;
1018     switch ( nRounds )
1019     {
1020         #define I0 Ba
1021         #define I1 Be
1022         #define T0 Bi
1023         #define T1 Bo
1024         #define SwapPI13( in0,in1,in2,in3,eo0,eo1,eo2,eo3 ) \
1025             I0 = (in0)[0]; I1 = (in0)[1];       \
1026             T0 = (in1)[0]; T1 = (in1)[1];       \
1027             (in0)[eo0] = T0; (in0)[eo0^1] = T1; \
1028             T0 = (in2)[0]; T1 = (in2)[1];       \
1029             (in1)[eo1] = T0; (in1)[eo1^1] = T1; \
1030             T0 = (in3)[0]; T1 = (in3)[1];       \
1031             (in2)[eo2] = T0; (in2)[eo2^1] = T1; \
1032             (in3)[eo3] = I0; (in3)[eo3^1] = I1
1033         #define SwapPI2( in0,in1,in2,in3 ) \
1034             I0 = (in0)[0]; I1 = (in0)[1]; \
1035             T0 = (in1)[0]; T1 = (in1)[1]; \
1036             (in0)[1] = T0; (in0)[0] = T1; \
1037             (in1)[1] = I0; (in1)[0] = I1; \
1038             I0 = (in2)[0]; I1 = (in2)[1]; \
1039             T0 = (in3)[0]; T1 = (in3)[1]; \
1040             (in2)[1] = T0; (in2)[0] = T1; \
1041             (in3)[1] = I0; (in3)[0] = I1
1042         #define SwapEO( even,odd ) T0 = even; even = odd; odd = T0
1043 
1044         case 1:
1045             SwapPI13( &Aga0, &Aka0, &Asa0, &Ama0, 1, 0, 1, 0 );
1046             SwapPI13( &Abe0, &Age0, &Ame0, &Ake0, 0, 1, 0, 1 );
1047             SwapPI13( &Abi0, &Aki0, &Agi0, &Asi0, 1, 0, 1, 0 );
1048             SwapEO( Ami0, Ami1 );
1049             SwapPI13( &Abo0, &Amo0, &Aso0, &Ago0, 1, 0, 1, 0 );
1050             SwapEO( Ako0, Ako1 );
1051             SwapPI13( &Abu0, &Asu0, &Aku0, &Amu0, 0, 1, 0, 1 );
1052             break;
1053 
1054         case 2:
1055             SwapPI2( &Aga0, &Asa0, &Aka0, &Ama0 );
1056             SwapPI2( &Abe0, &Ame0, &Age0, &Ake0 );
1057             SwapPI2( &Abi0, &Agi0, &Aki0, &Asi0 );
1058             SwapPI2( &Abo0, &Aso0, &Ago0, &Amo0 );
1059             SwapPI2( &Abu0, &Aku0, &Amu0, &Asu0 );
1060             break;
1061 
1062         case 3:
1063             SwapPI13( &Aga0, &Ama0, &Asa0, &Aka0, 0, 1, 0, 1 );
1064             SwapPI13( &Abe0, &Ake0, &Ame0, &Age0, 1, 0, 1, 0 );
1065             SwapPI13( &Abi0, &Asi0, &Agi0, &Aki0, 0, 1, 0, 1 );
1066             SwapEO( Ami0, Ami1 );
1067             SwapPI13( &Abo0, &Ago0, &Aso0, &Amo0, 0, 1, 0, 1 );
1068             SwapEO( Ako0, Ako1 );
1069             SwapPI13( &Abu0, &Amu0, &Aku0, &Asu0, 1, 0, 1, 0 );
1070             break;
1071         #undef I0
1072         #undef I1
1073         #undef T0
1074         #undef T1
1075         #undef SwapPI13
1076         #undef SwapPI2
1077         #undef SwapEO
1078     }
1079 
1080     do
1081     {
1082         /* Code for 4 rounds, using factor 2 interleaving, 64-bit lanes mapped to 32-bit words */
1083         switch ( nRounds )
1084         {
1085             case 0: KeccakRound0(); /* fall through */
1086             case 3: KeccakRound1();
1087             case 2: KeccakRound2();
1088             case 1: KeccakRound3();
1089         }
1090         nRounds = 0;
1091     }
1092     while ( *pRoundConstants != 0xFF );
1093 
1094     #undef Aba0
1095     #undef Aba1
1096     #undef Abe0
1097     #undef Abe1
1098     #undef Abi0
1099     #undef Abi1
1100     #undef Abo0
1101     #undef Abo1
1102     #undef Abu0
1103     #undef Abu1
1104     #undef Aga0
1105     #undef Aga1
1106     #undef Age0
1107     #undef Age1
1108     #undef Agi0
1109     #undef Agi1
1110     #undef Ago0
1111     #undef Ago1
1112     #undef Agu0
1113     #undef Agu1
1114     #undef Aka0
1115     #undef Aka1
1116     #undef Ake0
1117     #undef Ake1
1118     #undef Aki0
1119     #undef Aki1
1120     #undef Ako0
1121     #undef Ako1
1122     #undef Aku0
1123     #undef Aku1
1124     #undef Ama0
1125     #undef Ama1
1126     #undef Ame0
1127     #undef Ame1
1128     #undef Ami0
1129     #undef Ami1
1130     #undef Amo0
1131     #undef Amo1
1132     #undef Amu0
1133     #undef Amu1
1134     #undef Asa0
1135     #undef Asa1
1136     #undef Ase0
1137     #undef Ase1
1138     #undef Asi0
1139     #undef Asi1
1140     #undef Aso0
1141     #undef Aso1
1142     #undef Asu0
1143     #undef Asu1
1144 }
1145 
1146 /* ---------------------------------------------------------------- */
1147 
/*
 * Convenience entry point: runs KeccakP1600_Permute_Nrounds on the given
 * state with a fixed round count of 12. The state is updated in place.
 */
void KeccakP1600_Permute_12rounds(void *state)
{
    const unsigned int roundCount = 12;

    KeccakP1600_Permute_Nrounds(state, roundCount);
}
1152 
1153 /* ---------------------------------------------------------------- */
1154 
/*
 * Convenience entry point: runs KeccakP1600_Permute_Nrounds on the given
 * state with a fixed round count of 24. The state is updated in place.
 */
void KeccakP1600_Permute_24rounds(void *state)
{
    const unsigned int roundCount = 24;

    KeccakP1600_Permute_Nrounds(state, roundCount);
}
1159