1 /*
2 Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
3 Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
4 denoted as "the implementer".
5
6 For more information, feedback or questions, please refer to our websites:
7 http://keccak.noekeon.org/
8 http://keyak.noekeon.org/
9 http://ketje.noekeon.org/
10
11 To the extent possible under law, the implementer has waived all copyright
12 and related or neighboring rights to the source code in this file.
13 http://creativecommons.org/publicdomain/zero/1.0/
14 */
15
16 #include <string.h>
17 #include "brg_endian.h"
18 #include "KeccakP-1600-SnP.h"
19 #include "SnP-Relaned.h"
20
/* Basic integer types used throughout this file. */
typedef unsigned char UINT8;
typedef unsigned int UINT32;
/* WARNING: on 8-bit and 16-bit platforms, this should be replaced by: */
/* typedef unsigned long UINT32; */

/*
 * ROL32(a, offset): rotate `a`, converted to UINT32, left by `offset` bits.
 *
 * - `offset` must be in the range 1..31; with 0 the right shift count would
 *   be 32, which is undefined for a 32-bit operand.
 * - Both arguments are expanded twice, so they must not have side effects.
 * - `a` is parenthesized before the cast so that a compound expression
 *   (for instance `v >> 4`) is converted as a whole rather than having the
 *   cast bind to its first operand only.
 */
#define ROL32(a, offset) ((((UINT32)(a)) << (offset)) ^ (((UINT32)(a)) >> (32-(offset))))
27
/*
 * Conversion of a 64-bit lane, supplied as two 32-bit words (low, high), to
 * the bit-interleaved form: `even` receives the even-indexed bits and `odd`
 * the odd-indexed bits. In each result word the bits coming from `low` sit in
 * the lower 16 bits and the bits coming from `high` in the upper 16 bits.
 *
 * Every macro below expands to a sequence of statements, not an expression.
 * `temp`, `temp0` and `temp1` are scratch variables supplied by the caller.
 * `low`, `high`, `even` and `odd` are each evaluated exactly once, in that
 * order, so callers may pass expressions such as *(p++).
 *
 * Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
 */

/* One masked swap step: exchanges the bit groups selected by `mask` with the groups `shift` bits above them. */
#define bitInterleavingSwapStep(word, temp, shift, mask) \
    temp = (word ^ (word >> shift)) & mask; word = word ^ temp ^ (temp << shift);

/* Moves the even-indexed bits of `word` to its lower half and the odd-indexed bits to its upper half. */
#define bitInterleavingSeparate(word, temp) \
    bitInterleavingSwapStep(word, temp, 1, 0x22222222UL) \
    bitInterleavingSwapStep(word, temp, 2, 0x0C0C0C0CUL) \
    bitInterleavingSwapStep(word, temp, 4, 0x00F000F0UL) \
    bitInterleavingSwapStep(word, temp, 8, 0x0000FF00UL)

/* Leaves the separated form of `low` in temp0 and of `high` in temp1. */
#define prepareToBitInterleaving(low, high, temp, temp0, temp1) \
    temp0 = (low); \
    bitInterleavingSeparate(temp0, temp) \
    temp1 = (high); \
    bitInterleavingSeparate(temp1, temp)

/* even ^= interleaved even word; odd ^= interleaved odd word. */
#define toBitInterleavingAndXOR(low, high, even, odd, temp, temp0, temp1) \
    prepareToBitInterleaving(low, high, temp, temp0, temp1) \
    even ^= (temp1 << 16) | (temp0 & 0x0000FFFF); \
    odd ^= (temp1 & 0xFFFF0000) | (temp0 >> 16);

/* even &= interleaved even word; odd &= interleaved odd word. */
#define toBitInterleavingAndAND(low, high, even, odd, temp, temp0, temp1) \
    prepareToBitInterleaving(low, high, temp, temp0, temp1) \
    even &= (temp1 << 16) | (temp0 & 0x0000FFFF); \
    odd &= (temp1 & 0xFFFF0000) | (temp0 >> 16);

/* even = interleaved even word; odd = interleaved odd word. */
#define toBitInterleavingAndSet(low, high, even, odd, temp, temp0, temp1) \
    prepareToBitInterleaving(low, high, temp, temp0, temp1) \
    even = (temp1 << 16) | (temp0 & 0x0000FFFF); \
    odd = (temp1 & 0xFFFF0000) | (temp0 >> 16);
55
/* Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
/*
 * prepareFromBitInterleaving: converts one lane from the bit-interleaved form
 * (`even` word, `odd` word) back to two ordinary 32-bit words.
 * On completion temp0 holds the low word and temp1 the high word.
 * Expands to statements; `temp`, `temp0`, `temp1` are caller-supplied scratch
 * variables. `even` and `odd` are each evaluated exactly once, in that order.
 */
#define prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \
    temp0 = (even); \
    temp1 = (odd); \
    temp = (temp0 & 0x0000FFFF) | (temp1 << 16); \
    temp1 = (temp0 >> 16) | (temp1 & 0xFFFF0000); \
    temp0 = temp; \
    temp = (temp0 ^ (temp0 >> 8)) & 0x0000FF00UL; temp0 = temp0 ^ temp ^ (temp << 8); \
    temp = (temp0 ^ (temp0 >> 4)) & 0x00F000F0UL; temp0 = temp0 ^ temp ^ (temp << 4); \
    temp = (temp0 ^ (temp0 >> 2)) & 0x0C0C0C0CUL; temp0 = temp0 ^ temp ^ (temp << 2); \
    temp = (temp0 ^ (temp0 >> 1)) & 0x22222222UL; temp0 = temp0 ^ temp ^ (temp << 1); \
    temp = (temp1 ^ (temp1 >> 8)) & 0x0000FF00UL; temp1 = temp1 ^ temp ^ (temp << 8); \
    temp = (temp1 ^ (temp1 >> 4)) & 0x00F000F0UL; temp1 = temp1 ^ temp ^ (temp << 4); \
    temp = (temp1 ^ (temp1 >> 2)) & 0x0C0C0C0CUL; temp1 = temp1 ^ temp ^ (temp << 2); \
    temp = (temp1 ^ (temp1 >> 1)) & 0x22222222UL; temp1 = temp1 ^ temp ^ (temp << 1);

/* low = de-interleaved low word; high = de-interleaved high word. */
#define fromBitInterleaving(even, odd, low, high, temp, temp0, temp1) \
    prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \
    low = temp0; \
    high = temp1;

/*
 * lowOut = lowIn ^ de-interleaved low word; highOut = highIn ^ de-interleaved high word.
 * lowIn/highIn are parenthesized so that an argument built with a
 * lower-precedence operator (e.g. `a | b`) is XORed as a whole.
 * Evaluation order: even, odd, lowIn, lowOut, highIn, highOut.
 */
#define fromBitInterleavingAndXOR(even, odd, lowIn, highIn, lowOut, highOut, temp, temp0, temp1) \
    prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \
    lowOut = (lowIn) ^ temp0; \
    highOut = (highIn) ^ temp1;
81
KeccakP1600_SetBytesInLaneToZero(void * state,unsigned int lanePosition,unsigned int offset,unsigned int length)82 void KeccakP1600_SetBytesInLaneToZero(void *state, unsigned int lanePosition, unsigned int offset, unsigned int length)
83 {
84 UINT8 laneAsBytes[8];
85 UINT32 low, high;
86 UINT32 temp, temp0, temp1;
87 UINT32 *stateAsHalfLanes = (UINT32*)state;
88
89 memset(laneAsBytes, 0xFF, offset);
90 memset(laneAsBytes+offset, 0x00, length);
91 memset(laneAsBytes+offset+length, 0xFF, 8-offset-length);
92 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
93 low = *((UINT32*)(laneAsBytes+0));
94 high = *((UINT32*)(laneAsBytes+4));
95 #else
96 low = laneAsBytes[0]
97 | ((UINT32)(laneAsBytes[1]) << 8)
98 | ((UINT32)(laneAsBytes[2]) << 16)
99 | ((UINT32)(laneAsBytes[3]) << 24);
100 high = laneAsBytes[4]
101 | ((UINT32)(laneAsBytes[5]) << 8)
102 | ((UINT32)(laneAsBytes[6]) << 16)
103 | ((UINT32)(laneAsBytes[7]) << 24);
104 #endif
105 toBitInterleavingAndAND(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
106 }
107
108 /* ---------------------------------------------------------------- */
109
/*
 * Sets the first 200 bytes of the memory pointed to by `state` to zero.
 */
void KeccakP1600_Initialize(void *state)
{
    enum { stateSizeInBytes = 200 };

    memset(state, 0x00, stateSizeInBytes);
}
114
115 /* ---------------------------------------------------------------- */
116
KeccakP1600_AddByte(void * state,unsigned char byte,unsigned int offset)117 void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset)
118 {
119 unsigned int lanePosition = offset/8;
120 unsigned int offsetInLane = offset%8;
121 UINT32 low, high;
122 UINT32 temp, temp0, temp1;
123 UINT32 *stateAsHalfLanes = (UINT32*)state;
124
125 if (offsetInLane < 4) {
126 low = (UINT32)byte << (offsetInLane*8);
127 high = 0;
128 }
129 else {
130 low = 0;
131 high = (UINT32)byte << ((offsetInLane-4)*8);
132 }
133 toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
134 }
135
136 /* ---------------------------------------------------------------- */
137
KeccakP1600_AddBytesInLane(void * state,unsigned int lanePosition,const unsigned char * data,unsigned int offset,unsigned int length)138 void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
139 {
140 UINT8 laneAsBytes[8];
141 UINT32 low, high;
142 UINT32 temp, temp0, temp1;
143 UINT32 *stateAsHalfLanes = (UINT32*)state;
144
145 memset(laneAsBytes, 0, 8);
146 memcpy(laneAsBytes+offset, data, length);
147 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
148 low = *((UINT32*)(laneAsBytes+0));
149 high = *((UINT32*)(laneAsBytes+4));
150 #else
151 low = laneAsBytes[0]
152 | ((UINT32)(laneAsBytes[1]) << 8)
153 | ((UINT32)(laneAsBytes[2]) << 16)
154 | ((UINT32)(laneAsBytes[3]) << 24);
155 high = laneAsBytes[4]
156 | ((UINT32)(laneAsBytes[5]) << 8)
157 | ((UINT32)(laneAsBytes[6]) << 16)
158 | ((UINT32)(laneAsBytes[7]) << 24);
159 #endif
160 toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
161 }
162
163 /* ---------------------------------------------------------------- */
164
KeccakP1600_AddLanes(void * state,const unsigned char * data,unsigned int laneCount)165 void KeccakP1600_AddLanes(void *state, const unsigned char *data, unsigned int laneCount)
166 {
167 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
168 const UINT32 * pI = (const UINT32 *)data;
169 UINT32 * pS = (UINT32*)state;
170 UINT32 t, x0, x1;
171 int i;
172 for (i = laneCount-1; i >= 0; --i) {
173 #ifdef NO_MISALIGNED_ACCESSES
174 UINT32 low;
175 UINT32 high;
176 memcpy(&low, pI++, 4);
177 memcpy(&high, pI++, 4);
178 toBitInterleavingAndXOR(low, high, *(pS++), *(pS++), t, x0, x1);
179 #else
180 toBitInterleavingAndXOR(*(pI++), *(pI++), *(pS++), *(pS++), t, x0, x1)
181 #endif
182 }
183 #else
184 unsigned int lanePosition;
185 for(lanePosition=0; lanePosition<laneCount; lanePosition++) {
186 UINT8 laneAsBytes[8];
187 memcpy(laneAsBytes, data+lanePosition*8, 8);
188 UINT32 low = laneAsBytes[0]
189 | ((UINT32)(laneAsBytes[1]) << 8)
190 | ((UINT32)(laneAsBytes[2]) << 16)
191 | ((UINT32)(laneAsBytes[3]) << 24);
192 UINT32 high = laneAsBytes[4]
193 | ((UINT32)(laneAsBytes[5]) << 8)
194 | ((UINT32)(laneAsBytes[6]) << 16)
195 | ((UINT32)(laneAsBytes[7]) << 24);
196 UINT32 even, odd, temp, temp0, temp1;
197 UINT32 *stateAsHalfLanes = (UINT32*)state;
198 toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
199 }
200 #endif
201 }
202
203 /* ---------------------------------------------------------------- */
204
KeccakP1600_AddBytes(void * state,const unsigned char * data,unsigned int offset,unsigned int length)205 void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
206 {
207 SnP_AddBytes(state, data, offset, length, KeccakP1600_AddLanes, KeccakP1600_AddBytesInLane, 8);
208 }
209
210 /* ---------------------------------------------------------------- */
211
/*
 * Replaces `length` bytes of lane `lanePosition`, starting at byte `offset`
 * within the lane, by the bytes at `data`.
 * Implemented in two steps that must stay in this order.
 */
void KeccakP1600_OverwriteBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
{
    /* Step 1: zero the target bytes of the lane. */
    KeccakP1600_SetBytesInLaneToZero(state, lanePosition, offset, length);

    /* Step 2: add the new bytes into the zeroed positions. */
    KeccakP1600_AddBytesInLane(state, lanePosition, data, offset, length);
}
217
218 /* ---------------------------------------------------------------- */
219
KeccakP1600_OverwriteLanes(void * state,const unsigned char * data,unsigned int laneCount)220 void KeccakP1600_OverwriteLanes(void *state, const unsigned char *data, unsigned int laneCount)
221 {
222 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
223 const UINT32 * pI = (const UINT32 *)data;
224 UINT32 * pS = (UINT32 *)state;
225 UINT32 t, x0, x1;
226 int i;
227 for (i = laneCount-1; i >= 0; --i) {
228 #ifdef NO_MISALIGNED_ACCESSES
229 UINT32 low;
230 UINT32 high;
231 memcpy(&low, pI++, 4);
232 memcpy(&high, pI++, 4);
233 toBitInterleavingAndSet(low, high, *(pS++), *(pS++), t, x0, x1);
234 #else
235 toBitInterleavingAndSet(*(pI++), *(pI++), *(pS++), *(pS++), t, x0, x1)
236 #endif
237 }
238 #else
239 unsigned int lanePosition;
240 for(lanePosition=0; lanePosition<laneCount; lanePosition++) {
241 UINT8 laneAsBytes[8];
242 memcpy(laneAsBytes, data+lanePosition*8, 8);
243 UINT32 low = laneAsBytes[0]
244 | ((UINT32)(laneAsBytes[1]) << 8)
245 | ((UINT32)(laneAsBytes[2]) << 16)
246 | ((UINT32)(laneAsBytes[3]) << 24);
247 UINT32 high = laneAsBytes[4]
248 | ((UINT32)(laneAsBytes[5]) << 8)
249 | ((UINT32)(laneAsBytes[6]) << 16)
250 | ((UINT32)(laneAsBytes[7]) << 24);
251 UINT32 even, odd, temp, temp0, temp1;
252 UINT32 *stateAsHalfLanes = (UINT32*)state;
253 toBitInterleavingAndSet(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
254 }
255 #endif
256 }
257
258 /* ---------------------------------------------------------------- */
259
KeccakP1600_OverwriteBytes(void * state,const unsigned char * data,unsigned int offset,unsigned int length)260 void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
261 {
262 SnP_OverwriteBytes(state, data, offset, length, KeccakP1600_OverwriteLanes, KeccakP1600_OverwriteBytesInLane, 8);
263 }
264
265 /* ---------------------------------------------------------------- */
266
KeccakP1600_OverwriteWithZeroes(void * state,unsigned int byteCount)267 void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount)
268 {
269 UINT32 *stateAsHalfLanes = (UINT32*)state;
270 unsigned int i;
271
272 for(i=0; i<byteCount/8; i++) {
273 stateAsHalfLanes[i*2+0] = 0;
274 stateAsHalfLanes[i*2+1] = 0;
275 }
276 if (byteCount%8 != 0)
277 KeccakP1600_SetBytesInLaneToZero(state, byteCount/8, 0, byteCount%8);
278 }
279
280 /* ---------------------------------------------------------------- */
281
KeccakP1600_ExtractBytesInLane(const void * state,unsigned int lanePosition,unsigned char * data,unsigned int offset,unsigned int length)282 void KeccakP1600_ExtractBytesInLane(const void *state, unsigned int lanePosition, unsigned char *data, unsigned int offset, unsigned int length)
283 {
284 UINT32 *stateAsHalfLanes = (UINT32*)state;
285 UINT32 low, high, temp, temp0, temp1;
286 UINT8 laneAsBytes[8];
287
288 fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1);
289 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
290 *((UINT32*)(laneAsBytes+0)) = low;
291 *((UINT32*)(laneAsBytes+4)) = high;
292 #else
293 laneAsBytes[0] = low & 0xFF;
294 laneAsBytes[1] = (low >> 8) & 0xFF;
295 laneAsBytes[2] = (low >> 16) & 0xFF;
296 laneAsBytes[3] = (low >> 24) & 0xFF;
297 laneAsBytes[4] = high & 0xFF;
298 laneAsBytes[5] = (high >> 8) & 0xFF;
299 laneAsBytes[6] = (high >> 16) & 0xFF;
300 laneAsBytes[7] = (high >> 24) & 0xFF;
301 #endif
302 memcpy(data, laneAsBytes+offset, length);
303 }
304
305 /* ---------------------------------------------------------------- */
306
KeccakP1600_ExtractLanes(const void * state,unsigned char * data,unsigned int laneCount)307 void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount)
308 {
309 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
310 UINT32 * pI = (UINT32 *)data;
311 const UINT32 * pS = ( const UINT32 *)state;
312 UINT32 t, x0, x1;
313 int i;
314 for (i = laneCount-1; i >= 0; --i) {
315 #ifdef NO_MISALIGNED_ACCESSES
316 UINT32 low;
317 UINT32 high;
318 fromBitInterleaving(*(pS++), *(pS++), low, high, t, x0, x1);
319 memcpy(pI++, &low, 4);
320 memcpy(pI++, &high, 4);
321 #else
322 fromBitInterleaving(*(pS++), *(pS++), *(pI++), *(pI++), t, x0, x1)
323 #endif
324 }
325 #else
326 unsigned int lanePosition;
327 for(lanePosition=0; lanePosition<laneCount; lanePosition++) {
328 UINT32 *stateAsHalfLanes = (UINT32*)state;
329 UINT32 low, high, temp, temp0, temp1;
330 fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1);
331 UINT8 laneAsBytes[8];
332 laneAsBytes[0] = low & 0xFF;
333 laneAsBytes[1] = (low >> 8) & 0xFF;
334 laneAsBytes[2] = (low >> 16) & 0xFF;
335 laneAsBytes[3] = (low >> 24) & 0xFF;
336 laneAsBytes[4] = high & 0xFF;
337 laneAsBytes[5] = (high >> 8) & 0xFF;
338 laneAsBytes[6] = (high >> 16) & 0xFF;
339 laneAsBytes[7] = (high >> 24) & 0xFF;
340 memcpy(data+lanePosition*8, laneAsBytes, 8);
341 }
342 #endif
343 }
344
345 /* ---------------------------------------------------------------- */
346
KeccakP1600_ExtractBytes(const void * state,unsigned char * data,unsigned int offset,unsigned int length)347 void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
348 {
349 SnP_ExtractBytes(state, data, offset, length, KeccakP1600_ExtractLanes, KeccakP1600_ExtractBytesInLane, 8);
350 }
351
352 /* ---------------------------------------------------------------- */
353
KeccakP1600_ExtractAndAddBytesInLane(const void * state,unsigned int lanePosition,const unsigned char * input,unsigned char * output,unsigned int offset,unsigned int length)354 void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
355 {
356 UINT32 *stateAsHalfLanes = (UINT32*)state;
357 UINT32 low, high, temp, temp0, temp1;
358 UINT8 laneAsBytes[8];
359 unsigned int i;
360
361 fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1);
362 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
363 *((UINT32*)(laneAsBytes+0)) = low;
364 *((UINT32*)(laneAsBytes+4)) = high;
365 #else
366 laneAsBytes[0] = low & 0xFF;
367 laneAsBytes[1] = (low >> 8) & 0xFF;
368 laneAsBytes[2] = (low >> 16) & 0xFF;
369 laneAsBytes[3] = (low >> 24) & 0xFF;
370 laneAsBytes[4] = high & 0xFF;
371 laneAsBytes[5] = (high >> 8) & 0xFF;
372 laneAsBytes[6] = (high >> 16) & 0xFF;
373 laneAsBytes[7] = (high >> 24) & 0xFF;
374 #endif
375 for(i=0; i<length; i++)
376 output[i] = input[i] ^ laneAsBytes[offset+i];
377 }
378
379 /* ---------------------------------------------------------------- */
380
KeccakP1600_ExtractAndAddLanes(const void * state,const unsigned char * input,unsigned char * output,unsigned int laneCount)381 void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *input, unsigned char *output, unsigned int laneCount)
382 {
383 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
384 const UINT32 * pI = (const UINT32 *)input;
385 UINT32 * pO = (UINT32 *)output;
386 const UINT32 * pS = (const UINT32 *)state;
387 UINT32 t, x0, x1;
388 int i;
389 for (i = laneCount-1; i >= 0; --i) {
390 #ifdef NO_MISALIGNED_ACCESSES
391 UINT32 low;
392 UINT32 high;
393 fromBitInterleaving(*(pS++), *(pS++), low, high, t, x0, x1);
394 *(pO++) = *(pI++) ^ low;
395 *(pO++) = *(pI++) ^ high;
396 #else
397 fromBitInterleavingAndXOR(*(pS++), *(pS++), *(pI++), *(pI++), *(pO++), *(pO++), t, x0, x1)
398 #endif
399 }
400 #else
401 unsigned int lanePosition;
402 for(lanePosition=0; lanePosition<laneCount; lanePosition++) {
403 UINT32 *stateAsHalfLanes = (UINT32*)state;
404 UINT32 low, high, temp, temp0, temp1;
405 fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1);
406 UINT8 laneAsBytes[8];
407 laneAsBytes[0] = low & 0xFF;
408 laneAsBytes[1] = (low >> 8) & 0xFF;
409 laneAsBytes[2] = (low >> 16) & 0xFF;
410 laneAsBytes[3] = (low >> 24) & 0xFF;
411 laneAsBytes[4] = high & 0xFF;
412 laneAsBytes[5] = (high >> 8) & 0xFF;
413 laneAsBytes[6] = (high >> 16) & 0xFF;
414 laneAsBytes[7] = (high >> 24) & 0xFF;
415 ((UINT32*)(output+lanePosition*8))[0] = ((UINT32*)(input+lanePosition*8))[0] ^ (*(const UINT32*)(laneAsBytes+0));
416 ((UINT32*)(output+lanePosition*8))[1] = ((UINT32*)(input+lanePosition*8))[0] ^ (*(const UINT32*)(laneAsBytes+4));
417 }
418 #endif
419 }
420 /* ---------------------------------------------------------------- */
421
KeccakP1600_ExtractAndAddBytes(const void * state,const unsigned char * input,unsigned char * output,unsigned int offset,unsigned int length)422 void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
423 {
424 SnP_ExtractAndAddBytes(state, input, output, offset, length, KeccakP1600_ExtractAndAddLanes, KeccakP1600_ExtractAndAddBytesInLane, 8);
425 }
426
427 /* ---------------------------------------------------------------- */
428
/*
 * Table of 2*24+1 32-bit words: 24 pairs of words followed by a single
 * trailing word 0x000000FF.
 * NOTE(review): presumably the Keccak-f[1600] round constants in
 * bit-interleaved form, one (even, odd) pair per round, read sequentially
 * through pRoundConstants by the KeccakRoundN macros (each of which XORs two
 * consecutive words into Aba0 and Aba1) - confirm against the code that
 * initializes pRoundConstants, which is not in this part of the file.
 * NOTE(review): the trailing 0x000000FF looks like an end-of-table marker for
 * the round loop - confirm.
 */
429 static const UINT32 KeccakF1600RoundConstants_int2[2*24+1] =
430 {
431 0x00000001UL, 0x00000000UL,
432 0x00000000UL, 0x00000089UL,
433 0x00000000UL, 0x8000008bUL,
434 0x00000000UL, 0x80008080UL,
435 0x00000001UL, 0x0000008bUL,
436 0x00000001UL, 0x00008000UL,
437 0x00000001UL, 0x80008088UL,
438 0x00000001UL, 0x80000082UL,
439 0x00000000UL, 0x0000000bUL,
440 0x00000000UL, 0x0000000aUL,
441 0x00000001UL, 0x00008082UL,
442 0x00000000UL, 0x00008003UL,
443 0x00000001UL, 0x0000808bUL,
444 0x00000001UL, 0x8000000bUL,
445 0x00000001UL, 0x8000008aUL,
446 0x00000001UL, 0x80000081UL,
447 0x00000000UL, 0x80000081UL,
448 0x00000000UL, 0x80000008UL,
449 0x00000000UL, 0x00000083UL,
450 0x00000000UL, 0x80008003UL,
451 0x00000001UL, 0x80008088UL,
452 0x00000000UL, 0x80000088UL,
453 0x00000001UL, 0x00008000UL,
454 0x00000000UL, 0x80008082UL,
455 0x000000FFUL
456 };
457
/*
 * KeccakRound0: statement-sequence macro, to be expanded in a scope that
 * already declares every variable it touches (the A??0/A??1 state words,
 * Ba..Bu, Cw..Cz, Da0..Du1 and pRoundConstants); nothing is declared here.
 *
 * First part: each of Cx, Cz, Cw, Cy (and, temporarily, Du1/Du0) is the XOR
 * of five A words sharing the same last letter and the same 0/1 suffix.
 * The ten D words are then formed from pairs of those values; the D?0 words
 * use ROL32(.., 1) on one operand, the D?1 words do not.
 *
 * Second part: ten groups of five. Each group XORs five A words with a D
 * word, most of them followed by a ROL32 by a fixed amount, into Ba..Bu, and
 * then writes B ^ ((~B') & B'') (B, B', B'' taken cyclically from
 * Ba, Be, Bi, Bo, Bu) back into the same five A words it read.
 * The first two groups additionally XOR *(pRoundConstants++) into Aba0 and
 * Aba1, so one expansion consumes two round-constant words.
 *
 * NOTE(review): presumably one Keccak-p[1600] round (theta, rho/pi, chi,
 * iota) on bit-interleaved half lanes, done in place - confirm against the
 * reference implementation.
 * The last statement has no trailing semicolon, unlike KeccakRound1 and
 * KeccakRound2; the caller must supply it.
 */
458 #define KeccakRound0() \
459 Cx = Abu0^Agu0^Aku0^Amu0^Asu0; \
460 Du1 = Abe1^Age1^Ake1^Ame1^Ase1; \
461 Da0 = Cx^ROL32(Du1, 1); \
462 Cz = Abu1^Agu1^Aku1^Amu1^Asu1; \
463 Du0 = Abe0^Age0^Ake0^Ame0^Ase0; \
464 Da1 = Cz^Du0; \
465 Cw = Abi0^Agi0^Aki0^Ami0^Asi0; \
466 Do0 = Cw^ROL32(Cz, 1); \
467 Cy = Abi1^Agi1^Aki1^Ami1^Asi1; \
468 Do1 = Cy^Cx; \
469 Cx = Aba0^Aga0^Aka0^Ama0^Asa0; \
470 De0 = Cx^ROL32(Cy, 1); \
471 Cz = Aba1^Aga1^Aka1^Ama1^Asa1; \
472 De1 = Cz^Cw; \
473 Cy = Abo1^Ago1^Ako1^Amo1^Aso1; \
474 Di0 = Du0^ROL32(Cy, 1); \
475 Cw = Abo0^Ago0^Ako0^Amo0^Aso0; \
476 Di1 = Du1^Cw; \
477 Du0 = Cw^ROL32(Cz, 1); \
478 Du1 = Cy^Cx; \
479 \
480 Ba = (Aba0^Da0); \
481 Be = ROL32((Age0^De0), 22); \
482 Bi = ROL32((Aki1^Di1), 22); \
483 Bo = ROL32((Amo1^Do1), 11); \
484 Bu = ROL32((Asu0^Du0), 7); \
485 Aba0 = Ba ^((~Be)& Bi ); \
486 Aba0 ^= *(pRoundConstants++); \
487 Age0 = Be ^((~Bi)& Bo ); \
488 Aki1 = Bi ^((~Bo)& Bu ); \
489 Amo1 = Bo ^((~Bu)& Ba ); \
490 Asu0 = Bu ^((~Ba)& Be ); \
491 Ba = (Aba1^Da1); \
492 Be = ROL32((Age1^De1), 22); \
493 Bi = ROL32((Aki0^Di0), 21); \
494 Bo = ROL32((Amo0^Do0), 10); \
495 Bu = ROL32((Asu1^Du1), 7); \
496 Aba1 = Ba ^((~Be)& Bi ); \
497 Aba1 ^= *(pRoundConstants++); \
498 Age1 = Be ^((~Bi)& Bo ); \
499 Aki0 = Bi ^((~Bo)& Bu ); \
500 Amo0 = Bo ^((~Bu)& Ba ); \
501 Asu1 = Bu ^((~Ba)& Be ); \
502 Bi = ROL32((Aka1^Da1), 2); \
503 Bo = ROL32((Ame1^De1), 23); \
504 Bu = ROL32((Asi1^Di1), 31); \
505 Ba = ROL32((Abo0^Do0), 14); \
506 Be = ROL32((Agu0^Du0), 10); \
507 Aka1 = Ba ^((~Be)& Bi ); \
508 Ame1 = Be ^((~Bi)& Bo ); \
509 Asi1 = Bi ^((~Bo)& Bu ); \
510 Abo0 = Bo ^((~Bu)& Ba ); \
511 Agu0 = Bu ^((~Ba)& Be ); \
512 Bi = ROL32((Aka0^Da0), 1); \
513 Bo = ROL32((Ame0^De0), 22); \
514 Bu = ROL32((Asi0^Di0), 30); \
515 Ba = ROL32((Abo1^Do1), 14); \
516 Be = ROL32((Agu1^Du1), 10); \
517 Aka0 = Ba ^((~Be)& Bi ); \
518 Ame0 = Be ^((~Bi)& Bo ); \
519 Asi0 = Bi ^((~Bo)& Bu ); \
520 Abo1 = Bo ^((~Bu)& Ba ); \
521 Agu1 = Bu ^((~Ba)& Be ); \
522 Bu = ROL32((Asa0^Da0), 9); \
523 Ba = ROL32((Abe1^De1), 1); \
524 Be = ROL32((Agi0^Di0), 3); \
525 Bi = ROL32((Ako1^Do1), 13); \
526 Bo = ROL32((Amu0^Du0), 4); \
527 Asa0 = Ba ^((~Be)& Bi ); \
528 Abe1 = Be ^((~Bi)& Bo ); \
529 Agi0 = Bi ^((~Bo)& Bu ); \
530 Ako1 = Bo ^((~Bu)& Ba ); \
531 Amu0 = Bu ^((~Ba)& Be ); \
532 Bu = ROL32((Asa1^Da1), 9); \
533 Ba = (Abe0^De0); \
534 Be = ROL32((Agi1^Di1), 3); \
535 Bi = ROL32((Ako0^Do0), 12); \
536 Bo = ROL32((Amu1^Du1), 4); \
537 Asa1 = Ba ^((~Be)& Bi ); \
538 Abe0 = Be ^((~Bi)& Bo ); \
539 Agi1 = Bi ^((~Bo)& Bu ); \
540 Ako0 = Bo ^((~Bu)& Ba ); \
541 Amu1 = Bu ^((~Ba)& Be ); \
542 Be = ROL32((Aga0^Da0), 18); \
543 Bi = ROL32((Ake0^De0), 5); \
544 Bo = ROL32((Ami1^Di1), 8); \
545 Bu = ROL32((Aso0^Do0), 28); \
546 Ba = ROL32((Abu1^Du1), 14); \
547 Aga0 = Ba ^((~Be)& Bi ); \
548 Ake0 = Be ^((~Bi)& Bo ); \
549 Ami1 = Bi ^((~Bo)& Bu ); \
550 Aso0 = Bo ^((~Bu)& Ba ); \
551 Abu1 = Bu ^((~Ba)& Be ); \
552 Be = ROL32((Aga1^Da1), 18); \
553 Bi = ROL32((Ake1^De1), 5); \
554 Bo = ROL32((Ami0^Di0), 7); \
555 Bu = ROL32((Aso1^Do1), 28); \
556 Ba = ROL32((Abu0^Du0), 13); \
557 Aga1 = Ba ^((~Be)& Bi ); \
558 Ake1 = Be ^((~Bi)& Bo ); \
559 Ami0 = Bi ^((~Bo)& Bu ); \
560 Aso1 = Bo ^((~Bu)& Ba ); \
561 Abu0 = Bu ^((~Ba)& Be ); \
562 Bo = ROL32((Ama1^Da1), 21); \
563 Bu = ROL32((Ase0^De0), 1); \
564 Ba = ROL32((Abi0^Di0), 31); \
565 Be = ROL32((Ago1^Do1), 28); \
566 Bi = ROL32((Aku1^Du1), 20); \
567 Ama1 = Ba ^((~Be)& Bi ); \
568 Ase0 = Be ^((~Bi)& Bo ); \
569 Abi0 = Bi ^((~Bo)& Bu ); \
570 Ago1 = Bo ^((~Bu)& Ba ); \
571 Aku1 = Bu ^((~Ba)& Be ); \
572 Bo = ROL32((Ama0^Da0), 20); \
573 Bu = ROL32((Ase1^De1), 1); \
574 Ba = ROL32((Abi1^Di1), 31); \
575 Be = ROL32((Ago0^Do0), 27); \
576 Bi = ROL32((Aku0^Du0), 19); \
577 Ama0 = Ba ^((~Be)& Bi ); \
578 Ase1 = Be ^((~Bi)& Bo ); \
579 Abi1 = Bi ^((~Bo)& Bu ); \
580 Ago0 = Bo ^((~Bu)& Ba ); \
581 Aku0 = Bu ^((~Ba)& Be )
582
/*
 * KeccakRound1: statement-sequence macro, to be expanded in a scope that
 * already declares every variable it touches (the A??0/A??1 state words,
 * Ba..Bu, Cw..Cz, Da0..Du1 and pRoundConstants); nothing is declared here.
 *
 * First part: each C value (and, temporarily, Du1/Du0) is the XOR of five A
 * words sharing the same last letter; here the 0/1 suffixes within one XOR
 * are mixed. The ten D words are then formed from pairs of those values,
 * with ROL32(.., 1) on one operand for the D?0 words.
 *
 * Second part: ten groups of five. Each group loads Ba..Bu from five A words
 * XORed with a D word (most followed by a ROL32 by a fixed amount) and writes
 * B ^ ((~B') & B'') back into the same five A words; the first group works
 * on Aba0, Ame1, Agi1, Aso1, Aku1. The first two groups also XOR
 * *(pRoundConstants++) into Aba0 and Aba1 (two constant words per expansion).
 *
 * NOTE(review): appears to apply the same operation sequence as KeccakRound0
 * with a different assignment of A words to each position, presumably
 * because the in-place rounds leave the lanes in permuted locations -
 * confirm against the reference implementation.
 * The expansion ends with a semicolon.
 */
583 #define KeccakRound1() \
584 Cx = Asu0^Agu0^Amu0^Abu1^Aku1; \
585 Du1 = Age1^Ame0^Abe0^Ake1^Ase1; \
586 Da0 = Cx^ROL32(Du1, 1); \
587 Cz = Asu1^Agu1^Amu1^Abu0^Aku0; \
588 Du0 = Age0^Ame1^Abe1^Ake0^Ase0; \
589 Da1 = Cz^Du0; \
590 Cw = Aki1^Asi1^Agi0^Ami1^Abi0; \
591 Do0 = Cw^ROL32(Cz, 1); \
592 Cy = Aki0^Asi0^Agi1^Ami0^Abi1; \
593 Do1 = Cy^Cx; \
594 Cx = Aba0^Aka1^Asa0^Aga0^Ama1; \
595 De0 = Cx^ROL32(Cy, 1); \
596 Cz = Aba1^Aka0^Asa1^Aga1^Ama0; \
597 De1 = Cz^Cw; \
598 Cy = Amo0^Abo1^Ako0^Aso1^Ago0; \
599 Di0 = Du0^ROL32(Cy, 1); \
600 Cw = Amo1^Abo0^Ako1^Aso0^Ago1; \
601 Di1 = Du1^Cw; \
602 Du0 = Cw^ROL32(Cz, 1); \
603 Du1 = Cy^Cx; \
604 \
605 Ba = (Aba0^Da0); \
606 Be = ROL32((Ame1^De0), 22); \
607 Bi = ROL32((Agi1^Di1), 22); \
608 Bo = ROL32((Aso1^Do1), 11); \
609 Bu = ROL32((Aku1^Du0), 7); \
610 Aba0 = Ba ^((~Be)& Bi ); \
611 Aba0 ^= *(pRoundConstants++); \
612 Ame1 = Be ^((~Bi)& Bo ); \
613 Agi1 = Bi ^((~Bo)& Bu ); \
614 Aso1 = Bo ^((~Bu)& Ba ); \
615 Aku1 = Bu ^((~Ba)& Be ); \
616 Ba = (Aba1^Da1); \
617 Be = ROL32((Ame0^De1), 22); \
618 Bi = ROL32((Agi0^Di0), 21); \
619 Bo = ROL32((Aso0^Do0), 10); \
620 Bu = ROL32((Aku0^Du1), 7); \
621 Aba1 = Ba ^((~Be)& Bi ); \
622 Aba1 ^= *(pRoundConstants++); \
623 Ame0 = Be ^((~Bi)& Bo ); \
624 Agi0 = Bi ^((~Bo)& Bu ); \
625 Aso0 = Bo ^((~Bu)& Ba ); \
626 Aku0 = Bu ^((~Ba)& Be ); \
627 Bi = ROL32((Asa1^Da1), 2); \
628 Bo = ROL32((Ake1^De1), 23); \
629 Bu = ROL32((Abi1^Di1), 31); \
630 Ba = ROL32((Amo1^Do0), 14); \
631 Be = ROL32((Agu0^Du0), 10); \
632 Asa1 = Ba ^((~Be)& Bi ); \
633 Ake1 = Be ^((~Bi)& Bo ); \
634 Abi1 = Bi ^((~Bo)& Bu ); \
635 Amo1 = Bo ^((~Bu)& Ba ); \
636 Agu0 = Bu ^((~Ba)& Be ); \
637 Bi = ROL32((Asa0^Da0), 1); \
638 Bo = ROL32((Ake0^De0), 22); \
639 Bu = ROL32((Abi0^Di0), 30); \
640 Ba = ROL32((Amo0^Do1), 14); \
641 Be = ROL32((Agu1^Du1), 10); \
642 Asa0 = Ba ^((~Be)& Bi ); \
643 Ake0 = Be ^((~Bi)& Bo ); \
644 Abi0 = Bi ^((~Bo)& Bu ); \
645 Amo0 = Bo ^((~Bu)& Ba ); \
646 Agu1 = Bu ^((~Ba)& Be ); \
647 Bu = ROL32((Ama1^Da0), 9); \
648 Ba = ROL32((Age1^De1), 1); \
649 Be = ROL32((Asi1^Di0), 3); \
650 Bi = ROL32((Ako0^Do1), 13); \
651 Bo = ROL32((Abu1^Du0), 4); \
652 Ama1 = Ba ^((~Be)& Bi ); \
653 Age1 = Be ^((~Bi)& Bo ); \
654 Asi1 = Bi ^((~Bo)& Bu ); \
655 Ako0 = Bo ^((~Bu)& Ba ); \
656 Abu1 = Bu ^((~Ba)& Be ); \
657 Bu = ROL32((Ama0^Da1), 9); \
658 Ba = (Age0^De0); \
659 Be = ROL32((Asi0^Di1), 3); \
660 Bi = ROL32((Ako1^Do0), 12); \
661 Bo = ROL32((Abu0^Du1), 4); \
662 Ama0 = Ba ^((~Be)& Bi ); \
663 Age0 = Be ^((~Bi)& Bo ); \
664 Asi0 = Bi ^((~Bo)& Bu ); \
665 Ako1 = Bo ^((~Bu)& Ba ); \
666 Abu0 = Bu ^((~Ba)& Be ); \
667 Be = ROL32((Aka1^Da0), 18); \
668 Bi = ROL32((Abe1^De0), 5); \
669 Bo = ROL32((Ami0^Di1), 8); \
670 Bu = ROL32((Ago1^Do0), 28); \
671 Ba = ROL32((Asu1^Du1), 14); \
672 Aka1 = Ba ^((~Be)& Bi ); \
673 Abe1 = Be ^((~Bi)& Bo ); \
674 Ami0 = Bi ^((~Bo)& Bu ); \
675 Ago1 = Bo ^((~Bu)& Ba ); \
676 Asu1 = Bu ^((~Ba)& Be ); \
677 Be = ROL32((Aka0^Da1), 18); \
678 Bi = ROL32((Abe0^De1), 5); \
679 Bo = ROL32((Ami1^Di0), 7); \
680 Bu = ROL32((Ago0^Do1), 28); \
681 Ba = ROL32((Asu0^Du0), 13); \
682 Aka0 = Ba ^((~Be)& Bi ); \
683 Abe0 = Be ^((~Bi)& Bo ); \
684 Ami1 = Bi ^((~Bo)& Bu ); \
685 Ago0 = Bo ^((~Bu)& Ba ); \
686 Asu0 = Bu ^((~Ba)& Be ); \
687 Bo = ROL32((Aga1^Da1), 21); \
688 Bu = ROL32((Ase0^De0), 1); \
689 Ba = ROL32((Aki1^Di0), 31); \
690 Be = ROL32((Abo1^Do1), 28); \
691 Bi = ROL32((Amu1^Du1), 20); \
692 Aga1 = Ba ^((~Be)& Bi ); \
693 Ase0 = Be ^((~Bi)& Bo ); \
694 Aki1 = Bi ^((~Bo)& Bu ); \
695 Abo1 = Bo ^((~Bu)& Ba ); \
696 Amu1 = Bu ^((~Ba)& Be ); \
697 Bo = ROL32((Aga0^Da0), 20); \
698 Bu = ROL32((Ase1^De1), 1); \
699 Ba = ROL32((Aki0^Di1), 31); \
700 Be = ROL32((Abo0^Do0), 27); \
701 Bi = ROL32((Amu0^Du0), 19); \
702 Aga0 = Ba ^((~Be)& Bi ); \
703 Ase1 = Be ^((~Bi)& Bo ); \
704 Aki0 = Bi ^((~Bo)& Bu ); \
705 Abo0 = Bo ^((~Bu)& Ba ); \
706 Amu0 = Bu ^((~Ba)& Be );
707
/*
 * KeccakRound2: statement-sequence macro, to be expanded in a scope that
 * already declares every variable it touches (the A??0/A??1 state words,
 * Ba..Bu, Cw..Cz, Da0..Du1 and pRoundConstants); nothing is declared here.
 *
 * First part: each C value (and, temporarily, Du1/Du0) is the XOR of five A
 * words sharing the same last letter; the 0/1 suffixes within one XOR are
 * mixed. The ten D words are then formed from pairs of those values, with
 * ROL32(.., 1) on one operand for the D?0 words.
 *
 * Second part: ten groups of five. Each group loads Ba..Bu from five A words
 * XORed with a D word (most followed by a ROL32 by a fixed amount) and writes
 * B ^ ((~B') & B'') back into the same five A words; the first group works
 * on Aba0, Ake1, Asi0, Ago0, Amu1. The first two groups also XOR
 * *(pRoundConstants++) into Aba0 and Aba1 (two constant words per expansion).
 *
 * NOTE(review): appears to apply the same operation sequence as KeccakRound0
 * with yet another assignment of A words to each position, presumably
 * because the in-place rounds leave the lanes in permuted locations -
 * confirm against the reference implementation.
 * The expansion ends with a semicolon.
 */
708 #define KeccakRound2() \
709 Cx = Aku1^Agu0^Abu1^Asu1^Amu1; \
710 Du1 = Ame0^Ake0^Age0^Abe0^Ase1; \
711 Da0 = Cx^ROL32(Du1, 1); \
712 Cz = Aku0^Agu1^Abu0^Asu0^Amu0; \
713 Du0 = Ame1^Ake1^Age1^Abe1^Ase0; \
714 Da1 = Cz^Du0; \
715 Cw = Agi1^Abi1^Asi1^Ami0^Aki1; \
716 Do0 = Cw^ROL32(Cz, 1); \
717 Cy = Agi0^Abi0^Asi0^Ami1^Aki0; \
718 Do1 = Cy^Cx; \
719 Cx = Aba0^Asa1^Ama1^Aka1^Aga1; \
720 De0 = Cx^ROL32(Cy, 1); \
721 Cz = Aba1^Asa0^Ama0^Aka0^Aga0; \
722 De1 = Cz^Cw; \
723 Cy = Aso0^Amo0^Ako1^Ago0^Abo0; \
724 Di0 = Du0^ROL32(Cy, 1); \
725 Cw = Aso1^Amo1^Ako0^Ago1^Abo1; \
726 Di1 = Du1^Cw; \
727 Du0 = Cw^ROL32(Cz, 1); \
728 Du1 = Cy^Cx; \
729 \
730 Ba = (Aba0^Da0); \
731 Be = ROL32((Ake1^De0), 22); \
732 Bi = ROL32((Asi0^Di1), 22); \
733 Bo = ROL32((Ago0^Do1), 11); \
734 Bu = ROL32((Amu1^Du0), 7); \
735 Aba0 = Ba ^((~Be)& Bi ); \
736 Aba0 ^= *(pRoundConstants++); \
737 Ake1 = Be ^((~Bi)& Bo ); \
738 Asi0 = Bi ^((~Bo)& Bu ); \
739 Ago0 = Bo ^((~Bu)& Ba ); \
740 Amu1 = Bu ^((~Ba)& Be ); \
741 Ba = (Aba1^Da1); \
742 Be = ROL32((Ake0^De1), 22); \
743 Bi = ROL32((Asi1^Di0), 21); \
744 Bo = ROL32((Ago1^Do0), 10); \
745 Bu = ROL32((Amu0^Du1), 7); \
746 Aba1 = Ba ^((~Be)& Bi ); \
747 Aba1 ^= *(pRoundConstants++); \
748 Ake0 = Be ^((~Bi)& Bo ); \
749 Asi1 = Bi ^((~Bo)& Bu ); \
750 Ago1 = Bo ^((~Bu)& Ba ); \
751 Amu0 = Bu ^((~Ba)& Be ); \
752 Bi = ROL32((Ama0^Da1), 2); \
753 Bo = ROL32((Abe0^De1), 23); \
754 Bu = ROL32((Aki0^Di1), 31); \
755 Ba = ROL32((Aso1^Do0), 14); \
756 Be = ROL32((Agu0^Du0), 10); \
757 Ama0 = Ba ^((~Be)& Bi ); \
758 Abe0 = Be ^((~Bi)& Bo ); \
759 Aki0 = Bi ^((~Bo)& Bu ); \
760 Aso1 = Bo ^((~Bu)& Ba ); \
761 Agu0 = Bu ^((~Ba)& Be ); \
762 Bi = ROL32((Ama1^Da0), 1); \
763 Bo = ROL32((Abe1^De0), 22); \
764 Bu = ROL32((Aki1^Di0), 30); \
765 Ba = ROL32((Aso0^Do1), 14); \
766 Be = ROL32((Agu1^Du1), 10); \
767 Ama1 = Ba ^((~Be)& Bi ); \
768 Abe1 = Be ^((~Bi)& Bo ); \
769 Aki1 = Bi ^((~Bo)& Bu ); \
770 Aso0 = Bo ^((~Bu)& Ba ); \
771 Agu1 = Bu ^((~Ba)& Be ); \
772 Bu = ROL32((Aga1^Da0), 9); \
773 Ba = ROL32((Ame0^De1), 1); \
774 Be = ROL32((Abi1^Di0), 3); \
775 Bi = ROL32((Ako1^Do1), 13); \
776 Bo = ROL32((Asu1^Du0), 4); \
777 Aga1 = Ba ^((~Be)& Bi ); \
778 Ame0 = Be ^((~Bi)& Bo ); \
779 Abi1 = Bi ^((~Bo)& Bu ); \
780 Ako1 = Bo ^((~Bu)& Ba ); \
781 Asu1 = Bu ^((~Ba)& Be ); \
782 Bu = ROL32((Aga0^Da1), 9); \
783 Ba = (Ame1^De0); \
784 Be = ROL32((Abi0^Di1), 3); \
785 Bi = ROL32((Ako0^Do0), 12); \
786 Bo = ROL32((Asu0^Du1), 4); \
787 Aga0 = Ba ^((~Be)& Bi ); \
788 Ame1 = Be ^((~Bi)& Bo ); \
789 Abi0 = Bi ^((~Bo)& Bu ); \
790 Ako0 = Bo ^((~Bu)& Ba ); \
791 Asu0 = Bu ^((~Ba)& Be ); \
792 Be = ROL32((Asa1^Da0), 18); \
793 Bi = ROL32((Age1^De0), 5); \
794 Bo = ROL32((Ami1^Di1), 8); \
795 Bu = ROL32((Abo1^Do0), 28); \
796 Ba = ROL32((Aku0^Du1), 14); \
797 Asa1 = Ba ^((~Be)& Bi ); \
798 Age1 = Be ^((~Bi)& Bo ); \
799 Ami1 = Bi ^((~Bo)& Bu ); \
800 Abo1 = Bo ^((~Bu)& Ba ); \
801 Aku0 = Bu ^((~Ba)& Be ); \
802 Be = ROL32((Asa0^Da1), 18); \
803 Bi = ROL32((Age0^De1), 5); \
804 Bo = ROL32((Ami0^Di0), 7); \
805 Bu = ROL32((Abo0^Do1), 28); \
806 Ba = ROL32((Aku1^Du0), 13); \
807 Asa0 = Ba ^((~Be)& Bi ); \
808 Age0 = Be ^((~Bi)& Bo ); \
809 Ami0 = Bi ^((~Bo)& Bu ); \
810 Abo0 = Bo ^((~Bu)& Ba ); \
811 Aku1 = Bu ^((~Ba)& Be ); \
812 Bo = ROL32((Aka0^Da1), 21); \
813 Bu = ROL32((Ase0^De0), 1); \
814 Ba = ROL32((Agi1^Di0), 31); \
815 Be = ROL32((Amo0^Do1), 28); \
816 Bi = ROL32((Abu0^Du1), 20); \
817 Aka0 = Ba ^((~Be)& Bi ); \
818 Ase0 = Be ^((~Bi)& Bo ); \
819 Agi1 = Bi ^((~Bo)& Bu ); \
820 Amo0 = Bo ^((~Bu)& Ba ); \
821 Abu0 = Bu ^((~Ba)& Be ); \
822 Bo = ROL32((Aka1^Da0), 20); \
823 Bu = ROL32((Ase1^De1), 1); \
824 Ba = ROL32((Agi0^Di1), 31); \
825 Be = ROL32((Amo1^Do0), 27); \
826 Bi = ROL32((Abu1^Du0), 19); \
827 Aka1 = Ba ^((~Be)& Bi ); \
828 Ase1 = Be ^((~Bi)& Bo ); \
829 Agi0 = Bi ^((~Bo)& Bu ); \
830 Amo1 = Bo ^((~Bu)& Ba ); \
831 Abu1 = Bu ^((~Ba)& Be );
832
/*
 * KeccakRound3(): one fully unrolled round, written against the half-lane
 * names A??0/A??1 and the temporaries Cx..Cw, Da0..Du1 and Ba..Bu, all of
 * which must be in scope where the macro is expanded.
 *
 * First part: groups of five half-lanes are XORed together into Cx/Cy/Cz/Cw
 * (Du0 and Du1 are also used as scratch for two of these sums) and combined
 * pairwise into the ten D words; the "0" word of each pair uses an operand
 * rotated left by one bit, the "1" word does not.
 *
 * Second part: ten groups of five half-lanes. Each half-lane is XORed with a
 * D word and rotated by a fixed amount (two words are not rotated) into
 * Ba..Bu, then written back as B[x] ^ (~B[x+1] & B[x+2]), cyclically over
 * a, e, i, o, u.
 *
 * Two words are read from pRoundConstants (post-incremented each time) and
 * XORed into Aba0 and Aba1.
 *
 * NOTE(review): this looks like the theta, rho/pi, chi and iota steps of the
 * Keccak round on bit-interleaved 32-bit halves, with lanes left permuted in
 * place between rounds - confirm against the other KeccakRound macros.
 */
833 #define KeccakRound3() \
/* Five-way XOR sums and the ten D words derived from them. */ \
834 Cx = Amu1^Agu0^Asu1^Aku0^Abu0; \
835 Du1 = Ake0^Abe1^Ame1^Age0^Ase1; \
836 Da0 = Cx^ROL32(Du1, 1); \
837 Cz = Amu0^Agu1^Asu0^Aku1^Abu1; \
838 Du0 = Ake1^Abe0^Ame0^Age1^Ase0; \
839 Da1 = Cz^Du0; \
840 Cw = Asi0^Aki0^Abi1^Ami1^Agi1; \
841 Do0 = Cw^ROL32(Cz, 1); \
842 Cy = Asi1^Aki1^Abi0^Ami0^Agi0; \
843 Do1 = Cy^Cx; \
844 Cx = Aba0^Ama0^Aga1^Asa1^Aka0; \
845 De0 = Cx^ROL32(Cy, 1); \
846 Cz = Aba1^Ama1^Aga0^Asa0^Aka1; \
847 De1 = Cz^Cw; \
848 Cy = Ago1^Aso0^Ako0^Abo0^Amo1; \
/* Du0/Du1 still hold the XOR sums from above here; they are consumed */ \
/* by Di0/Di1 before being overwritten with their final values.       */ \
849 Di0 = Du0^ROL32(Cy, 1); \
850 Cw = Ago0^Aso1^Ako1^Abo1^Amo0; \
851 Di1 = Du1^Cw; \
852 Du0 = Cw^ROL32(Cz, 1); \
853 Du1 = Cy^Cx; \
854 \
/* Ten groups of five: B = ROL32(A ^ D, r), then A = B ^ (~B_next & B_next2). */ \
855 Ba = (Aba0^Da0); \
856 Be = ROL32((Abe0^De0), 22); \
857 Bi = ROL32((Abi0^Di1), 22); \
858 Bo = ROL32((Abo0^Do1), 11); \
859 Bu = ROL32((Abu0^Du0), 7); \
860 Aba0 = Ba ^((~Be)& Bi ); \
/* XOR in the next round-constant word and advance the pointer. */ \
861 Aba0 ^= *(pRoundConstants++); \
862 Abe0 = Be ^((~Bi)& Bo ); \
863 Abi0 = Bi ^((~Bo)& Bu ); \
864 Abo0 = Bo ^((~Bu)& Ba ); \
865 Abu0 = Bu ^((~Ba)& Be ); \
866 Ba = (Aba1^Da1); \
867 Be = ROL32((Abe1^De1), 22); \
868 Bi = ROL32((Abi1^Di0), 21); \
869 Bo = ROL32((Abo1^Do0), 10); \
870 Bu = ROL32((Abu1^Du1), 7); \
871 Aba1 = Ba ^((~Be)& Bi ); \
/* Second round-constant word, for the other half of the same lane. */ \
872 Aba1 ^= *(pRoundConstants++); \
873 Abe1 = Be ^((~Bi)& Bo ); \
874 Abi1 = Bi ^((~Bo)& Bu ); \
875 Abo1 = Bo ^((~Bu)& Ba ); \
876 Abu1 = Bu ^((~Ba)& Be ); \
877 Bi = ROL32((Aga0^Da1), 2); \
878 Bo = ROL32((Age0^De1), 23); \
879 Bu = ROL32((Agi0^Di1), 31); \
880 Ba = ROL32((Ago0^Do0), 14); \
881 Be = ROL32((Agu0^Du0), 10); \
882 Aga0 = Ba ^((~Be)& Bi ); \
883 Age0 = Be ^((~Bi)& Bo ); \
884 Agi0 = Bi ^((~Bo)& Bu ); \
885 Ago0 = Bo ^((~Bu)& Ba ); \
886 Agu0 = Bu ^((~Ba)& Be ); \
887 Bi = ROL32((Aga1^Da0), 1); \
888 Bo = ROL32((Age1^De0), 22); \
889 Bu = ROL32((Agi1^Di0), 30); \
890 Ba = ROL32((Ago1^Do1), 14); \
891 Be = ROL32((Agu1^Du1), 10); \
892 Aga1 = Ba ^((~Be)& Bi ); \
893 Age1 = Be ^((~Bi)& Bo ); \
894 Agi1 = Bi ^((~Bo)& Bu ); \
895 Ago1 = Bo ^((~Bu)& Ba ); \
896 Agu1 = Bu ^((~Ba)& Be ); \
897 Bu = ROL32((Aka0^Da0), 9); \
898 Ba = ROL32((Ake0^De1), 1); \
899 Be = ROL32((Aki0^Di0), 3); \
900 Bi = ROL32((Ako0^Do1), 13); \
901 Bo = ROL32((Aku0^Du0), 4); \
902 Aka0 = Ba ^((~Be)& Bi ); \
903 Ake0 = Be ^((~Bi)& Bo ); \
904 Aki0 = Bi ^((~Bo)& Bu ); \
905 Ako0 = Bo ^((~Bu)& Ba ); \
906 Aku0 = Bu ^((~Ba)& Be ); \
907 Bu = ROL32((Aka1^Da1), 9); \
908 Ba = (Ake1^De0); \
909 Be = ROL32((Aki1^Di1), 3); \
910 Bi = ROL32((Ako1^Do0), 12); \
911 Bo = ROL32((Aku1^Du1), 4); \
912 Aka1 = Ba ^((~Be)& Bi ); \
913 Ake1 = Be ^((~Bi)& Bo ); \
914 Aki1 = Bi ^((~Bo)& Bu ); \
915 Ako1 = Bo ^((~Bu)& Ba ); \
916 Aku1 = Bu ^((~Ba)& Be ); \
917 Be = ROL32((Ama0^Da0), 18); \
918 Bi = ROL32((Ame0^De0), 5); \
919 Bo = ROL32((Ami0^Di1), 8); \
920 Bu = ROL32((Amo0^Do0), 28); \
921 Ba = ROL32((Amu0^Du1), 14); \
922 Ama0 = Ba ^((~Be)& Bi ); \
923 Ame0 = Be ^((~Bi)& Bo ); \
924 Ami0 = Bi ^((~Bo)& Bu ); \
925 Amo0 = Bo ^((~Bu)& Ba ); \
926 Amu0 = Bu ^((~Ba)& Be ); \
927 Be = ROL32((Ama1^Da1), 18); \
928 Bi = ROL32((Ame1^De1), 5); \
929 Bo = ROL32((Ami1^Di0), 7); \
930 Bu = ROL32((Amo1^Do1), 28); \
931 Ba = ROL32((Amu1^Du0), 13); \
932 Ama1 = Ba ^((~Be)& Bi ); \
933 Ame1 = Be ^((~Bi)& Bo ); \
934 Ami1 = Bi ^((~Bo)& Bu ); \
935 Amo1 = Bo ^((~Bu)& Ba ); \
936 Amu1 = Bu ^((~Ba)& Be ); \
937 Bo = ROL32((Asa0^Da1), 21); \
938 Bu = ROL32((Ase0^De0), 1); \
939 Ba = ROL32((Asi0^Di0), 31); \
940 Be = ROL32((Aso0^Do1), 28); \
941 Bi = ROL32((Asu0^Du1), 20); \
942 Asa0 = Ba ^((~Be)& Bi ); \
943 Ase0 = Be ^((~Bi)& Bo ); \
944 Asi0 = Bi ^((~Bo)& Bu ); \
945 Aso0 = Bo ^((~Bu)& Ba ); \
946 Asu0 = Bu ^((~Ba)& Be ); \
947 Bo = ROL32((Asa1^Da0), 20); \
948 Bu = ROL32((Ase1^De1), 1); \
949 Ba = ROL32((Asi1^Di1), 31); \
950 Be = ROL32((Aso1^Do0), 27); \
951 Bi = ROL32((Asu1^Du0), 19); \
952 Asa1 = Ba ^((~Be)& Bi ); \
953 Ase1 = Be ^((~Bi)& Bo ); \
954 Asi1 = Bi ^((~Bo)& Bu ); \
955 Aso1 = Bo ^((~Bu)& Ba ); \
956 Asu1 = Bu ^((~Ba)& Be );
957
KeccakP1600_Permute_Nrounds(void * state,unsigned int nRounds)958 void KeccakP1600_Permute_Nrounds(void *state, unsigned int nRounds)
959 {
960 UINT32 Da0, De0, Di0, Do0, Du0;
961 UINT32 Da1, De1, Di1, Do1, Du1;
962 UINT32 Ba, Be, Bi, Bo, Bu;
963 UINT32 Cx, Cy, Cz, Cw;
964 const UINT32 *pRoundConstants = KeccakF1600RoundConstants_int2+(24-nRounds)*2;
965 UINT32 *stateAsHalfLanes = (UINT32*)state;
966 #define Aba0 stateAsHalfLanes[ 0]
967 #define Aba1 stateAsHalfLanes[ 1]
968 #define Abe0 stateAsHalfLanes[ 2]
969 #define Abe1 stateAsHalfLanes[ 3]
970 #define Abi0 stateAsHalfLanes[ 4]
971 #define Abi1 stateAsHalfLanes[ 5]
972 #define Abo0 stateAsHalfLanes[ 6]
973 #define Abo1 stateAsHalfLanes[ 7]
974 #define Abu0 stateAsHalfLanes[ 8]
975 #define Abu1 stateAsHalfLanes[ 9]
976 #define Aga0 stateAsHalfLanes[10]
977 #define Aga1 stateAsHalfLanes[11]
978 #define Age0 stateAsHalfLanes[12]
979 #define Age1 stateAsHalfLanes[13]
980 #define Agi0 stateAsHalfLanes[14]
981 #define Agi1 stateAsHalfLanes[15]
982 #define Ago0 stateAsHalfLanes[16]
983 #define Ago1 stateAsHalfLanes[17]
984 #define Agu0 stateAsHalfLanes[18]
985 #define Agu1 stateAsHalfLanes[19]
986 #define Aka0 stateAsHalfLanes[20]
987 #define Aka1 stateAsHalfLanes[21]
988 #define Ake0 stateAsHalfLanes[22]
989 #define Ake1 stateAsHalfLanes[23]
990 #define Aki0 stateAsHalfLanes[24]
991 #define Aki1 stateAsHalfLanes[25]
992 #define Ako0 stateAsHalfLanes[26]
993 #define Ako1 stateAsHalfLanes[27]
994 #define Aku0 stateAsHalfLanes[28]
995 #define Aku1 stateAsHalfLanes[29]
996 #define Ama0 stateAsHalfLanes[30]
997 #define Ama1 stateAsHalfLanes[31]
998 #define Ame0 stateAsHalfLanes[32]
999 #define Ame1 stateAsHalfLanes[33]
1000 #define Ami0 stateAsHalfLanes[34]
1001 #define Ami1 stateAsHalfLanes[35]
1002 #define Amo0 stateAsHalfLanes[36]
1003 #define Amo1 stateAsHalfLanes[37]
1004 #define Amu0 stateAsHalfLanes[38]
1005 #define Amu1 stateAsHalfLanes[39]
1006 #define Asa0 stateAsHalfLanes[40]
1007 #define Asa1 stateAsHalfLanes[41]
1008 #define Ase0 stateAsHalfLanes[42]
1009 #define Ase1 stateAsHalfLanes[43]
1010 #define Asi0 stateAsHalfLanes[44]
1011 #define Asi1 stateAsHalfLanes[45]
1012 #define Aso0 stateAsHalfLanes[46]
1013 #define Aso1 stateAsHalfLanes[47]
1014 #define Asu0 stateAsHalfLanes[48]
1015 #define Asu1 stateAsHalfLanes[49]
1016
1017 nRounds &= 3;
1018 switch ( nRounds )
1019 {
1020 #define I0 Ba
1021 #define I1 Be
1022 #define T0 Bi
1023 #define T1 Bo
1024 #define SwapPI13( in0,in1,in2,in3,eo0,eo1,eo2,eo3 ) \
1025 I0 = (in0)[0]; I1 = (in0)[1]; \
1026 T0 = (in1)[0]; T1 = (in1)[1]; \
1027 (in0)[eo0] = T0; (in0)[eo0^1] = T1; \
1028 T0 = (in2)[0]; T1 = (in2)[1]; \
1029 (in1)[eo1] = T0; (in1)[eo1^1] = T1; \
1030 T0 = (in3)[0]; T1 = (in3)[1]; \
1031 (in2)[eo2] = T0; (in2)[eo2^1] = T1; \
1032 (in3)[eo3] = I0; (in3)[eo3^1] = I1
1033 #define SwapPI2( in0,in1,in2,in3 ) \
1034 I0 = (in0)[0]; I1 = (in0)[1]; \
1035 T0 = (in1)[0]; T1 = (in1)[1]; \
1036 (in0)[1] = T0; (in0)[0] = T1; \
1037 (in1)[1] = I0; (in1)[0] = I1; \
1038 I0 = (in2)[0]; I1 = (in2)[1]; \
1039 T0 = (in3)[0]; T1 = (in3)[1]; \
1040 (in2)[1] = T0; (in2)[0] = T1; \
1041 (in3)[1] = I0; (in3)[0] = I1
1042 #define SwapEO( even,odd ) T0 = even; even = odd; odd = T0
1043
1044 case 1:
1045 SwapPI13( &Aga0, &Aka0, &Asa0, &Ama0, 1, 0, 1, 0 );
1046 SwapPI13( &Abe0, &Age0, &Ame0, &Ake0, 0, 1, 0, 1 );
1047 SwapPI13( &Abi0, &Aki0, &Agi0, &Asi0, 1, 0, 1, 0 );
1048 SwapEO( Ami0, Ami1 );
1049 SwapPI13( &Abo0, &Amo0, &Aso0, &Ago0, 1, 0, 1, 0 );
1050 SwapEO( Ako0, Ako1 );
1051 SwapPI13( &Abu0, &Asu0, &Aku0, &Amu0, 0, 1, 0, 1 );
1052 break;
1053
1054 case 2:
1055 SwapPI2( &Aga0, &Asa0, &Aka0, &Ama0 );
1056 SwapPI2( &Abe0, &Ame0, &Age0, &Ake0 );
1057 SwapPI2( &Abi0, &Agi0, &Aki0, &Asi0 );
1058 SwapPI2( &Abo0, &Aso0, &Ago0, &Amo0 );
1059 SwapPI2( &Abu0, &Aku0, &Amu0, &Asu0 );
1060 break;
1061
1062 case 3:
1063 SwapPI13( &Aga0, &Ama0, &Asa0, &Aka0, 0, 1, 0, 1 );
1064 SwapPI13( &Abe0, &Ake0, &Ame0, &Age0, 1, 0, 1, 0 );
1065 SwapPI13( &Abi0, &Asi0, &Agi0, &Aki0, 0, 1, 0, 1 );
1066 SwapEO( Ami0, Ami1 );
1067 SwapPI13( &Abo0, &Ago0, &Aso0, &Amo0, 0, 1, 0, 1 );
1068 SwapEO( Ako0, Ako1 );
1069 SwapPI13( &Abu0, &Amu0, &Aku0, &Asu0, 1, 0, 1, 0 );
1070 break;
1071 #undef I0
1072 #undef I1
1073 #undef T0
1074 #undef T1
1075 #undef SwapPI13
1076 #undef SwapPI2
1077 #undef SwapEO
1078 }
1079
1080 do
1081 {
1082 /* Code for 4 rounds, using factor 2 interleaving, 64-bit lanes mapped to 32-bit words */
1083 switch ( nRounds )
1084 {
1085 case 0: KeccakRound0(); /* fall through */
1086 case 3: KeccakRound1();
1087 case 2: KeccakRound2();
1088 case 1: KeccakRound3();
1089 }
1090 nRounds = 0;
1091 }
1092 while ( *pRoundConstants != 0xFF );
1093
1094 #undef Aba0
1095 #undef Aba1
1096 #undef Abe0
1097 #undef Abe1
1098 #undef Abi0
1099 #undef Abi1
1100 #undef Abo0
1101 #undef Abo1
1102 #undef Abu0
1103 #undef Abu1
1104 #undef Aga0
1105 #undef Aga1
1106 #undef Age0
1107 #undef Age1
1108 #undef Agi0
1109 #undef Agi1
1110 #undef Ago0
1111 #undef Ago1
1112 #undef Agu0
1113 #undef Agu1
1114 #undef Aka0
1115 #undef Aka1
1116 #undef Ake0
1117 #undef Ake1
1118 #undef Aki0
1119 #undef Aki1
1120 #undef Ako0
1121 #undef Ako1
1122 #undef Aku0
1123 #undef Aku1
1124 #undef Ama0
1125 #undef Ama1
1126 #undef Ame0
1127 #undef Ame1
1128 #undef Ami0
1129 #undef Ami1
1130 #undef Amo0
1131 #undef Amo1
1132 #undef Amu0
1133 #undef Amu1
1134 #undef Asa0
1135 #undef Asa1
1136 #undef Ase0
1137 #undef Ase1
1138 #undef Asi0
1139 #undef Asi1
1140 #undef Aso0
1141 #undef Aso1
1142 #undef Asu0
1143 #undef Asu1
1144 }
1145
1146 /* ---------------------------------------------------------------- */
1147
/*
 * Apply 12 rounds to the state in place.
 * Thin wrapper: forwards to KeccakP1600_Permute_Nrounds with nRounds = 12.
 */
void KeccakP1600_Permute_12rounds(void *state)
{
    KeccakP1600_Permute_Nrounds(state, 12);
}
1152
1153 /* ---------------------------------------------------------------- */
1154
/*
 * Apply 24 rounds to the state in place.
 * Thin wrapper: forwards to KeccakP1600_Permute_Nrounds with nRounds = 24.
 */
void KeccakP1600_Permute_24rounds(void *state)
{
    KeccakP1600_Permute_Nrounds(state, 24);
}
1159