1 /*
2 Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
3 Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
4 denoted as "the implementer".
5
6 For more information, feedback or questions, please refer to our websites:
7 http://keccak.noekeon.org/
8 http://keyak.noekeon.org/
9 http://ketje.noekeon.org/
10
11 To the extent possible under law, the implementer has waived all copyright
12 and related or neighboring rights to the source code in this file.
13 http://creativecommons.org/publicdomain/zero/1.0/
14 */
15
16 #include <string.h>
17 #include "brg_endian.h"
18 #include "KeccakP-1600-SnP.h"
19 #include "SnP-Relaned.h"
/*
 * ALLOW_MISALIGNED_ACCESS is placed in front of the function definitions
 * that read or write user buffers through 32-bit pointers.
 * If the compiler offers __has_feature and reports that the
 * undefined-behavior sanitizer is active, the macro expands to an attribute
 * that switches off the sanitizer's "alignment" check for that function.
 * In every other configuration it expands to nothing.
 */
#if defined(__has_feature)
#  if __has_feature(undefined_behavior_sanitizer)
#    define ALLOW_MISALIGNED_ACCESS __attribute__((no_sanitize("alignment")))
#  endif
#endif
#if !defined(ALLOW_MISALIGNED_ACCESS)
#  define ALLOW_MISALIGNED_ACCESS
#endif
28
typedef unsigned char UINT8;
typedef unsigned int UINT32;
/* WARNING: on 8-bit and 16-bit platforms, this should be replaced by: */
/* typedef unsigned long UINT32; */

/*
 * ROL32(a, offset): rotate the 32-bit value `a` to the left by `offset` bits.
 *
 * `a` is converted to UINT32 as a whole before shifting, so compound or
 * wider expressions may be passed safely (the argument is parenthesized
 * under the cast). `a` is evaluated twice: do not pass an expression with
 * side effects.
 * `offset` must be in the range 1..31; 0 or 32 would produce a shift by the
 * full width of the type, which is undefined behavior in C.
 */
#define ROL32(a, offset) ((((UINT32)(a)) << (offset)) ^ (((UINT32)(a)) >> (32-(offset))))
35
/*
 * Conversion of a lane, given as two 32-bit words (low, high), into its
 * bit-interleaved form (even, odd).
 * Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002.
 *
 * prepareToBitInterleaving(low, high, temp, temp0, temp1)
 *   Evaluates `low` and `high` once each. On exit temp0 (resp. temp1) holds
 *   a bit permutation of `low` (resp. `high`) in which the bits of even
 *   index sit in bits 0..15 and the bits of odd index sit in bits 16..31.
 *   `temp` is scratch.
 *
 * toBitInterleavingAndXOR / AndAND / AndSet
 *   Run the preparation, then combine
 *       (temp0 & 0x0000FFFF) | (temp1 << 16)       into `even`
 *       (temp0 >> 16)        | (temp1 & 0xFFFF0000) into `odd`
 *   using ^=, &= or = respectively. `even` and `odd` are evaluated once.
 *
 * Each macro expands to a sequence of complete statements, the last one
 * terminated by ';'. An invocation therefore needs no trailing semicolon
 * and must not be used as the unbraced body of an if/else/loop.
 * temp, temp0 and temp1 are expected to be plain 32-bit unsigned variables.
 */
#define prepareToBitInterleaving(low, high, temp, temp0, temp1) \
    temp0 = (low); \
    temp = (temp0 ^ (temp0 >> 1)) & 0x22222222UL; \
    temp0 ^= temp ^ (temp << 1); \
    temp = (temp0 ^ (temp0 >> 2)) & 0x0C0C0C0CUL; \
    temp0 ^= temp ^ (temp << 2); \
    temp = (temp0 ^ (temp0 >> 4)) & 0x00F000F0UL; \
    temp0 ^= temp ^ (temp << 4); \
    temp = (temp0 ^ (temp0 >> 8)) & 0x0000FF00UL; \
    temp0 ^= temp ^ (temp << 8); \
    temp1 = (high); \
    temp = (temp1 ^ (temp1 >> 1)) & 0x22222222UL; \
    temp1 ^= temp ^ (temp << 1); \
    temp = (temp1 ^ (temp1 >> 2)) & 0x0C0C0C0CUL; \
    temp1 ^= temp ^ (temp << 2); \
    temp = (temp1 ^ (temp1 >> 4)) & 0x00F000F0UL; \
    temp1 ^= temp ^ (temp << 4); \
    temp = (temp1 ^ (temp1 >> 8)) & 0x0000FF00UL; \
    temp1 ^= temp ^ (temp << 8);

#define toBitInterleavingAndXOR(low, high, even, odd, temp, temp0, temp1) \
    prepareToBitInterleaving(low, high, temp, temp0, temp1) \
    even ^= (temp1 << 16) | (temp0 & 0x0000FFFF); \
    odd ^= (temp1 & 0xFFFF0000) | (temp0 >> 16);

#define toBitInterleavingAndAND(low, high, even, odd, temp, temp0, temp1) \
    prepareToBitInterleaving(low, high, temp, temp0, temp1) \
    even &= (temp1 << 16) | (temp0 & 0x0000FFFF); \
    odd &= (temp1 & 0xFFFF0000) | (temp0 >> 16);

#define toBitInterleavingAndSet(low, high, even, odd, temp, temp0, temp1) \
    prepareToBitInterleaving(low, high, temp, temp0, temp1) \
    even = (temp1 << 16) | (temp0 & 0x0000FFFF); \
    odd = (temp1 & 0xFFFF0000) | (temp0 >> 16);
63
/*
 * Conversion of a lane from its bit-interleaved form (even, odd) back to
 * two ordinary 32-bit words (low, high).
 * Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002.
 *
 * prepareFromBitInterleaving(even, odd, temp, temp0, temp1)
 *   Evaluates `even` and `odd` once each, regroups their 16-bit halves and
 *   applies the swap steps with shifts 8, 4, 2, 1. On exit temp0 holds the
 *   low word and temp1 the high word; `temp` is scratch.
 *
 * fromBitInterleaving stores temp0/temp1 into `low`/`high`.
 * fromBitInterleavingAndXOR stores lowIn^temp0 / highIn^temp1 into
 * lowOut/highOut. Every argument is evaluated exactly once.
 *
 * Each macro expands to a sequence of complete statements, the last one
 * terminated by ';'. An invocation therefore needs no trailing semicolon
 * and must not be used as the unbraced body of an if/else/loop.
 * temp, temp0 and temp1 are expected to be plain 32-bit unsigned variables.
 */
#define prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \
    temp0 = (even); \
    temp1 = (odd); \
    temp = (temp1 << 16) | (temp0 & 0x0000FFFF); \
    temp1 = (temp1 & 0xFFFF0000) | (temp0 >> 16); \
    temp0 = temp; \
    temp = (temp0 ^ (temp0 >> 8)) & 0x0000FF00UL; \
    temp0 ^= temp ^ (temp << 8); \
    temp = (temp0 ^ (temp0 >> 4)) & 0x00F000F0UL; \
    temp0 ^= temp ^ (temp << 4); \
    temp = (temp0 ^ (temp0 >> 2)) & 0x0C0C0C0CUL; \
    temp0 ^= temp ^ (temp << 2); \
    temp = (temp0 ^ (temp0 >> 1)) & 0x22222222UL; \
    temp0 ^= temp ^ (temp << 1); \
    temp = (temp1 ^ (temp1 >> 8)) & 0x0000FF00UL; \
    temp1 ^= temp ^ (temp << 8); \
    temp = (temp1 ^ (temp1 >> 4)) & 0x00F000F0UL; \
    temp1 ^= temp ^ (temp << 4); \
    temp = (temp1 ^ (temp1 >> 2)) & 0x0C0C0C0CUL; \
    temp1 ^= temp ^ (temp << 2); \
    temp = (temp1 ^ (temp1 >> 1)) & 0x22222222UL; \
    temp1 ^= temp ^ (temp << 1);

#define fromBitInterleaving(even, odd, low, high, temp, temp0, temp1) \
    prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \
    low = temp0; \
    high = temp1;

#define fromBitInterleavingAndXOR(even, odd, lowIn, highIn, lowOut, highOut, temp, temp0, temp1) \
    prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \
    lowOut = lowIn ^ temp0; \
    highOut = highIn ^ temp1;
89
KeccakP1600_SetBytesInLaneToZero(void * state,unsigned int lanePosition,unsigned int offset,unsigned int length)90 void KeccakP1600_SetBytesInLaneToZero(void *state, unsigned int lanePosition, unsigned int offset, unsigned int length)
91 {
92 UINT8 laneAsBytes[8];
93 UINT32 low, high;
94 UINT32 temp, temp0, temp1;
95 UINT32 *stateAsHalfLanes = (UINT32*)state;
96
97 memset(laneAsBytes, 0xFF, offset);
98 memset(laneAsBytes+offset, 0x00, length);
99 memset(laneAsBytes+offset+length, 0xFF, 8-offset-length);
100 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
101 low = *((UINT32*)(laneAsBytes+0));
102 high = *((UINT32*)(laneAsBytes+4));
103 #else
104 low = laneAsBytes[0]
105 | ((UINT32)(laneAsBytes[1]) << 8)
106 | ((UINT32)(laneAsBytes[2]) << 16)
107 | ((UINT32)(laneAsBytes[3]) << 24);
108 high = laneAsBytes[4]
109 | ((UINT32)(laneAsBytes[5]) << 8)
110 | ((UINT32)(laneAsBytes[6]) << 16)
111 | ((UINT32)(laneAsBytes[7]) << 24);
112 #endif
113 toBitInterleavingAndAND(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
114 }
115
116 /* ---------------------------------------------------------------- */
117
/*
 * Reset the state to all-zero.
 *
 * state : buffer of at least 200 bytes (1600 bits); exactly 200 bytes are
 *         written.
 */
void KeccakP1600_Initialize(void *state)
{
    enum { stateSizeInBytes = 1600 / 8 };

    memset(state, 0x00, stateSizeInBytes);
}
122
123 /* ---------------------------------------------------------------- */
124
KeccakP1600_AddByte(void * state,unsigned char byte,unsigned int offset)125 void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset)
126 {
127 unsigned int lanePosition = offset/8;
128 unsigned int offsetInLane = offset%8;
129 UINT32 low, high;
130 UINT32 temp, temp0, temp1;
131 UINT32 *stateAsHalfLanes = (UINT32*)state;
132
133 if (offsetInLane < 4) {
134 low = (UINT32)byte << (offsetInLane*8);
135 high = 0;
136 }
137 else {
138 low = 0;
139 high = (UINT32)byte << ((offsetInLane-4)*8);
140 }
141 toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
142 }
143
144 /* ---------------------------------------------------------------- */
145
KeccakP1600_AddBytesInLane(void * state,unsigned int lanePosition,const unsigned char * data,unsigned int offset,unsigned int length)146 void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
147 {
148 UINT8 laneAsBytes[8];
149 UINT32 low, high;
150 UINT32 temp, temp0, temp1;
151 UINT32 *stateAsHalfLanes = (UINT32*)state;
152
153 memset(laneAsBytes, 0, 8);
154 memcpy(laneAsBytes+offset, data, length);
155 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
156 low = *((UINT32*)(laneAsBytes+0));
157 high = *((UINT32*)(laneAsBytes+4));
158 #else
159 low = laneAsBytes[0]
160 | ((UINT32)(laneAsBytes[1]) << 8)
161 | ((UINT32)(laneAsBytes[2]) << 16)
162 | ((UINT32)(laneAsBytes[3]) << 24);
163 high = laneAsBytes[4]
164 | ((UINT32)(laneAsBytes[5]) << 8)
165 | ((UINT32)(laneAsBytes[6]) << 16)
166 | ((UINT32)(laneAsBytes[7]) << 24);
167 #endif
168 toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
169 }
170
171 /* ---------------------------------------------------------------- */
172
173 ALLOW_MISALIGNED_ACCESS
KeccakP1600_AddLanes(void * state,const unsigned char * data,unsigned int laneCount)174 void KeccakP1600_AddLanes(void *state, const unsigned char *data, unsigned int laneCount)
175 {
176 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
177 const UINT32 * pI = (const UINT32 *)data;
178 UINT32 * pS = (UINT32*)state;
179 UINT32 t, x0, x1;
180 int i;
181 for (i = laneCount-1; i >= 0; --i) {
182 #ifdef NO_MISALIGNED_ACCESSES
183 UINT32 low;
184 UINT32 high;
185 memcpy(&low, pI++, 4);
186 memcpy(&high, pI++, 4);
187 toBitInterleavingAndXOR(low, high, *(pS++), *(pS++), t, x0, x1);
188 #else
189 toBitInterleavingAndXOR(*(pI++), *(pI++), *(pS++), *(pS++), t, x0, x1)
190 #endif
191 }
192 #else
193 unsigned int lanePosition;
194 for(lanePosition=0; lanePosition<laneCount; lanePosition++) {
195 UINT8 laneAsBytes[8];
196 memcpy(laneAsBytes, data+lanePosition*8, 8);
197 UINT32 low = laneAsBytes[0]
198 | ((UINT32)(laneAsBytes[1]) << 8)
199 | ((UINT32)(laneAsBytes[2]) << 16)
200 | ((UINT32)(laneAsBytes[3]) << 24);
201 UINT32 high = laneAsBytes[4]
202 | ((UINT32)(laneAsBytes[5]) << 8)
203 | ((UINT32)(laneAsBytes[6]) << 16)
204 | ((UINT32)(laneAsBytes[7]) << 24);
205 UINT32 even, odd, temp, temp0, temp1;
206 UINT32 *stateAsHalfLanes = (UINT32*)state;
207 toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
208 }
209 #endif
210 }
211
212 /* ---------------------------------------------------------------- */
213
KeccakP1600_AddBytes(void * state,const unsigned char * data,unsigned int offset,unsigned int length)214 void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
215 {
216 SnP_AddBytes(state, data, offset, length, KeccakP1600_AddLanes, KeccakP1600_AddBytesInLane, 8);
217 }
218
219 /* ---------------------------------------------------------------- */
220
/*
 * Replace `length` bytes of lane `lanePosition`, starting at byte `offset`
 * of that lane, with the bytes from `data`.
 *
 * Implemented in two steps on the same byte range: the target bytes are
 * first cleared, then the new bytes are XORed in.
 */
void KeccakP1600_OverwriteBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
{
    /* Step 1: zero the destination bytes. */
    KeccakP1600_SetBytesInLaneToZero(state, lanePosition, offset, length);

    /* Step 2: XOR into zeroed bytes, which amounts to a plain store. */
    KeccakP1600_AddBytesInLane(state, lanePosition, data, offset, length);
}
226
227 /* ---------------------------------------------------------------- */
228
229 ALLOW_MISALIGNED_ACCESS
KeccakP1600_OverwriteLanes(void * state,const unsigned char * data,unsigned int laneCount)230 void KeccakP1600_OverwriteLanes(void *state, const unsigned char *data, unsigned int laneCount)
231 {
232 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
233 const UINT32 * pI = (const UINT32 *)data;
234 UINT32 * pS = (UINT32 *)state;
235 UINT32 t, x0, x1;
236 int i;
237 for (i = laneCount-1; i >= 0; --i) {
238 #ifdef NO_MISALIGNED_ACCESSES
239 UINT32 low;
240 UINT32 high;
241 memcpy(&low, pI++, 4);
242 memcpy(&high, pI++, 4);
243 toBitInterleavingAndSet(low, high, *(pS++), *(pS++), t, x0, x1);
244 #else
245 toBitInterleavingAndSet(*(pI++), *(pI++), *(pS++), *(pS++), t, x0, x1)
246 #endif
247 }
248 #else
249 unsigned int lanePosition;
250 for(lanePosition=0; lanePosition<laneCount; lanePosition++) {
251 UINT8 laneAsBytes[8];
252 memcpy(laneAsBytes, data+lanePosition*8, 8);
253 UINT32 low = laneAsBytes[0]
254 | ((UINT32)(laneAsBytes[1]) << 8)
255 | ((UINT32)(laneAsBytes[2]) << 16)
256 | ((UINT32)(laneAsBytes[3]) << 24);
257 UINT32 high = laneAsBytes[4]
258 | ((UINT32)(laneAsBytes[5]) << 8)
259 | ((UINT32)(laneAsBytes[6]) << 16)
260 | ((UINT32)(laneAsBytes[7]) << 24);
261 UINT32 even, odd, temp, temp0, temp1;
262 UINT32 *stateAsHalfLanes = (UINT32*)state;
263 toBitInterleavingAndSet(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
264 }
265 #endif
266 }
267
268 /* ---------------------------------------------------------------- */
269
KeccakP1600_OverwriteBytes(void * state,const unsigned char * data,unsigned int offset,unsigned int length)270 void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
271 {
272 SnP_OverwriteBytes(state, data, offset, length, KeccakP1600_OverwriteLanes, KeccakP1600_OverwriteBytesInLane, 8);
273 }
274
275 /* ---------------------------------------------------------------- */
276
KeccakP1600_OverwriteWithZeroes(void * state,unsigned int byteCount)277 void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount)
278 {
279 UINT32 *stateAsHalfLanes = (UINT32*)state;
280 unsigned int i;
281
282 for(i=0; i<byteCount/8; i++) {
283 stateAsHalfLanes[i*2+0] = 0;
284 stateAsHalfLanes[i*2+1] = 0;
285 }
286 if (byteCount%8 != 0)
287 KeccakP1600_SetBytesInLaneToZero(state, byteCount/8, 0, byteCount%8);
288 }
289
290 /* ---------------------------------------------------------------- */
291
KeccakP1600_ExtractBytesInLane(const void * state,unsigned int lanePosition,unsigned char * data,unsigned int offset,unsigned int length)292 void KeccakP1600_ExtractBytesInLane(const void *state, unsigned int lanePosition, unsigned char *data, unsigned int offset, unsigned int length)
293 {
294 UINT32 *stateAsHalfLanes = (UINT32*)state;
295 UINT32 low, high, temp, temp0, temp1;
296 UINT8 laneAsBytes[8];
297
298 fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1);
299 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
300 *((UINT32*)(laneAsBytes+0)) = low;
301 *((UINT32*)(laneAsBytes+4)) = high;
302 #else
303 laneAsBytes[0] = low & 0xFF;
304 laneAsBytes[1] = (low >> 8) & 0xFF;
305 laneAsBytes[2] = (low >> 16) & 0xFF;
306 laneAsBytes[3] = (low >> 24) & 0xFF;
307 laneAsBytes[4] = high & 0xFF;
308 laneAsBytes[5] = (high >> 8) & 0xFF;
309 laneAsBytes[6] = (high >> 16) & 0xFF;
310 laneAsBytes[7] = (high >> 24) & 0xFF;
311 #endif
312 memcpy(data, laneAsBytes+offset, length);
313 }
314
315 /* ---------------------------------------------------------------- */
316
317 ALLOW_MISALIGNED_ACCESS
KeccakP1600_ExtractLanes(const void * state,unsigned char * data,unsigned int laneCount)318 void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount)
319 {
320 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
321 UINT32 * pI = (UINT32 *)data;
322 const UINT32 * pS = ( const UINT32 *)state;
323 UINT32 t, x0, x1;
324 int i;
325 for (i = laneCount-1; i >= 0; --i) {
326 #ifdef NO_MISALIGNED_ACCESSES
327 UINT32 low;
328 UINT32 high;
329 fromBitInterleaving(*(pS++), *(pS++), low, high, t, x0, x1);
330 memcpy(pI++, &low, 4);
331 memcpy(pI++, &high, 4);
332 #else
333 fromBitInterleaving(*(pS++), *(pS++), *(pI++), *(pI++), t, x0, x1)
334 #endif
335 }
336 #else
337 unsigned int lanePosition;
338 for(lanePosition=0; lanePosition<laneCount; lanePosition++) {
339 UINT32 *stateAsHalfLanes = (UINT32*)state;
340 UINT32 low, high, temp, temp0, temp1;
341 fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1);
342 UINT8 laneAsBytes[8];
343 laneAsBytes[0] = low & 0xFF;
344 laneAsBytes[1] = (low >> 8) & 0xFF;
345 laneAsBytes[2] = (low >> 16) & 0xFF;
346 laneAsBytes[3] = (low >> 24) & 0xFF;
347 laneAsBytes[4] = high & 0xFF;
348 laneAsBytes[5] = (high >> 8) & 0xFF;
349 laneAsBytes[6] = (high >> 16) & 0xFF;
350 laneAsBytes[7] = (high >> 24) & 0xFF;
351 memcpy(data+lanePosition*8, laneAsBytes, 8);
352 }
353 #endif
354 }
355
356 /* ---------------------------------------------------------------- */
357
KeccakP1600_ExtractBytes(const void * state,unsigned char * data,unsigned int offset,unsigned int length)358 void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
359 {
360 SnP_ExtractBytes(state, data, offset, length, KeccakP1600_ExtractLanes, KeccakP1600_ExtractBytesInLane, 8);
361 }
362
363 /* ---------------------------------------------------------------- */
364
KeccakP1600_ExtractAndAddBytesInLane(const void * state,unsigned int lanePosition,const unsigned char * input,unsigned char * output,unsigned int offset,unsigned int length)365 void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
366 {
367 UINT32 *stateAsHalfLanes = (UINT32*)state;
368 UINT32 low, high, temp, temp0, temp1;
369 UINT8 laneAsBytes[8];
370 unsigned int i;
371
372 fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1);
373 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
374 *((UINT32*)(laneAsBytes+0)) = low;
375 *((UINT32*)(laneAsBytes+4)) = high;
376 #else
377 laneAsBytes[0] = low & 0xFF;
378 laneAsBytes[1] = (low >> 8) & 0xFF;
379 laneAsBytes[2] = (low >> 16) & 0xFF;
380 laneAsBytes[3] = (low >> 24) & 0xFF;
381 laneAsBytes[4] = high & 0xFF;
382 laneAsBytes[5] = (high >> 8) & 0xFF;
383 laneAsBytes[6] = (high >> 16) & 0xFF;
384 laneAsBytes[7] = (high >> 24) & 0xFF;
385 #endif
386 for(i=0; i<length; i++)
387 output[i] = input[i] ^ laneAsBytes[offset+i];
388 }
389
390 /* ---------------------------------------------------------------- */
391
392 ALLOW_MISALIGNED_ACCESS
KeccakP1600_ExtractAndAddLanes(const void * state,const unsigned char * input,unsigned char * output,unsigned int laneCount)393 void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *input, unsigned char *output, unsigned int laneCount)
394 {
395 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
396 const UINT32 * pI = (const UINT32 *)input;
397 UINT32 * pO = (UINT32 *)output;
398 const UINT32 * pS = (const UINT32 *)state;
399 UINT32 t, x0, x1;
400 int i;
401 for (i = laneCount-1; i >= 0; --i) {
402 #ifdef NO_MISALIGNED_ACCESSES
403 UINT32 low;
404 UINT32 high;
405 fromBitInterleaving(*(pS++), *(pS++), low, high, t, x0, x1);
406 *(pO++) = *(pI++) ^ low;
407 *(pO++) = *(pI++) ^ high;
408 #else
409 fromBitInterleavingAndXOR(*(pS++), *(pS++), *(pI++), *(pI++), *(pO++), *(pO++), t, x0, x1)
410 #endif
411 }
412 #else
413 unsigned int lanePosition;
414 for(lanePosition=0; lanePosition<laneCount; lanePosition++) {
415 UINT32 *stateAsHalfLanes = (UINT32*)state;
416 UINT32 low, high, temp, temp0, temp1;
417 fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1);
418 UINT8 laneAsBytes[8];
419 laneAsBytes[0] = low & 0xFF;
420 laneAsBytes[1] = (low >> 8) & 0xFF;
421 laneAsBytes[2] = (low >> 16) & 0xFF;
422 laneAsBytes[3] = (low >> 24) & 0xFF;
423 laneAsBytes[4] = high & 0xFF;
424 laneAsBytes[5] = (high >> 8) & 0xFF;
425 laneAsBytes[6] = (high >> 16) & 0xFF;
426 laneAsBytes[7] = (high >> 24) & 0xFF;
427 ((UINT32*)(output+lanePosition*8))[0] = ((UINT32*)(input+lanePosition*8))[0] ^ (*(const UINT32*)(laneAsBytes+0));
428 ((UINT32*)(output+lanePosition*8))[1] = ((UINT32*)(input+lanePosition*8))[0] ^ (*(const UINT32*)(laneAsBytes+4));
429 }
430 #endif
431 }
432 /* ---------------------------------------------------------------- */
433
KeccakP1600_ExtractAndAddBytes(const void * state,const unsigned char * input,unsigned char * output,unsigned int offset,unsigned int length)434 void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
435 {
436 SnP_ExtractAndAddBytes(state, input, output, offset, length, KeccakP1600_ExtractAndAddLanes, KeccakP1600_ExtractAndAddBytesInLane, 8);
437 }
438
439 /* ---------------------------------------------------------------- */
440
441 static const UINT32 KeccakF1600RoundConstants_int2[2*24+1] =
442 {
443 0x00000001UL, 0x00000000UL,
444 0x00000000UL, 0x00000089UL,
445 0x00000000UL, 0x8000008bUL,
446 0x00000000UL, 0x80008080UL,
447 0x00000001UL, 0x0000008bUL,
448 0x00000001UL, 0x00008000UL,
449 0x00000001UL, 0x80008088UL,
450 0x00000001UL, 0x80000082UL,
451 0x00000000UL, 0x0000000bUL,
452 0x00000000UL, 0x0000000aUL,
453 0x00000001UL, 0x00008082UL,
454 0x00000000UL, 0x00008003UL,
455 0x00000001UL, 0x0000808bUL,
456 0x00000001UL, 0x8000000bUL,
457 0x00000001UL, 0x8000008aUL,
458 0x00000001UL, 0x80000081UL,
459 0x00000000UL, 0x80000081UL,
460 0x00000000UL, 0x80000008UL,
461 0x00000000UL, 0x00000083UL,
462 0x00000000UL, 0x80008003UL,
463 0x00000001UL, 0x80008088UL,
464 0x00000000UL, 0x80000088UL,
465 0x00000001UL, 0x00008000UL,
466 0x00000000UL, 0x80008082UL,
467 0x000000FFUL
468 };
469
/*
 * KeccakRound0(): one round, expanded inline on local variables.
 *
 * The expansion site must have in scope: the 50 state words Aba0..Asu1
 * (two 32-bit words per lane, suffixes 0 and 1 -- presumably the even and
 * odd bit-interleaved halves), the scratch words Ba, Be, Bi, Bo, Bu,
 * Cx, Cy, Cz, Cw, Da0..Du1, and the pointer pRoundConstants.
 *
 * First part (up to the blank continuation line): XORs five state words per
 * column into Cx/Cy/Cz/Cw/Du0/Du1 and combines them, with a 1-bit rotation
 * on one half, into the ten D words.
 * Second part: ten groups of five state words. Each word is XORed with a D
 * word and rotated into Ba..Bu, then each result B ^ ((~B') & B'') is
 * written back into the same variables the group was read from (in place).
 * Two consecutive words are read from pRoundConstants (which is advanced by
 * two) and XORed into Aba0 and Aba1.
 *
 * Because results are stored in place, KeccakRound1 reads its operands from
 * different variable names than this macro does.
 * NOTE: unlike KeccakRound1/KeccakRound2, this expansion does not end with
 * a semicolon; the invocation has to supply it.
 */
470 #define KeccakRound0() \
471 Cx = Abu0^Agu0^Aku0^Amu0^Asu0; \
472 Du1 = Abe1^Age1^Ake1^Ame1^Ase1; \
473 Da0 = Cx^ROL32(Du1, 1); \
474 Cz = Abu1^Agu1^Aku1^Amu1^Asu1; \
475 Du0 = Abe0^Age0^Ake0^Ame0^Ase0; \
476 Da1 = Cz^Du0; \
477 Cw = Abi0^Agi0^Aki0^Ami0^Asi0; \
478 Do0 = Cw^ROL32(Cz, 1); \
479 Cy = Abi1^Agi1^Aki1^Ami1^Asi1; \
480 Do1 = Cy^Cx; \
481 Cx = Aba0^Aga0^Aka0^Ama0^Asa0; \
482 De0 = Cx^ROL32(Cy, 1); \
483 Cz = Aba1^Aga1^Aka1^Ama1^Asa1; \
484 De1 = Cz^Cw; \
485 Cy = Abo1^Ago1^Ako1^Amo1^Aso1; \
486 Di0 = Du0^ROL32(Cy, 1); \
487 Cw = Abo0^Ago0^Ako0^Amo0^Aso0; \
488 Di1 = Du1^Cw; \
489 Du0 = Cw^ROL32(Cz, 1); \
490 Du1 = Cy^Cx; \
491 \
492 Ba = (Aba0^Da0); \
493 Be = ROL32((Age0^De0), 22); \
494 Bi = ROL32((Aki1^Di1), 22); \
495 Bo = ROL32((Amo1^Do1), 11); \
496 Bu = ROL32((Asu0^Du0), 7); \
497 Aba0 = Ba ^((~Be)& Bi ); \
498 Aba0 ^= *(pRoundConstants++); \
499 Age0 = Be ^((~Bi)& Bo ); \
500 Aki1 = Bi ^((~Bo)& Bu ); \
501 Amo1 = Bo ^((~Bu)& Ba ); \
502 Asu0 = Bu ^((~Ba)& Be ); \
503 Ba = (Aba1^Da1); \
504 Be = ROL32((Age1^De1), 22); \
505 Bi = ROL32((Aki0^Di0), 21); \
506 Bo = ROL32((Amo0^Do0), 10); \
507 Bu = ROL32((Asu1^Du1), 7); \
508 Aba1 = Ba ^((~Be)& Bi ); \
509 Aba1 ^= *(pRoundConstants++); \
510 Age1 = Be ^((~Bi)& Bo ); \
511 Aki0 = Bi ^((~Bo)& Bu ); \
512 Amo0 = Bo ^((~Bu)& Ba ); \
513 Asu1 = Bu ^((~Ba)& Be ); \
514 Bi = ROL32((Aka1^Da1), 2); \
515 Bo = ROL32((Ame1^De1), 23); \
516 Bu = ROL32((Asi1^Di1), 31); \
517 Ba = ROL32((Abo0^Do0), 14); \
518 Be = ROL32((Agu0^Du0), 10); \
519 Aka1 = Ba ^((~Be)& Bi ); \
520 Ame1 = Be ^((~Bi)& Bo ); \
521 Asi1 = Bi ^((~Bo)& Bu ); \
522 Abo0 = Bo ^((~Bu)& Ba ); \
523 Agu0 = Bu ^((~Ba)& Be ); \
524 Bi = ROL32((Aka0^Da0), 1); \
525 Bo = ROL32((Ame0^De0), 22); \
526 Bu = ROL32((Asi0^Di0), 30); \
527 Ba = ROL32((Abo1^Do1), 14); \
528 Be = ROL32((Agu1^Du1), 10); \
529 Aka0 = Ba ^((~Be)& Bi ); \
530 Ame0 = Be ^((~Bi)& Bo ); \
531 Asi0 = Bi ^((~Bo)& Bu ); \
532 Abo1 = Bo ^((~Bu)& Ba ); \
533 Agu1 = Bu ^((~Ba)& Be ); \
534 Bu = ROL32((Asa0^Da0), 9); \
535 Ba = ROL32((Abe1^De1), 1); \
536 Be = ROL32((Agi0^Di0), 3); \
537 Bi = ROL32((Ako1^Do1), 13); \
538 Bo = ROL32((Amu0^Du0), 4); \
539 Asa0 = Ba ^((~Be)& Bi ); \
540 Abe1 = Be ^((~Bi)& Bo ); \
541 Agi0 = Bi ^((~Bo)& Bu ); \
542 Ako1 = Bo ^((~Bu)& Ba ); \
543 Amu0 = Bu ^((~Ba)& Be ); \
544 Bu = ROL32((Asa1^Da1), 9); \
545 Ba = (Abe0^De0); \
546 Be = ROL32((Agi1^Di1), 3); \
547 Bi = ROL32((Ako0^Do0), 12); \
548 Bo = ROL32((Amu1^Du1), 4); \
549 Asa1 = Ba ^((~Be)& Bi ); \
550 Abe0 = Be ^((~Bi)& Bo ); \
551 Agi1 = Bi ^((~Bo)& Bu ); \
552 Ako0 = Bo ^((~Bu)& Ba ); \
553 Amu1 = Bu ^((~Ba)& Be ); \
554 Be = ROL32((Aga0^Da0), 18); \
555 Bi = ROL32((Ake0^De0), 5); \
556 Bo = ROL32((Ami1^Di1), 8); \
557 Bu = ROL32((Aso0^Do0), 28); \
558 Ba = ROL32((Abu1^Du1), 14); \
559 Aga0 = Ba ^((~Be)& Bi ); \
560 Ake0 = Be ^((~Bi)& Bo ); \
561 Ami1 = Bi ^((~Bo)& Bu ); \
562 Aso0 = Bo ^((~Bu)& Ba ); \
563 Abu1 = Bu ^((~Ba)& Be ); \
564 Be = ROL32((Aga1^Da1), 18); \
565 Bi = ROL32((Ake1^De1), 5); \
566 Bo = ROL32((Ami0^Di0), 7); \
567 Bu = ROL32((Aso1^Do1), 28); \
568 Ba = ROL32((Abu0^Du0), 13); \
569 Aga1 = Ba ^((~Be)& Bi ); \
570 Ake1 = Be ^((~Bi)& Bo ); \
571 Ami0 = Bi ^((~Bo)& Bu ); \
572 Aso1 = Bo ^((~Bu)& Ba ); \
573 Abu0 = Bu ^((~Ba)& Be ); \
574 Bo = ROL32((Ama1^Da1), 21); \
575 Bu = ROL32((Ase0^De0), 1); \
576 Ba = ROL32((Abi0^Di0), 31); \
577 Be = ROL32((Ago1^Do1), 28); \
578 Bi = ROL32((Aku1^Du1), 20); \
579 Ama1 = Ba ^((~Be)& Bi ); \
580 Ase0 = Be ^((~Bi)& Bo ); \
581 Abi0 = Bi ^((~Bo)& Bu ); \
582 Ago1 = Bo ^((~Bu)& Ba ); \
583 Aku1 = Bu ^((~Ba)& Be ); \
584 Bo = ROL32((Ama0^Da0), 20); \
585 Bu = ROL32((Ase1^De1), 1); \
586 Ba = ROL32((Abi1^Di1), 31); \
587 Be = ROL32((Ago0^Do0), 27); \
588 Bi = ROL32((Aku0^Du0), 19); \
589 Ama0 = Ba ^((~Be)& Bi ); \
590 Ase1 = Be ^((~Bi)& Bo ); \
591 Abi1 = Bi ^((~Bo)& Bu ); \
592 Ago0 = Bo ^((~Bu)& Ba ); \
593 Aku0 = Bu ^((~Ba)& Be )
594
/*
 * KeccakRound1(): one round, expanded inline on local variables.
 *
 * Same computation pattern as KeccakRound0 (column XORs into the C/D
 * scratch words, then ten groups of XOR-with-D, rotate,
 * B ^ ((~B') & B'') stored in place, with two words read from
 * pRoundConstants XORed into Aba0 and Aba1), but the operands are read from
 * a different assignment of the state variable names Aba0..Asu1.
 * Presumably this is the assignment left behind by KeccakRound0's in-place
 * stores, so the macros are meant to be used in sequence; confirm against
 * the code that invokes them.
 *
 * Requires the same variables in scope as KeccakRound0. The expansion ends
 * with a semicolon.
 */
595 #define KeccakRound1() \
596 Cx = Asu0^Agu0^Amu0^Abu1^Aku1; \
597 Du1 = Age1^Ame0^Abe0^Ake1^Ase1; \
598 Da0 = Cx^ROL32(Du1, 1); \
599 Cz = Asu1^Agu1^Amu1^Abu0^Aku0; \
600 Du0 = Age0^Ame1^Abe1^Ake0^Ase0; \
601 Da1 = Cz^Du0; \
602 Cw = Aki1^Asi1^Agi0^Ami1^Abi0; \
603 Do0 = Cw^ROL32(Cz, 1); \
604 Cy = Aki0^Asi0^Agi1^Ami0^Abi1; \
605 Do1 = Cy^Cx; \
606 Cx = Aba0^Aka1^Asa0^Aga0^Ama1; \
607 De0 = Cx^ROL32(Cy, 1); \
608 Cz = Aba1^Aka0^Asa1^Aga1^Ama0; \
609 De1 = Cz^Cw; \
610 Cy = Amo0^Abo1^Ako0^Aso1^Ago0; \
611 Di0 = Du0^ROL32(Cy, 1); \
612 Cw = Amo1^Abo0^Ako1^Aso0^Ago1; \
613 Di1 = Du1^Cw; \
614 Du0 = Cw^ROL32(Cz, 1); \
615 Du1 = Cy^Cx; \
616 \
617 Ba = (Aba0^Da0); \
618 Be = ROL32((Ame1^De0), 22); \
619 Bi = ROL32((Agi1^Di1), 22); \
620 Bo = ROL32((Aso1^Do1), 11); \
621 Bu = ROL32((Aku1^Du0), 7); \
622 Aba0 = Ba ^((~Be)& Bi ); \
623 Aba0 ^= *(pRoundConstants++); \
624 Ame1 = Be ^((~Bi)& Bo ); \
625 Agi1 = Bi ^((~Bo)& Bu ); \
626 Aso1 = Bo ^((~Bu)& Ba ); \
627 Aku1 = Bu ^((~Ba)& Be ); \
628 Ba = (Aba1^Da1); \
629 Be = ROL32((Ame0^De1), 22); \
630 Bi = ROL32((Agi0^Di0), 21); \
631 Bo = ROL32((Aso0^Do0), 10); \
632 Bu = ROL32((Aku0^Du1), 7); \
633 Aba1 = Ba ^((~Be)& Bi ); \
634 Aba1 ^= *(pRoundConstants++); \
635 Ame0 = Be ^((~Bi)& Bo ); \
636 Agi0 = Bi ^((~Bo)& Bu ); \
637 Aso0 = Bo ^((~Bu)& Ba ); \
638 Aku0 = Bu ^((~Ba)& Be ); \
639 Bi = ROL32((Asa1^Da1), 2); \
640 Bo = ROL32((Ake1^De1), 23); \
641 Bu = ROL32((Abi1^Di1), 31); \
642 Ba = ROL32((Amo1^Do0), 14); \
643 Be = ROL32((Agu0^Du0), 10); \
644 Asa1 = Ba ^((~Be)& Bi ); \
645 Ake1 = Be ^((~Bi)& Bo ); \
646 Abi1 = Bi ^((~Bo)& Bu ); \
647 Amo1 = Bo ^((~Bu)& Ba ); \
648 Agu0 = Bu ^((~Ba)& Be ); \
649 Bi = ROL32((Asa0^Da0), 1); \
650 Bo = ROL32((Ake0^De0), 22); \
651 Bu = ROL32((Abi0^Di0), 30); \
652 Ba = ROL32((Amo0^Do1), 14); \
653 Be = ROL32((Agu1^Du1), 10); \
654 Asa0 = Ba ^((~Be)& Bi ); \
655 Ake0 = Be ^((~Bi)& Bo ); \
656 Abi0 = Bi ^((~Bo)& Bu ); \
657 Amo0 = Bo ^((~Bu)& Ba ); \
658 Agu1 = Bu ^((~Ba)& Be ); \
659 Bu = ROL32((Ama1^Da0), 9); \
660 Ba = ROL32((Age1^De1), 1); \
661 Be = ROL32((Asi1^Di0), 3); \
662 Bi = ROL32((Ako0^Do1), 13); \
663 Bo = ROL32((Abu1^Du0), 4); \
664 Ama1 = Ba ^((~Be)& Bi ); \
665 Age1 = Be ^((~Bi)& Bo ); \
666 Asi1 = Bi ^((~Bo)& Bu ); \
667 Ako0 = Bo ^((~Bu)& Ba ); \
668 Abu1 = Bu ^((~Ba)& Be ); \
669 Bu = ROL32((Ama0^Da1), 9); \
670 Ba = (Age0^De0); \
671 Be = ROL32((Asi0^Di1), 3); \
672 Bi = ROL32((Ako1^Do0), 12); \
673 Bo = ROL32((Abu0^Du1), 4); \
674 Ama0 = Ba ^((~Be)& Bi ); \
675 Age0 = Be ^((~Bi)& Bo ); \
676 Asi0 = Bi ^((~Bo)& Bu ); \
677 Ako1 = Bo ^((~Bu)& Ba ); \
678 Abu0 = Bu ^((~Ba)& Be ); \
679 Be = ROL32((Aka1^Da0), 18); \
680 Bi = ROL32((Abe1^De0), 5); \
681 Bo = ROL32((Ami0^Di1), 8); \
682 Bu = ROL32((Ago1^Do0), 28); \
683 Ba = ROL32((Asu1^Du1), 14); \
684 Aka1 = Ba ^((~Be)& Bi ); \
685 Abe1 = Be ^((~Bi)& Bo ); \
686 Ami0 = Bi ^((~Bo)& Bu ); \
687 Ago1 = Bo ^((~Bu)& Ba ); \
688 Asu1 = Bu ^((~Ba)& Be ); \
689 Be = ROL32((Aka0^Da1), 18); \
690 Bi = ROL32((Abe0^De1), 5); \
691 Bo = ROL32((Ami1^Di0), 7); \
692 Bu = ROL32((Ago0^Do1), 28); \
693 Ba = ROL32((Asu0^Du0), 13); \
694 Aka0 = Ba ^((~Be)& Bi ); \
695 Abe0 = Be ^((~Bi)& Bo ); \
696 Ami1 = Bi ^((~Bo)& Bu ); \
697 Ago0 = Bo ^((~Bu)& Ba ); \
698 Asu0 = Bu ^((~Ba)& Be ); \
699 Bo = ROL32((Aga1^Da1), 21); \
700 Bu = ROL32((Ase0^De0), 1); \
701 Ba = ROL32((Aki1^Di0), 31); \
702 Be = ROL32((Abo1^Do1), 28); \
703 Bi = ROL32((Amu1^Du1), 20); \
704 Aga1 = Ba ^((~Be)& Bi ); \
705 Ase0 = Be ^((~Bi)& Bo ); \
706 Aki1 = Bi ^((~Bo)& Bu ); \
707 Abo1 = Bo ^((~Bu)& Ba ); \
708 Amu1 = Bu ^((~Ba)& Be ); \
709 Bo = ROL32((Aga0^Da0), 20); \
710 Bu = ROL32((Ase1^De1), 1); \
711 Ba = ROL32((Aki0^Di1), 31); \
712 Be = ROL32((Abo0^Do0), 27); \
713 Bi = ROL32((Amu0^Du0), 19); \
714 Aga0 = Ba ^((~Be)& Bi ); \
715 Ase1 = Be ^((~Bi)& Bo ); \
716 Aki0 = Bi ^((~Bo)& Bu ); \
717 Abo0 = Bo ^((~Bu)& Ba ); \
718 Amu0 = Bu ^((~Ba)& Be );
719
/*
 * KeccakRound2(): one round, expanded inline on local variables.
 *
 * Same computation pattern as KeccakRound0 and KeccakRound1 (column XORs
 * into the C/D scratch words, then ten groups of XOR-with-D, rotate,
 * B ^ ((~B') & B'') stored in place, with two words read from
 * pRoundConstants XORed into Aba0 and Aba1), with yet another assignment of
 * the state variable names Aba0..Asu1 to the operands.
 * Presumably this is the assignment left behind by KeccakRound1's in-place
 * stores, so the macros are meant to be used in sequence; confirm against
 * the code that invokes them.
 *
 * Requires the same variables in scope as KeccakRound0. The expansion ends
 * with a semicolon.
 */
720 #define KeccakRound2() \
721 Cx = Aku1^Agu0^Abu1^Asu1^Amu1; \
722 Du1 = Ame0^Ake0^Age0^Abe0^Ase1; \
723 Da0 = Cx^ROL32(Du1, 1); \
724 Cz = Aku0^Agu1^Abu0^Asu0^Amu0; \
725 Du0 = Ame1^Ake1^Age1^Abe1^Ase0; \
726 Da1 = Cz^Du0; \
727 Cw = Agi1^Abi1^Asi1^Ami0^Aki1; \
728 Do0 = Cw^ROL32(Cz, 1); \
729 Cy = Agi0^Abi0^Asi0^Ami1^Aki0; \
730 Do1 = Cy^Cx; \
731 Cx = Aba0^Asa1^Ama1^Aka1^Aga1; \
732 De0 = Cx^ROL32(Cy, 1); \
733 Cz = Aba1^Asa0^Ama0^Aka0^Aga0; \
734 De1 = Cz^Cw; \
735 Cy = Aso0^Amo0^Ako1^Ago0^Abo0; \
736 Di0 = Du0^ROL32(Cy, 1); \
737 Cw = Aso1^Amo1^Ako0^Ago1^Abo1; \
738 Di1 = Du1^Cw; \
739 Du0 = Cw^ROL32(Cz, 1); \
740 Du1 = Cy^Cx; \
741 \
742 Ba = (Aba0^Da0); \
743 Be = ROL32((Ake1^De0), 22); \
744 Bi = ROL32((Asi0^Di1), 22); \
745 Bo = ROL32((Ago0^Do1), 11); \
746 Bu = ROL32((Amu1^Du0), 7); \
747 Aba0 = Ba ^((~Be)& Bi ); \
748 Aba0 ^= *(pRoundConstants++); \
749 Ake1 = Be ^((~Bi)& Bo ); \
750 Asi0 = Bi ^((~Bo)& Bu ); \
751 Ago0 = Bo ^((~Bu)& Ba ); \
752 Amu1 = Bu ^((~Ba)& Be ); \
753 Ba = (Aba1^Da1); \
754 Be = ROL32((Ake0^De1), 22); \
755 Bi = ROL32((Asi1^Di0), 21); \
756 Bo = ROL32((Ago1^Do0), 10); \
757 Bu = ROL32((Amu0^Du1), 7); \
758 Aba1 = Ba ^((~Be)& Bi ); \
759 Aba1 ^= *(pRoundConstants++); \
760 Ake0 = Be ^((~Bi)& Bo ); \
761 Asi1 = Bi ^((~Bo)& Bu ); \
762 Ago1 = Bo ^((~Bu)& Ba ); \
763 Amu0 = Bu ^((~Ba)& Be ); \
764 Bi = ROL32((Ama0^Da1), 2); \
765 Bo = ROL32((Abe0^De1), 23); \
766 Bu = ROL32((Aki0^Di1), 31); \
767 Ba = ROL32((Aso1^Do0), 14); \
768 Be = ROL32((Agu0^Du0), 10); \
769 Ama0 = Ba ^((~Be)& Bi ); \
770 Abe0 = Be ^((~Bi)& Bo ); \
771 Aki0 = Bi ^((~Bo)& Bu ); \
772 Aso1 = Bo ^((~Bu)& Ba ); \
773 Agu0 = Bu ^((~Ba)& Be ); \
774 Bi = ROL32((Ama1^Da0), 1); \
775 Bo = ROL32((Abe1^De0), 22); \
776 Bu = ROL32((Aki1^Di0), 30); \
777 Ba = ROL32((Aso0^Do1), 14); \
778 Be = ROL32((Agu1^Du1), 10); \
779 Ama1 = Ba ^((~Be)& Bi ); \
780 Abe1 = Be ^((~Bi)& Bo ); \
781 Aki1 = Bi ^((~Bo)& Bu ); \
782 Aso0 = Bo ^((~Bu)& Ba ); \
783 Agu1 = Bu ^((~Ba)& Be ); \
784 Bu = ROL32((Aga1^Da0), 9); \
785 Ba = ROL32((Ame0^De1), 1); \
786 Be = ROL32((Abi1^Di0), 3); \
787 Bi = ROL32((Ako1^Do1), 13); \
788 Bo = ROL32((Asu1^Du0), 4); \
789 Aga1 = Ba ^((~Be)& Bi ); \
790 Ame0 = Be ^((~Bi)& Bo ); \
791 Abi1 = Bi ^((~Bo)& Bu ); \
792 Ako1 = Bo ^((~Bu)& Ba ); \
793 Asu1 = Bu ^((~Ba)& Be ); \
794 Bu = ROL32((Aga0^Da1), 9); \
795 Ba = (Ame1^De0); \
796 Be = ROL32((Abi0^Di1), 3); \
797 Bi = ROL32((Ako0^Do0), 12); \
798 Bo = ROL32((Asu0^Du1), 4); \
799 Aga0 = Ba ^((~Be)& Bi ); \
800 Ame1 = Be ^((~Bi)& Bo ); \
801 Abi0 = Bi ^((~Bo)& Bu ); \
802 Ako0 = Bo ^((~Bu)& Ba ); \
803 Asu0 = Bu ^((~Ba)& Be ); \
804 Be = ROL32((Asa1^Da0), 18); \
805 Bi = ROL32((Age1^De0), 5); \
806 Bo = ROL32((Ami1^Di1), 8); \
807 Bu = ROL32((Abo1^Do0), 28); \
808 Ba = ROL32((Aku0^Du1), 14); \
809 Asa1 = Ba ^((~Be)& Bi ); \
810 Age1 = Be ^((~Bi)& Bo ); \
811 Ami1 = Bi ^((~Bo)& Bu ); \
812 Abo1 = Bo ^((~Bu)& Ba ); \
813 Aku0 = Bu ^((~Ba)& Be ); \
814 Be = ROL32((Asa0^Da1), 18); \
815 Bi = ROL32((Age0^De1), 5); \
816 Bo = ROL32((Ami0^Di0), 7); \
817 Bu = ROL32((Abo0^Do1), 28); \
818 Ba = ROL32((Aku1^Du0), 13); \
819 Asa0 = Ba ^((~Be)& Bi ); \
820 Age0 = Be ^((~Bi)& Bo ); \
821 Ami0 = Bi ^((~Bo)& Bu ); \
822 Abo0 = Bo ^((~Bu)& Ba ); \
823 Aku1 = Bu ^((~Ba)& Be ); \
824 Bo = ROL32((Aka0^Da1), 21); \
825 Bu = ROL32((Ase0^De0), 1); \
826 Ba = ROL32((Agi1^Di0), 31); \
827 Be = ROL32((Amo0^Do1), 28); \
828 Bi = ROL32((Abu0^Du1), 20); \
829 Aka0 = Ba ^((~Be)& Bi ); \
830 Ase0 = Be ^((~Bi)& Bo ); \
831 Agi1 = Bi ^((~Bo)& Bu ); \
832 Amo0 = Bo ^((~Bu)& Ba ); \
833 Abu0 = Bu ^((~Ba)& Be ); \
834 Bo = ROL32((Aka1^Da0), 20); \
835 Bu = ROL32((Ase1^De1), 1); \
836 Ba = ROL32((Agi0^Di1), 31); \
837 Be = ROL32((Amo1^Do0), 27); \
838 Bi = ROL32((Abu1^Du0), 19); \
839 Aka1 = Ba ^((~Be)& Bi ); \
840 Ase1 = Be ^((~Bi)& Bo ); \
841 Agi0 = Bi ^((~Bo)& Bu ); \
842 Amo1 = Bo ^((~Bu)& Ba ); \
843 Abu1 = Bu ^((~Ba)& Be );
844
/*
 * KeccakRound3() -- one permutation round, written as a plain statement
 * sequence that works directly on the caller's variables.
 *
 * Names that must be in scope where the macro is expanded:
 *   - A{b,g,k,m,s}{a,e,i,o,u}{0,1}: the 50 32-bit half-lane lvalues
 *     (suffix 0/1 selects one of the two interleaved halves of a lane);
 *   - Cx, Cy, Cz, Cw: scratch words for the column parities;
 *   - Da0..Du0, Da1..Du1: the ten theta correction words;
 *   - Ba, Be, Bi, Bo, Bu: one row of rotated half-lanes;
 *   - pRoundConstants: advanced by two words (one per half of lane "ba");
 *   - ROL32(): 32-bit left rotation.
 *
 * Structure:
 *   1. Column parities are formed with XOR and combined into the D words.
 *      A 64-bit "rotate by 1" of an interleaved lane is a 32-bit rotate by 1
 *      of one half with the halves exchanged, which is why only one of each
 *      D pair contains a ROL32.
 *   2. Ten groups follow (five rows, two halves each). Each group XORs five
 *      half-lanes with their D word, rotates them, applies the
 *      "x ^ (~y & z)" step across the row, and stores the results back into
 *      the same five names it read from. Rotation amounts come in pairs: an
 *      even 64-bit amount r gives r/2 for both halves, an odd one gives
 *      (r+1)/2 and (r-1)/2 with the source halves crossed.
 *   3. The two halves of Aba additionally absorb one round-constant word each.
 *
 * The expansion is several statements, not a single one: do not use it as
 * the unbraced body of an if/else/for/while.
 */
#define KeccakRound3() \
    /* column parities and D words */ \
    Cx = Amu1^Agu0^Asu1^Aku0^Abu0; \
    Du1 = Ake0^Abe1^Ame1^Age0^Ase1; \
    Da0 = Cx^ROL32(Du1, 1); \
    Cz = Amu0^Agu1^Asu0^Aku1^Abu1; \
    Du0 = Ake1^Abe0^Ame0^Age1^Ase0; \
    Da1 = Cz^Du0; \
    Cw = Asi0^Aki0^Abi1^Ami1^Agi1; \
    Do0 = Cw^ROL32(Cz, 1); \
    Cy = Asi1^Aki1^Abi0^Ami0^Agi0; \
    Do1 = Cy^Cx; \
    Cx = Aba0^Ama0^Aga1^Asa1^Aka0; \
    De0 = Cx^ROL32(Cy, 1); \
    Cz = Aba1^Ama1^Aga0^Asa0^Aka1; \
    De1 = Cz^Cw; \
    Cy = Ago1^Aso0^Ako0^Abo0^Amo1; \
    Di0 = Du0^ROL32(Cy, 1); \
    Cw = Ago0^Aso1^Ako1^Abo1^Amo0; \
    Di1 = Du1^Cw; \
    /* Du0/Du1 held the "e" column parity until here; now they become D words */ \
    Du0 = Cw^ROL32(Cz, 1); \
    Du1 = Cy^Cx; \
\
    /* row "b", half 0 (takes a round-constant word) */ \
    Ba = (Aba0^Da0); \
    Be = ROL32((Abe0^De0), 22); \
    Bi = ROL32((Abi0^Di1), 22); \
    Bo = ROL32((Abo0^Do1), 11); \
    Bu = ROL32((Abu0^Du0), 7); \
    Aba0 =   Ba ^((~Be)&  Bi ); \
    Aba0 ^= *(pRoundConstants++); \
    Abe0 =   Be ^((~Bi)&  Bo ); \
    Abi0 =   Bi ^((~Bo)&  Bu ); \
    Abo0 =   Bo ^((~Bu)&  Ba ); \
    Abu0 =   Bu ^((~Ba)&  Be ); \
    /* row "b", half 1 (takes a round-constant word) */ \
    Ba = (Aba1^Da1); \
    Be = ROL32((Abe1^De1), 22); \
    Bi = ROL32((Abi1^Di0), 21); \
    Bo = ROL32((Abo1^Do0), 10); \
    Bu = ROL32((Abu1^Du1), 7); \
    Aba1 =   Ba ^((~Be)&  Bi ); \
    Aba1 ^= *(pRoundConstants++); \
    Abe1 =   Be ^((~Bi)&  Bo ); \
    Abi1 =   Bi ^((~Bo)&  Bu ); \
    Abo1 =   Bo ^((~Bu)&  Ba ); \
    Abu1 =   Bu ^((~Ba)&  Be ); \
    /* row "g", half 0 */ \
    Bi = ROL32((Aga0^Da1), 2); \
    Bo = ROL32((Age0^De1), 23); \
    Bu = ROL32((Agi0^Di1), 31); \
    Ba = ROL32((Ago0^Do0), 14); \
    Be = ROL32((Agu0^Du0), 10); \
    Aga0 =   Ba ^((~Be)&  Bi ); \
    Age0 =   Be ^((~Bi)&  Bo ); \
    Agi0 =   Bi ^((~Bo)&  Bu ); \
    Ago0 =   Bo ^((~Bu)&  Ba ); \
    Agu0 =   Bu ^((~Ba)&  Be ); \
    /* row "g", half 1 */ \
    Bi = ROL32((Aga1^Da0), 1); \
    Bo = ROL32((Age1^De0), 22); \
    Bu = ROL32((Agi1^Di0), 30); \
    Ba = ROL32((Ago1^Do1), 14); \
    Be = ROL32((Agu1^Du1), 10); \
    Aga1 =   Ba ^((~Be)&  Bi ); \
    Age1 =   Be ^((~Bi)&  Bo ); \
    Agi1 =   Bi ^((~Bo)&  Bu ); \
    Ago1 =   Bo ^((~Bu)&  Ba ); \
    Agu1 =   Bu ^((~Ba)&  Be ); \
    /* row "k", half 0 */ \
    Bu = ROL32((Aka0^Da0), 9); \
    Ba = ROL32((Ake0^De1), 1); \
    Be = ROL32((Aki0^Di0), 3); \
    Bi = ROL32((Ako0^Do1), 13); \
    Bo = ROL32((Aku0^Du0), 4); \
    Aka0 =   Ba ^((~Be)&  Bi ); \
    Ake0 =   Be ^((~Bi)&  Bo ); \
    Aki0 =   Bi ^((~Bo)&  Bu ); \
    Ako0 =   Bo ^((~Bu)&  Ba ); \
    Aku0 =   Bu ^((~Ba)&  Be ); \
    /* row "k", half 1 (Ba: the odd rotation by 1 leaves this half unrotated) */ \
    Bu = ROL32((Aka1^Da1), 9); \
    Ba = (Ake1^De0); \
    Be = ROL32((Aki1^Di1), 3); \
    Bi = ROL32((Ako1^Do0), 12); \
    Bo = ROL32((Aku1^Du1), 4); \
    Aka1 =   Ba ^((~Be)&  Bi ); \
    Ake1 =   Be ^((~Bi)&  Bo ); \
    Aki1 =   Bi ^((~Bo)&  Bu ); \
    Ako1 =   Bo ^((~Bu)&  Ba ); \
    Aku1 =   Bu ^((~Ba)&  Be ); \
    /* row "m", half 0 */ \
    Be = ROL32((Ama0^Da0), 18); \
    Bi = ROL32((Ame0^De0), 5); \
    Bo = ROL32((Ami0^Di1), 8); \
    Bu = ROL32((Amo0^Do0), 28); \
    Ba = ROL32((Amu0^Du1), 14); \
    Ama0 =   Ba ^((~Be)&  Bi ); \
    Ame0 =   Be ^((~Bi)&  Bo ); \
    Ami0 =   Bi ^((~Bo)&  Bu ); \
    Amo0 =   Bo ^((~Bu)&  Ba ); \
    Amu0 =   Bu ^((~Ba)&  Be ); \
    /* row "m", half 1 */ \
    Be = ROL32((Ama1^Da1), 18); \
    Bi = ROL32((Ame1^De1), 5); \
    Bo = ROL32((Ami1^Di0), 7); \
    Bu = ROL32((Amo1^Do1), 28); \
    Ba = ROL32((Amu1^Du0), 13); \
    Ama1 =   Ba ^((~Be)&  Bi ); \
    Ame1 =   Be ^((~Bi)&  Bo ); \
    Ami1 =   Bi ^((~Bo)&  Bu ); \
    Amo1 =   Bo ^((~Bu)&  Ba ); \
    Amu1 =   Bu ^((~Ba)&  Be ); \
    /* row "s", half 0 */ \
    Bo = ROL32((Asa0^Da1), 21); \
    Bu = ROL32((Ase0^De0), 1); \
    Ba = ROL32((Asi0^Di0), 31); \
    Be = ROL32((Aso0^Do1), 28); \
    Bi = ROL32((Asu0^Du1), 20); \
    Asa0 =   Ba ^((~Be)&  Bi ); \
    Ase0 =   Be ^((~Bi)&  Bo ); \
    Asi0 =   Bi ^((~Bo)&  Bu ); \
    Aso0 =   Bo ^((~Bu)&  Ba ); \
    Asu0 =   Bu ^((~Ba)&  Be ); \
    /* row "s", half 1 */ \
    Bo = ROL32((Asa1^Da0), 20); \
    Bu = ROL32((Ase1^De1), 1); \
    Ba = ROL32((Asi1^Di1), 31); \
    Be = ROL32((Aso1^Do0), 27); \
    Bi = ROL32((Asu1^Du0), 19); \
    Asa1 =   Ba ^((~Be)&  Bi ); \
    Ase1 =   Be ^((~Bi)&  Bo ); \
    Asi1 =   Bi ^((~Bo)&  Bu ); \
    Aso1 =   Bo ^((~Bu)&  Ba ); \
    Asu1 =   Bu ^((~Ba)&  Be );
969
KeccakP1600_Permute_Nrounds(void * state,unsigned int nRounds)970 void KeccakP1600_Permute_Nrounds(void *state, unsigned int nRounds)
971 {
972 UINT32 Da0, De0, Di0, Do0, Du0;
973 UINT32 Da1, De1, Di1, Do1, Du1;
974 UINT32 Ba, Be, Bi, Bo, Bu;
975 UINT32 Cx, Cy, Cz, Cw;
976 const UINT32 *pRoundConstants = KeccakF1600RoundConstants_int2+(24-nRounds)*2;
977 UINT32 *stateAsHalfLanes = (UINT32*)state;
978 #define Aba0 stateAsHalfLanes[ 0]
979 #define Aba1 stateAsHalfLanes[ 1]
980 #define Abe0 stateAsHalfLanes[ 2]
981 #define Abe1 stateAsHalfLanes[ 3]
982 #define Abi0 stateAsHalfLanes[ 4]
983 #define Abi1 stateAsHalfLanes[ 5]
984 #define Abo0 stateAsHalfLanes[ 6]
985 #define Abo1 stateAsHalfLanes[ 7]
986 #define Abu0 stateAsHalfLanes[ 8]
987 #define Abu1 stateAsHalfLanes[ 9]
988 #define Aga0 stateAsHalfLanes[10]
989 #define Aga1 stateAsHalfLanes[11]
990 #define Age0 stateAsHalfLanes[12]
991 #define Age1 stateAsHalfLanes[13]
992 #define Agi0 stateAsHalfLanes[14]
993 #define Agi1 stateAsHalfLanes[15]
994 #define Ago0 stateAsHalfLanes[16]
995 #define Ago1 stateAsHalfLanes[17]
996 #define Agu0 stateAsHalfLanes[18]
997 #define Agu1 stateAsHalfLanes[19]
998 #define Aka0 stateAsHalfLanes[20]
999 #define Aka1 stateAsHalfLanes[21]
1000 #define Ake0 stateAsHalfLanes[22]
1001 #define Ake1 stateAsHalfLanes[23]
1002 #define Aki0 stateAsHalfLanes[24]
1003 #define Aki1 stateAsHalfLanes[25]
1004 #define Ako0 stateAsHalfLanes[26]
1005 #define Ako1 stateAsHalfLanes[27]
1006 #define Aku0 stateAsHalfLanes[28]
1007 #define Aku1 stateAsHalfLanes[29]
1008 #define Ama0 stateAsHalfLanes[30]
1009 #define Ama1 stateAsHalfLanes[31]
1010 #define Ame0 stateAsHalfLanes[32]
1011 #define Ame1 stateAsHalfLanes[33]
1012 #define Ami0 stateAsHalfLanes[34]
1013 #define Ami1 stateAsHalfLanes[35]
1014 #define Amo0 stateAsHalfLanes[36]
1015 #define Amo1 stateAsHalfLanes[37]
1016 #define Amu0 stateAsHalfLanes[38]
1017 #define Amu1 stateAsHalfLanes[39]
1018 #define Asa0 stateAsHalfLanes[40]
1019 #define Asa1 stateAsHalfLanes[41]
1020 #define Ase0 stateAsHalfLanes[42]
1021 #define Ase1 stateAsHalfLanes[43]
1022 #define Asi0 stateAsHalfLanes[44]
1023 #define Asi1 stateAsHalfLanes[45]
1024 #define Aso0 stateAsHalfLanes[46]
1025 #define Aso1 stateAsHalfLanes[47]
1026 #define Asu0 stateAsHalfLanes[48]
1027 #define Asu1 stateAsHalfLanes[49]
1028
1029 nRounds &= 3;
1030 switch ( nRounds )
1031 {
1032 #define I0 Ba
1033 #define I1 Be
1034 #define T0 Bi
1035 #define T1 Bo
1036 #define SwapPI13( in0,in1,in2,in3,eo0,eo1,eo2,eo3 ) \
1037 I0 = (in0)[0]; I1 = (in0)[1]; \
1038 T0 = (in1)[0]; T1 = (in1)[1]; \
1039 (in0)[eo0] = T0; (in0)[eo0^1] = T1; \
1040 T0 = (in2)[0]; T1 = (in2)[1]; \
1041 (in1)[eo1] = T0; (in1)[eo1^1] = T1; \
1042 T0 = (in3)[0]; T1 = (in3)[1]; \
1043 (in2)[eo2] = T0; (in2)[eo2^1] = T1; \
1044 (in3)[eo3] = I0; (in3)[eo3^1] = I1
1045 #define SwapPI2( in0,in1,in2,in3 ) \
1046 I0 = (in0)[0]; I1 = (in0)[1]; \
1047 T0 = (in1)[0]; T1 = (in1)[1]; \
1048 (in0)[1] = T0; (in0)[0] = T1; \
1049 (in1)[1] = I0; (in1)[0] = I1; \
1050 I0 = (in2)[0]; I1 = (in2)[1]; \
1051 T0 = (in3)[0]; T1 = (in3)[1]; \
1052 (in2)[1] = T0; (in2)[0] = T1; \
1053 (in3)[1] = I0; (in3)[0] = I1
1054 #define SwapEO( even,odd ) T0 = even; even = odd; odd = T0
1055
1056 case 1:
1057 SwapPI13( &Aga0, &Aka0, &Asa0, &Ama0, 1, 0, 1, 0 );
1058 SwapPI13( &Abe0, &Age0, &Ame0, &Ake0, 0, 1, 0, 1 );
1059 SwapPI13( &Abi0, &Aki0, &Agi0, &Asi0, 1, 0, 1, 0 );
1060 SwapEO( Ami0, Ami1 );
1061 SwapPI13( &Abo0, &Amo0, &Aso0, &Ago0, 1, 0, 1, 0 );
1062 SwapEO( Ako0, Ako1 );
1063 SwapPI13( &Abu0, &Asu0, &Aku0, &Amu0, 0, 1, 0, 1 );
1064 break;
1065
1066 case 2:
1067 SwapPI2( &Aga0, &Asa0, &Aka0, &Ama0 );
1068 SwapPI2( &Abe0, &Ame0, &Age0, &Ake0 );
1069 SwapPI2( &Abi0, &Agi0, &Aki0, &Asi0 );
1070 SwapPI2( &Abo0, &Aso0, &Ago0, &Amo0 );
1071 SwapPI2( &Abu0, &Aku0, &Amu0, &Asu0 );
1072 break;
1073
1074 case 3:
1075 SwapPI13( &Aga0, &Ama0, &Asa0, &Aka0, 0, 1, 0, 1 );
1076 SwapPI13( &Abe0, &Ake0, &Ame0, &Age0, 1, 0, 1, 0 );
1077 SwapPI13( &Abi0, &Asi0, &Agi0, &Aki0, 0, 1, 0, 1 );
1078 SwapEO( Ami0, Ami1 );
1079 SwapPI13( &Abo0, &Ago0, &Aso0, &Amo0, 0, 1, 0, 1 );
1080 SwapEO( Ako0, Ako1 );
1081 SwapPI13( &Abu0, &Amu0, &Aku0, &Asu0, 1, 0, 1, 0 );
1082 break;
1083 #undef I0
1084 #undef I1
1085 #undef T0
1086 #undef T1
1087 #undef SwapPI13
1088 #undef SwapPI2
1089 #undef SwapEO
1090 }
1091
1092 do
1093 {
1094 /* Code for 4 rounds, using factor 2 interleaving, 64-bit lanes mapped to 32-bit words */
1095 switch ( nRounds )
1096 {
1097 case 0: KeccakRound0(); /* fall through */
1098 case 3: KeccakRound1();
1099 case 2: KeccakRound2();
1100 case 1: KeccakRound3();
1101 }
1102 nRounds = 0;
1103 }
1104 while ( *pRoundConstants != 0xFF );
1105
1106 #undef Aba0
1107 #undef Aba1
1108 #undef Abe0
1109 #undef Abe1
1110 #undef Abi0
1111 #undef Abi1
1112 #undef Abo0
1113 #undef Abo1
1114 #undef Abu0
1115 #undef Abu1
1116 #undef Aga0
1117 #undef Aga1
1118 #undef Age0
1119 #undef Age1
1120 #undef Agi0
1121 #undef Agi1
1122 #undef Ago0
1123 #undef Ago1
1124 #undef Agu0
1125 #undef Agu1
1126 #undef Aka0
1127 #undef Aka1
1128 #undef Ake0
1129 #undef Ake1
1130 #undef Aki0
1131 #undef Aki1
1132 #undef Ako0
1133 #undef Ako1
1134 #undef Aku0
1135 #undef Aku1
1136 #undef Ama0
1137 #undef Ama1
1138 #undef Ame0
1139 #undef Ame1
1140 #undef Ami0
1141 #undef Ami1
1142 #undef Amo0
1143 #undef Amo1
1144 #undef Amu0
1145 #undef Amu1
1146 #undef Asa0
1147 #undef Asa1
1148 #undef Ase0
1149 #undef Ase1
1150 #undef Asi0
1151 #undef Asi1
1152 #undef Aso0
1153 #undef Aso1
1154 #undef Asu0
1155 #undef Asu1
1156 }
1157
1158 /* ---------------------------------------------------------------- */
1159
/**
 * Apply 12 permutation rounds to the state, in place.
 *
 * Thin wrapper: forwards to KeccakP1600_Permute_Nrounds() with nRounds = 12.
 *
 * @param state Pointer to the state, passed through unchanged.
 */
void KeccakP1600_Permute_12rounds(void *state)
{
    KeccakP1600_Permute_Nrounds(state, 12);
}
1164
1165 /* ---------------------------------------------------------------- */
1166
/**
 * Apply 24 permutation rounds to the state, in place.
 *
 * Thin wrapper: forwards to KeccakP1600_Permute_Nrounds() with nRounds = 24.
 *
 * @param state Pointer to the state, passed through unchanged.
 */
void KeccakP1600_Permute_24rounds(void *state)
{
    KeccakP1600_Permute_Nrounds(state, 24);
}
1171