1 /*
2 Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
3 Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
4 denoted as "the implementer".
5
6 For more information, feedback or questions, please refer to our websites:
7 http://keccak.noekeon.org/
8 http://keyak.noekeon.org/
9 http://ketje.noekeon.org/
10
11 To the extent possible under law, the implementer has waived all copyright
12 and related or neighboring rights to the source code in this file.
13 http://creativecommons.org/publicdomain/zero/1.0/
14 */
15
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "brg_endian.h"
#include "KeccakP-1600-opt64-config.h"
20
/* Byte type used for raw data buffers in this file. */
typedef unsigned char UINT8;
/* Lane type: the state is accessed throughout this file as an array of these. */
typedef unsigned long long int UINT64;

/*
 * When lane complementing is configured, also define UseBebigokimisa.
 * NOTE(review): presumably this name is what the included .macros files test
 * for; it is not referenced anywhere else in this file.
 */
#if defined(KeccakP1600_useLaneComplementing)
#define UseBebigokimisa
#endif
27
/*
 * ROL64(a, offset): rotate the 64-bit value `a` left by `offset` bits.
 *
 * Three variants are selected at compile time:
 *   - MSVC: the _rotl64 intrinsic;
 *   - KeccakP1600_useSHLD: an inline-assembly "shld" (GNU statement
 *     expression; the offset must be an immediate constant);
 *   - otherwise: portable shifts. In this variant `offset` must lie in
 *     1..63, because an offset of 0 would shift right by 64 bits.
 *
 * Macro arguments are parenthesized so that expression arguments such as
 * ROL64(x ^ y, n + 1) expand with the intended precedence.
 */
#if defined(_MSC_VER)
#define ROL64(a, offset) _rotl64(a, offset)
#elif defined(KeccakP1600_useSHLD)
    #define ROL64(x,N) ({ \
    register UINT64 __out; \
    register UINT64 __in = (x); \
    __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \
    __out; \
    })
#else
#define ROL64(a, offset) ((((UINT64)(a)) << (offset)) ^ (((UINT64)(a)) >> (64-(offset))))
#endif
40
/*
 * NOTE(review): the macros used by the permutation functions below
 * (declareABCDE, copyFromState, copyToState, addInput, rounds24, rounds12,
 * roundsN) and the SnP_* helpers are not defined in this file; presumably
 * they come from the three files included here.
 */
#include "KeccakP-1600-64.macros"
/*
 * Translate the configuration switch into the names defined here:
 * either FullUnrolling, or Unrolling set to KeccakP1600_unrolling.
 */
#ifdef KeccakP1600_fullUnrolling
#define FullUnrolling
#else
#define Unrolling KeccakP1600_unrolling
#endif
#include "KeccakP-1600-unrolling.macros"
#include "SnP-Relaned.h"
49
/*
 * Table of 24 64-bit round constants, one entry per round index 0..23.
 * NOTE(review): the table is not referenced directly in this file; it is
 * presumably consumed by the round macros included above.
 */
static const UINT64 KeccakF1600RoundConstants[24] = {
    0x0000000000000001ULL,
    0x0000000000008082ULL,
    0x800000000000808aULL,
    0x8000000080008000ULL,
    0x000000000000808bULL,
    0x0000000080000001ULL,
    0x8000000080008081ULL,
    0x8000000000008009ULL,
    0x000000000000008aULL,
    0x0000000000000088ULL,
    0x0000000080008009ULL,
    0x000000008000000aULL,
    0x000000008000808bULL,
    0x800000000000008bULL,
    0x8000000000008089ULL,
    0x8000000000008003ULL,
    0x8000000000008002ULL,
    0x8000000000000080ULL,
    0x000000000000800aULL,
    0x800000008000000aULL,
    0x8000000080008081ULL,
    0x8000000000008080ULL,
    0x0000000080000001ULL,
    0x8000000080008008ULL };
75
76 /* ---------------------------------------------------------------- */
77
/*
 * Bring the state to its initial value.
 *
 * All 200 bytes at `state` are cleared. When lane complementing is
 * configured, lanes 1, 2, 8, 12, 17 and 20 are then set to all ones.
 */
void KeccakP1600_Initialize(void *state)
{
    enum { stateByteCount = 200 };

    memset(state, 0, stateByteCount);
#ifdef KeccakP1600_useLaneComplementing
    {
        /* Lanes that are stored inverted in this configuration. */
        static const unsigned int invertedLanes[6] = { 1, 2, 8, 12, 17, 20 };
        UINT64 *lanes = (UINT64*)state;
        unsigned int k;

        for (k = 0; k < 6; k++)
            lanes[invertedLanes[k]] = ~(UINT64)0;
    }
#endif
}
90
91 /* ---------------------------------------------------------------- */
92
KeccakP1600_AddBytesInLane(void * state,unsigned int lanePosition,const unsigned char * data,unsigned int offset,unsigned int length)93 void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
94 {
95 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
96 UINT64 lane;
97 if (length == 0)
98 return;
99 if (length == 1)
100 lane = data[0];
101 else {
102 lane = 0;
103 memcpy(&lane, data, length);
104 }
105 lane <<= offset*8;
106 #else
107 UINT64 lane = 0;
108 unsigned int i;
109 for(i=0; i<length; i++)
110 lane |= ((UINT64)data[i]) << ((i+offset)*8);
111 #endif
112 ((UINT64*)state)[lanePosition] ^= lane;
113 }
114
115 /* ---------------------------------------------------------------- */
116
KeccakP1600_AddLanes(void * state,const unsigned char * data,unsigned int laneCount)117 void KeccakP1600_AddLanes(void *state, const unsigned char *data, unsigned int laneCount)
118 {
119 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
120 unsigned int i = 0;
121 #ifdef NO_MISALIGNED_ACCESSES
122 /* If either pointer is misaligned, fall back to byte-wise xor. */
123 if (((((uintptr_t)state) & 7) != 0) || ((((uintptr_t)data) & 7) != 0)) {
124 for (i = 0; i < laneCount * 8; i++) {
125 ((unsigned char*)state)[i] ^= data[i];
126 }
127 }
128 else
129 #endif
130 {
131 /* Otherwise... */
132 for( ; (i+8)<=laneCount; i+=8) {
133 ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
134 ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1];
135 ((UINT64*)state)[i+2] ^= ((UINT64*)data)[i+2];
136 ((UINT64*)state)[i+3] ^= ((UINT64*)data)[i+3];
137 ((UINT64*)state)[i+4] ^= ((UINT64*)data)[i+4];
138 ((UINT64*)state)[i+5] ^= ((UINT64*)data)[i+5];
139 ((UINT64*)state)[i+6] ^= ((UINT64*)data)[i+6];
140 ((UINT64*)state)[i+7] ^= ((UINT64*)data)[i+7];
141 }
142 for( ; (i+4)<=laneCount; i+=4) {
143 ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
144 ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1];
145 ((UINT64*)state)[i+2] ^= ((UINT64*)data)[i+2];
146 ((UINT64*)state)[i+3] ^= ((UINT64*)data)[i+3];
147 }
148 for( ; (i+2)<=laneCount; i+=2) {
149 ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
150 ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1];
151 }
152 if (i<laneCount) {
153 ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
154 }
155 }
156 #else
157 unsigned int i;
158 UINT8 *curData = data;
159 for(i=0; i<laneCount; i++, curData+=8) {
160 UINT64 lane = (UINT64)curData[0]
161 | ((UINT64)curData[1] << 8)
162 | ((UINT64)curData[2] << 16)
163 | ((UINT64)curData[3] << 24)
164 | ((UINT64)curData[4] <<32)
165 | ((UINT64)curData[5] << 40)
166 | ((UINT64)curData[6] << 48)
167 | ((UINT64)curData[7] << 56);
168 ((UINT64*)state)[i] ^= lane;
169 }
170 #endif
171 }
172
173 /* ---------------------------------------------------------------- */
174
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
/*
 * XOR a single byte into the state at byte position `offset`.
 * Only compiled when the host is not little-endian: the byte is shifted to
 * position offset%8 inside lane offset/8 and XORed into that lane.
 */
void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset)
{
    UINT64 *lanes = (UINT64*)state;
    const unsigned int laneIndex = offset/8;
    const unsigned int bitShift = (offset%8)*8;

    lanes[laneIndex] ^= ((UINT64)byte) << bitShift;
}
#endif
183
184 /* ---------------------------------------------------------------- */
185
/*
 * XOR `length` bytes from `data` into the state starting at byte `offset`.
 *
 * The work is delegated to SnP_AddBytes, which is given the whole-lane
 * routine (KeccakP1600_AddLanes), the partial-lane routine
 * (KeccakP1600_AddBytesInLane) and the lane size in bytes (8).
 * NOTE(review): SnP_AddBytes is not defined in this file (presumably a macro
 * from SnP-Relaned.h); how it splits the range between the two routines
 * should be confirmed there.
 */
void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
{
    SnP_AddBytes(state, data, offset, length, KeccakP1600_AddLanes, KeccakP1600_AddBytesInLane, 8);
}
190
191 /* ---------------------------------------------------------------- */
192
/*
 * Replace `length` bytes of lane `lanePosition`, starting at byte `offset`
 * inside that lane, with the bytes from `data`.
 *
 * With lane complementing configured, lanes 1, 2, 8, 12, 17 and 20 receive
 * the bitwise complement of each input byte. Only little-endian hosts are
 * supported; other hosts hit the #error below.
 */
void KeccakP1600_OverwriteBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    unsigned char *stateBytes = (unsigned char*)state;
    const unsigned int firstByte = lanePosition*8+offset;
#ifdef KeccakP1600_useLaneComplementing
    switch (lanePosition) {
    case 1: case 2: case 8: case 12: case 17: case 20: {
        /* Complemented lane: store each byte inverted. */
        unsigned int k = 0;

        while (k < length) {
            stateBytes[firstByte+k] = ~data[k];
            k++;
        }
        return;
    }
    default:
        break;
    }
#endif
    memcpy(stateBytes+firstByte, data, length);
#else
#error "Not yet implemented"
#endif
}
211
212 /* ---------------------------------------------------------------- */
213
/*
 * Replace the first `laneCount` lanes of the state with the lanes read
 * from `data` (8 bytes per lane).
 *
 * With lane complementing configured, lanes 1, 2, 8, 12, 17 and 20 are
 * stored as the bitwise complement of the input lane. Only little-endian
 * hosts are supported; other hosts hit the #error below.
 */
void KeccakP1600_OverwriteLanes(void *state, const unsigned char *data, unsigned int laneCount)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
#ifdef KeccakP1600_useLaneComplementing
    const UINT64 *source = (const UINT64*)data;
    UINT64 *target = (UINT64*)state;
    unsigned int idx = 0;

    while (idx < laneCount) {
        switch (idx) {
        case 1: case 2: case 8: case 12: case 17: case 20:
            target[idx] = ~source[idx];
            break;
        default:
            target[idx] = source[idx];
            break;
        }
        idx++;
    }
#else
    const unsigned int byteCount = laneCount*8;

    memcpy(state, data, byteCount);
#endif
#else
#error "Not yet implemented"
#endif
}
232
233 /* ---------------------------------------------------------------- */
234
/*
 * Overwrite `length` bytes of the state, starting at byte `offset`, with
 * the bytes from `data`.
 *
 * The work is delegated to SnP_OverwriteBytes, which is given the
 * whole-lane routine (KeccakP1600_OverwriteLanes), the partial-lane routine
 * (KeccakP1600_OverwriteBytesInLane) and the lane size in bytes (8).
 * NOTE(review): SnP_OverwriteBytes is not defined in this file (presumably a
 * macro from SnP-Relaned.h); confirm its splitting logic there.
 */
void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
{
    SnP_OverwriteBytes(state, data, offset, length, KeccakP1600_OverwriteLanes, KeccakP1600_OverwriteBytesInLane, 8);
}
239
240 /* ---------------------------------------------------------------- */
241
/*
 * Overwrite the first `byteCount` bytes of the state with logical zeroes.
 *
 * Without lane complementing this is a plain memset to 0. With lane
 * complementing, bytes that belong to lanes 1, 2, 8, 12, 17 and 20 are
 * written as 0xFF instead, since those lanes are kept inverted. Only
 * little-endian hosts are supported; other hosts hit the #error below.
 */
void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
#ifdef KeccakP1600_useLaneComplementing
    static const unsigned int invertedLanes[6] = { 1, 2, 8, 12, 17, 20 };
    const unsigned int fullLanes = byteCount/8;
    const unsigned int tailBytes = byteCount%8;
    UINT64 *lanes = (UINT64*)state;
    unsigned int idx;
    unsigned int k;

    /* Whole lanes: clear everything, then fix up the inverted ones. */
    for (idx = 0; idx < fullLanes; idx++)
        lanes[idx] = 0;
    for (k = 0; k < 6; k++) {
        if (invertedLanes[k] < fullLanes)
            lanes[invertedLanes[k]] = ~(UINT64)0;
    }
    /* Trailing partial lane, if any. */
    if (tailBytes != 0) {
        int fill = 0;

        for (k = 0; k < 6; k++) {
            if (invertedLanes[k] == fullLanes)
                fill = 0xFF;
        }
        memset((unsigned char*)state+fullLanes*8, fill, tailBytes);
    }
#else
    memset(state, 0, byteCount);
#endif
#else
#error "Not yet implemented"
#endif
}
267
268 /* ---------------------------------------------------------------- */
269
/*
 * Apply the permutation to the state using the roundsN macro with `nr` as
 * its argument.
 *
 * The state is viewed as an array of 64-bit lanes, loaded into the working
 * variables introduced by declareABCDE, processed, and stored back.
 * NOTE(review): declareABCDE, copyFromState, roundsN and copyToState are
 * macros defined outside this file; the valid range of `nr` should be
 * confirmed against their definitions.
 */
void KeccakP1600_Permute_Nrounds(void *state, unsigned int nr)
{
    declareABCDE
    /* Not used explicitly below; presumably referenced by name inside roundsN. */
    unsigned int i;
    UINT64 *stateAsLanes = (UINT64*)state;

    copyFromState(A, stateAsLanes)
    roundsN(nr)
    copyToState(stateAsLanes, A)

}
281
282 /* ---------------------------------------------------------------- */
283
/*
 * Apply the permutation to the state using the rounds24 macro.
 *
 * The state is viewed as an array of 64-bit lanes, loaded into the working
 * variables introduced by declareABCDE, processed, and stored back.
 * NOTE(review): all four macros used here are defined outside this file.
 */
void KeccakP1600_Permute_24rounds(void *state)
{
    declareABCDE
    #ifndef KeccakP1600_fullUnrolling
    /* Only declared when not fully unrolled; presumably the loop counter
       referenced by name inside rounds24. */
    unsigned int i;
    #endif
    UINT64 *stateAsLanes = (UINT64*)state;

    copyFromState(A, stateAsLanes)
    rounds24
    copyToState(stateAsLanes, A)
}
296
297 /* ---------------------------------------------------------------- */
298
/*
 * Apply the permutation to the state using the rounds12 macro.
 *
 * The state is viewed as an array of 64-bit lanes, loaded into the working
 * variables introduced by declareABCDE, processed, and stored back.
 * NOTE(review): all four macros used here are defined outside this file.
 */
void KeccakP1600_Permute_12rounds(void *state)
{
    declareABCDE
    #ifndef KeccakP1600_fullUnrolling
    /* Only declared when not fully unrolled; presumably the loop counter
       referenced by name inside rounds12. */
    unsigned int i;
    #endif
    UINT64 *stateAsLanes = (UINT64*)state;

    copyFromState(A, stateAsLanes)
    rounds12
    copyToState(stateAsLanes, A)
}
311
312 /* ---------------------------------------------------------------- */
313
KeccakP1600_ExtractBytesInLane(const void * state,unsigned int lanePosition,unsigned char * data,unsigned int offset,unsigned int length)314 void KeccakP1600_ExtractBytesInLane(const void *state, unsigned int lanePosition, unsigned char *data, unsigned int offset, unsigned int length)
315 {
316 UINT64 lane = ((UINT64*)state)[lanePosition];
317 #ifdef KeccakP1600_useLaneComplementing
318 if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20))
319 lane = ~lane;
320 #endif
321 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
322 {
323 UINT64 lane1[1];
324 lane1[0] = lane;
325 memcpy(data, (UINT8*)lane1+offset, length);
326 }
327 #else
328 unsigned int i;
329 lane >>= offset*8;
330 for(i=0; i<length; i++) {
331 data[i] = lane & 0xFF;
332 lane >>= 8;
333 }
334 #endif
335 }
336
337 /* ---------------------------------------------------------------- */
338
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
/*
 * Write the 64-bit `word` into bytes[0..7], least significant byte first.
 * Only compiled when the host is not little-endian.
 */
void fromWordToBytes(UINT8 *bytes, const UINT64 word)
{
    UINT64 remaining = word;
    UINT8 *out = bytes;
    UINT8 *end = bytes + (64/8);

    while (out != end) {
        *out++ = remaining & 0xFF;
        remaining >>= 8;
    }
}
#endif
348
/*
 * Copy the first `laneCount` lanes of the state into `data` (8 bytes per
 * lane, least significant byte first).
 *
 * Little-endian hosts copy the memory directly; other hosts serialize each
 * lane through fromWordToBytes. With lane complementing configured, output
 * lanes 1, 2, 8, 12, 17 and 20 are then inverted in place, for those
 * indices that are below laneCount.
 */
void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    const unsigned int byteCount = laneCount*8;

    memcpy(data, state, byteCount);
#else
    const UINT64 *lanes = (const UINT64*)state;
    unsigned int idx = 0;

    while (idx < laneCount) {
        fromWordToBytes(data+(idx*8), lanes[idx]);
        idx++;
    }
#endif
#ifdef KeccakP1600_useLaneComplementing
    {
        /* Ascending order lets the loop stop at the first index out of range. */
        static const unsigned int invertedLanes[6] = { 1, 2, 8, 12, 17, 20 };
        UINT64 *outLanes = (UINT64*)data;
        unsigned int k;

        for (k = 0; (k < 6) && (laneCount > invertedLanes[k]); k++)
            outLanes[invertedLanes[k]] = ~outLanes[invertedLanes[k]];
    }
#endif
}
380
381 /* ---------------------------------------------------------------- */
382
/*
 * Copy `length` bytes of the state, starting at byte `offset`, into `data`.
 *
 * The work is delegated to SnP_ExtractBytes, which is given the whole-lane
 * routine (KeccakP1600_ExtractLanes), the partial-lane routine
 * (KeccakP1600_ExtractBytesInLane) and the lane size in bytes (8).
 * NOTE(review): SnP_ExtractBytes is not defined in this file (presumably a
 * macro from SnP-Relaned.h); confirm its splitting logic there.
 */
void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
{
    SnP_ExtractBytes(state, data, offset, length, KeccakP1600_ExtractLanes, KeccakP1600_ExtractBytesInLane, 8);
}
387
388 /* ---------------------------------------------------------------- */
389
KeccakP1600_ExtractAndAddBytesInLane(const void * state,unsigned int lanePosition,const unsigned char * input,unsigned char * output,unsigned int offset,unsigned int length)390 void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
391 {
392 UINT64 lane = ((UINT64*)state)[lanePosition];
393 #ifdef KeccakP1600_useLaneComplementing
394 if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20))
395 lane = ~lane;
396 #endif
397 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
398 {
399 unsigned int i;
400 UINT64 lane1[1];
401 lane1[0] = lane;
402 for(i=0; i<length; i++)
403 output[i] = input[i] ^ ((UINT8*)lane1)[offset+i];
404 }
405 #else
406 unsigned int i;
407 lane >>= offset*8;
408 for(i=0; i<length; i++) {
409 output[i] = input[i] ^ (lane & 0xFF);
410 lane >>= 8;
411 }
412 #endif
413 }
414
415 /* ---------------------------------------------------------------- */
416
KeccakP1600_ExtractAndAddLanes(const void * state,const unsigned char * input,unsigned char * output,unsigned int laneCount)417 void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *input, unsigned char *output, unsigned int laneCount)
418 {
419 unsigned int i;
420 #if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
421 unsigned char temp[8];
422 unsigned int j;
423 #endif
424
425 for(i=0; i<laneCount; i++) {
426 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
427 ((UINT64*)output)[i] = ((UINT64*)input)[i] ^ ((const UINT64*)state)[i];
428 #else
429 fromWordToBytes(temp, ((const UINT64*)state)[i]);
430 for(j=0; j<8; j++)
431 output[i*8+j] = input[i*8+j] ^ temp[j];
432 #endif
433 }
434 #ifdef KeccakP1600_useLaneComplementing
435 if (laneCount > 1) {
436 ((UINT64*)output)[ 1] = ~((UINT64*)output)[ 1];
437 if (laneCount > 2) {
438 ((UINT64*)output)[ 2] = ~((UINT64*)output)[ 2];
439 if (laneCount > 8) {
440 ((UINT64*)output)[ 8] = ~((UINT64*)output)[ 8];
441 if (laneCount > 12) {
442 ((UINT64*)output)[12] = ~((UINT64*)output)[12];
443 if (laneCount > 17) {
444 ((UINT64*)output)[17] = ~((UINT64*)output)[17];
445 if (laneCount > 20) {
446 ((UINT64*)output)[20] = ~((UINT64*)output)[20];
447 }
448 }
449 }
450 }
451 }
452 }
453 #endif
454 }
455
456 /* ---------------------------------------------------------------- */
457
/*
 * For `length` bytes of the state starting at byte `offset`, produce
 * input XOR state into `output`.
 *
 * The work is delegated to SnP_ExtractAndAddBytes, which is given the
 * whole-lane routine (KeccakP1600_ExtractAndAddLanes), the partial-lane
 * routine (KeccakP1600_ExtractAndAddBytesInLane) and the lane size in
 * bytes (8).
 * NOTE(review): SnP_ExtractAndAddBytes is not defined in this file
 * (presumably a macro from SnP-Relaned.h); confirm its splitting logic there.
 */
void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
{
    SnP_ExtractAndAddBytes(state, input, output, offset, length, KeccakP1600_ExtractAndAddLanes, KeccakP1600_ExtractAndAddBytesInLane, 8);
}
462
463 /* ---------------------------------------------------------------- */
464
/*
 * Absorb as many whole blocks of `laneCount` lanes (laneCount*8 bytes) from
 * `data` as fit in `dataByteLen`, applying rounds24 after each block.
 *
 * The state is loaded into local working variables once, updated for every
 * block, and written back once at the end.
 *
 * Returns the number of bytes consumed, i.e. the block size times the
 * number of loop iterations; any remaining tail shorter than one block is
 * left untouched for the caller.
 *
 * NOTE(review): with laneCount == 0 the loop condition is always true and
 * dataByteLen never decreases, so the loop would not terminate; callers
 * must pass a non-zero laneCount.
 * NOTE(review): `data` is read through a UINT64 pointer, so this assumes the
 * host tolerates that access at the given alignment and, presumably, that
 * addInput expects lanes in host byte order — confirm in the macro file.
 */
size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen)
{
    size_t originalDataByteLen = dataByteLen;
    declareABCDE
    #ifndef KeccakP1600_fullUnrolling
    /* Only declared when not fully unrolled; presumably the loop counter
       referenced by name inside rounds24. */
    unsigned int i;
    #endif
    UINT64 *stateAsLanes = (UINT64*)state;
    UINT64 *inDataAsLanes = (UINT64*)data;

    copyFromState(A, stateAsLanes)
    /* One iteration per complete input block. */
    while(dataByteLen >= laneCount*8) {
        addInput(A, inDataAsLanes, laneCount)
        rounds24
        inDataAsLanes += laneCount;
        dataByteLen -= laneCount*8;
    }
    copyToState(stateAsLanes, A)
    /* Bytes actually absorbed. */
    return originalDataByteLen - dataByteLen;
}
485