1 /*
2 Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
3 Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
4 denoted as "the implementer".
5 
6 For more information, feedback or questions, please refer to our websites:
7 http://keccak.noekeon.org/
8 http://keyak.noekeon.org/
9 http://ketje.noekeon.org/
10 
11 To the extent possible under law, the implementer has waived all copyright
12 and related or neighboring rights to the source code in this file.
13 http://creativecommons.org/publicdomain/zero/1.0/
14 */
15 
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "brg_endian.h"
#include "KeccakP-1600-opt64-config.h"
20 
/* Byte and 64-bit lane types used throughout this file. */
typedef unsigned char UINT8;
typedef unsigned long long int UINT64;

/* Map the configuration switch for lane complementing onto UseBebigokimisa
   (presumably the name tested by the round macros included further down -- confirm there). */
#if defined(KeccakP1600_useLaneComplementing)
#define UseBebigokimisa
#endif

/*
 * ROL64(a, offset): rotate the 64-bit value `a` left by `offset` bits.
 *
 * Three variants are selected at compile time:
 *   - MSVC: the _rotl64 intrinsic;
 *   - KeccakP1600_useSHLD: an inline-assembly SHLD (GNU statement expression,
 *     `N` must be a compile-time constant because of the "i" constraint);
 *   - otherwise: portable shifts.
 *
 * In the portable variant both arguments are fully parenthesised so that
 * expression arguments such as ROL64(x, p+q) expand correctly
 * (previously `64-offset` became `64-p+q`).  `offset` must be in 1..63:
 * a shift by 64 is undefined behaviour in C.
 */
#if defined(_MSC_VER)
#define ROL64(a, offset) _rotl64(a, offset)
#elif defined(KeccakP1600_useSHLD)
    #define ROL64(x,N) ({ \
    register UINT64 __out; \
    register UINT64 __in = (x); \
    __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \
    __out; \
    })
#else
#define ROL64(a, offset) ((((UINT64)(a)) << (offset)) ^ (((UINT64)(a)) >> (64-(offset))))
#endif
40 
/* Round-function macros for the 64-bit implementation (contents not visible from this file). */
#include "KeccakP-1600-64.macros"
/* Translate the configuration's unrolling choice into FullUnrolling / Unrolling,
   presumably the names consumed by KeccakP-1600-unrolling.macros included next -- confirm there. */
#ifdef KeccakP1600_fullUnrolling
#define FullUnrolling
#else
#define Unrolling KeccakP1600_unrolling
#endif
#include "KeccakP-1600-unrolling.macros"
/* Provides the SnP_* helpers invoked by the *Bytes wrappers below (assumed; not visible here). */
#include "SnP-Relaned.h"
49 
/* The 24 round constants, indexed by round number (two per row: rounds 2k and 2k+1). */
static const UINT64 KeccakF1600RoundConstants[24] = {
    0x0000000000000001ULL, 0x0000000000008082ULL,   /*  0,  1 */
    0x800000000000808aULL, 0x8000000080008000ULL,   /*  2,  3 */
    0x000000000000808bULL, 0x0000000080000001ULL,   /*  4,  5 */
    0x8000000080008081ULL, 0x8000000000008009ULL,   /*  6,  7 */
    0x000000000000008aULL, 0x0000000000000088ULL,   /*  8,  9 */
    0x0000000080008009ULL, 0x000000008000000aULL,   /* 10, 11 */
    0x000000008000808bULL, 0x800000000000008bULL,   /* 12, 13 */
    0x8000000000008089ULL, 0x8000000000008003ULL,   /* 14, 15 */
    0x8000000000008002ULL, 0x8000000000000080ULL,   /* 16, 17 */
    0x000000000000800aULL, 0x800000008000000aULL,   /* 18, 19 */
    0x8000000080008081ULL, 0x8000000000008080ULL,   /* 20, 21 */
    0x0000000080000001ULL, 0x8000000080008008ULL    /* 22, 23 */
};
75 
76 /* ---------------------------------------------------------------- */
77 
/*
 * Reset the 200-byte state to its initial value.
 *
 * Every byte is cleared.  When KeccakP1600_useLaneComplementing is defined,
 * lanes 1, 2, 8, 12, 17 and 20 are then set to all-ones, i.e. they hold the
 * complement of zero.
 */
void KeccakP1600_Initialize(void *state)
{
#ifdef KeccakP1600_useLaneComplementing
    static const unsigned int complementedLanes[6] = { 1, 2, 8, 12, 17, 20 };
    UINT64 *lanes = (UINT64*)state;
    unsigned int k;
#endif

    memset(state, 0, 25 * 8);
#ifdef KeccakP1600_useLaneComplementing
    for (k = 0; k < 6; k++)
        lanes[complementedLanes[k]] = ~(UINT64)0;
#endif
}
90 
91 /* ---------------------------------------------------------------- */
92 
/*
 * XOR `length` bytes from `data` into lane `lanePosition` of the state,
 * starting at byte `offset` inside that lane (byte 0 is the least
 * significant byte of the lane).
 *
 * The bytes are first assembled into a 64-bit value and shifted into place,
 * so the caller must keep offset + length <= 8.
 */
void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
{
    UINT64 *target = (UINT64*)state + lanePosition;
    UINT64 contribution = 0;
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    if (length == 0)
        return;
    /* On a little-endian host the input bytes already have the lane's memory layout. */
    memcpy(&contribution, data, length);
    contribution <<= 8*offset;
#else
    unsigned int k;
    for (k = 0; k < length; k++)
        contribution |= ((UINT64)data[k]) << ((k+offset)*8);
#endif
    *target ^= contribution;
}
114 
115 /* ---------------------------------------------------------------- */
116 
/*
 * XOR `laneCount` 64-bit lanes taken from `data` into the first `laneCount`
 * lanes of the state.  `data` holds the lanes as 8 bytes each, least
 * significant byte first.
 *
 * Little-endian hosts read `data` directly as 64-bit words; when
 * NO_MISALIGNED_ACCESSES is defined and either pointer is not 8-byte
 * aligned, a byte-wise XOR is used instead.  Other hosts assemble each
 * lane from its bytes.
 *
 * `data` is only ever read, so it is accessed through const-qualified
 * pointers throughout.
 */
void KeccakP1600_AddLanes(void *state, const unsigned char *data, unsigned int laneCount)
{
    UINT64 *stateAsLanes = (UINT64*)state;
    unsigned int i;
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
#ifdef NO_MISALIGNED_ACCESSES
    /* If either pointer is misaligned, fall back to byte-wise xor. */
    if (((((uintptr_t)state) & 7) != 0) || ((((uintptr_t)data) & 7) != 0)) {
        unsigned char *stateAsBytes = (unsigned char*)state;
        for (i = 0; i < laneCount * 8; i++)
            stateAsBytes[i] ^= data[i];
        return;
    }
#endif
    {
        /* A plain loop replaces the former 8/4/2/1 hand-unrolled ladder;
           it touches exactly the same lanes in the same order. */
        const UINT64 *dataAsLanes = (const UINT64*)data;
        for (i = 0; i < laneCount; i++)
            stateAsLanes[i] ^= dataAsLanes[i];
    }
#else
    const UINT8 *curData = data;
    for (i = 0; i < laneCount; i++, curData += 8) {
        UINT64 lane = (UINT64)curData[0]
            | ((UINT64)curData[1] << 8)
            | ((UINT64)curData[2] << 16)
            | ((UINT64)curData[3] << 24)
            | ((UINT64)curData[4] << 32)
            | ((UINT64)curData[5] << 40)
            | ((UINT64)curData[6] << 48)
            | ((UINT64)curData[7] << 56);
        stateAsLanes[i] ^= lane;
    }
#endif
}
172 
173 /* ---------------------------------------------------------------- */
174 
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
/*
 * XOR a single byte into the state at byte position `offset`.
 * The lane is offset/8 and the byte sits offset%8 bytes above the lane's
 * least significant byte.  Only compiled for non-little-endian hosts.
 */
void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset)
{
    UINT64 *lanes = (UINT64*)state;
    const unsigned int laneIndex = offset/8;
    const unsigned int bitShift = (offset%8)*8;

    lanes[laneIndex] ^= ((UINT64)byte) << bitShift;
}
#endif
183 
184 /* ---------------------------------------------------------------- */
185 
/*
 * Add (XOR) `length` bytes from `data` into the state, starting at byte
 * position `offset`.
 *
 * The work is delegated to SnP_AddBytes, which receives the whole-lane
 * routine (KeccakP1600_AddLanes), the partial-lane routine
 * (KeccakP1600_AddBytesInLane) and the lane size in bytes (8).
 * NOTE(review): SnP_AddBytes is not defined in this file (presumably
 * SnP-Relaned.h); how it splits the range into lanes should be confirmed there.
 */
void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
{
    SnP_AddBytes(state, data, offset, length, KeccakP1600_AddLanes, KeccakP1600_AddBytesInLane, 8);
}
190 
191 /* ---------------------------------------------------------------- */
192 
/*
 * Overwrite `length` bytes of lane `lanePosition`, starting at byte `offset`
 * inside the lane, with the bytes from `data`.
 *
 * With lane complementing enabled, lanes 1, 2, 8, 12, 17 and 20 store the
 * bitwise complement of the supplied bytes.  Only implemented for
 * little-endian hosts.
 */
void KeccakP1600_OverwriteBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    unsigned char *dest = (unsigned char*)state + lanePosition*8 + offset;
#ifdef KeccakP1600_useLaneComplementing
    unsigned int k;

    switch (lanePosition) {
    case 1: case 2: case 8: case 12: case 17: case 20:
        /* Complemented lane: store the inverted bytes. */
        for (k = 0; k < length; k++)
            dest[k] = ~data[k];
        return;
    default:
        break;
    }
#endif
    memcpy(dest, data, length);
#else
#error "Not yet implemented"
#endif
}
211 
212 /* ---------------------------------------------------------------- */
213 
/*
 * Overwrite the first `laneCount` lanes of the state with the lanes found
 * in `data` (8 bytes per lane).
 *
 * With lane complementing enabled, lanes 1, 2, 8, 12, 17 and 20 receive the
 * complement of the input lane; otherwise this is a straight copy.
 * Only implemented for little-endian hosts.
 */
void KeccakP1600_OverwriteLanes(void *state, const unsigned char *data, unsigned int laneCount)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
#ifdef KeccakP1600_useLaneComplementing
    UINT64 *dst = (UINT64*)state;
    const UINT64 *src = (const UINT64*)data;
    unsigned int k;

    for (k = 0; k < laneCount; k++) {
        switch (k) {
        case 1: case 2: case 8: case 12: case 17: case 20:
            dst[k] = ~src[k];
            break;
        default:
            dst[k] = src[k];
            break;
        }
    }
#else
    memcpy(state, data, 8*laneCount);
#endif
#else
#error "Not yet implemented"
#endif
}
232 
233 /* ---------------------------------------------------------------- */
234 
/*
 * Overwrite `length` bytes of the state, starting at byte position `offset`,
 * with the bytes from `data`.
 *
 * The work is delegated to SnP_OverwriteBytes, which receives the whole-lane
 * routine (KeccakP1600_OverwriteLanes), the partial-lane routine
 * (KeccakP1600_OverwriteBytesInLane) and the lane size in bytes (8).
 * NOTE(review): SnP_OverwriteBytes is not defined in this file (presumably
 * SnP-Relaned.h); confirm its lane-splitting behaviour there.
 */
void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
{
    SnP_OverwriteBytes(state, data, offset, length, KeccakP1600_OverwriteLanes, KeccakP1600_OverwriteBytesInLane, 8);
}
239 
240 /* ---------------------------------------------------------------- */
241 
/*
 * Overwrite the first `byteCount` bytes of the state with logical zeroes.
 *
 * Without lane complementing this is a plain memset to 0.  With lane
 * complementing, lanes 1, 2, 8, 12, 17 and 20 are stored inverted, so a
 * logical zero is written as all-ones (0xFF bytes) in those lanes, including
 * in a trailing partial lane.  Only implemented for little-endian hosts.
 */
void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
#ifdef KeccakP1600_useLaneComplementing
    const unsigned int fullLanes = byteCount/8;
    const unsigned int tailBytes = byteCount%8;
    UINT64 *lanes = (UINT64*)state;
    unsigned int k;

    /* Whole lanes first. */
    for (k = 0; k < fullLanes; k++) {
        switch (k) {
        case 1: case 2: case 8: case 12: case 17: case 20:
            lanes[k] = ~(UINT64)0;
            break;
        default:
            lanes[k] = 0;
            break;
        }
    }
    /* Then the leading bytes of the next lane, if the count is not a multiple of 8. */
    if (tailBytes != 0) {
        int fill;
        switch (fullLanes) {
        case 1: case 2: case 8: case 12: case 17: case 20:
            fill = 0xFF;
            break;
        default:
            fill = 0;
            break;
        }
        memset((unsigned char*)state + fullLanes*8, fill, tailBytes);
    }
#else
    memset(state, 0, byteCount);
#endif
#else
#error "Not yet implemented"
#endif
}
267 
268 /* ---------------------------------------------------------------- */
269 
/*
 * Apply a reduced-round permutation to the state, in place.
 *
 * The body is built entirely from macros defined outside this file:
 * declareABCDE declares the working variables, copyFromState loads the
 * lanes from memory into them, roundsN(nr) runs the rounds, and copyToState
 * stores the result back.
 * NOTE(review): the valid range of `nr` and which rounds are executed are
 * determined by roundsN in the unrolling macros -- confirm there.
 */
void KeccakP1600_Permute_Nrounds(void *state, unsigned int nr)
{
    declareABCDE
    /* Loop counter; declared unconditionally here, presumably because roundsN always loops -- confirm. */
    unsigned int i;
    UINT64 *stateAsLanes = (UINT64*)state;

    copyFromState(A, stateAsLanes)
    roundsN(nr)
    copyToState(stateAsLanes, A)

}
281 
282 /* ---------------------------------------------------------------- */
283 
/*
 * Apply the 24-round permutation to the state, in place.
 *
 * The body is built from macros defined outside this file: declareABCDE
 * declares the working variables, copyFromState loads the lanes,
 * rounds24 runs the rounds, and copyToState stores the result back.
 */
void KeccakP1600_Permute_24rounds(void *state)
{
    declareABCDE
    /* The loop counter is only needed when the rounds are not fully unrolled. */
    #ifndef KeccakP1600_fullUnrolling
    unsigned int i;
    #endif
    UINT64 *stateAsLanes = (UINT64*)state;

    copyFromState(A, stateAsLanes)
    rounds24
    copyToState(stateAsLanes, A)
}
296 
297 /* ---------------------------------------------------------------- */
298 
/*
 * Apply the 12-round permutation to the state, in place.
 *
 * The body is built from macros defined outside this file: declareABCDE
 * declares the working variables, copyFromState loads the lanes,
 * rounds12 runs the rounds, and copyToState stores the result back.
 * NOTE(review): which 12 rounds rounds12 executes is defined in the
 * unrolling macros -- confirm there.
 */
void KeccakP1600_Permute_12rounds(void *state)
{
    declareABCDE
    /* The loop counter is only needed when the rounds are not fully unrolled. */
    #ifndef KeccakP1600_fullUnrolling
    unsigned int i;
    #endif
    UINT64 *stateAsLanes = (UINT64*)state;

    copyFromState(A, stateAsLanes)
    rounds12
    copyToState(stateAsLanes, A)
}
311 
312 /* ---------------------------------------------------------------- */
313 
/*
 * Copy `length` bytes of lane `lanePosition`, starting at byte `offset`
 * inside the lane (byte 0 is the least significant), into `data`.
 *
 * With lane complementing enabled, lanes 1, 2, 8, 12, 17 and 20 are
 * inverted before extraction so the caller always sees the logical value.
 * The state itself is not modified.
 */
void KeccakP1600_ExtractBytesInLane(const void *state, unsigned int lanePosition, unsigned char *data, unsigned int offset, unsigned int length)
{
    UINT64 laneValue = ((const UINT64*)state)[lanePosition];
#ifdef KeccakP1600_useLaneComplementing
    switch (lanePosition) {
    case 1: case 2: case 8: case 12: case 17: case 20:
        laneValue = ~laneValue;
        break;
    default:
        break;
    }
#endif
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    /* The local copy already has the output byte order; copy straight out of it. */
    memcpy(data, (const UINT8*)&laneValue + offset, length);
#else
    {
        unsigned char *out = data;
        unsigned int remaining = length;

        laneValue >>= offset*8;
        while (remaining-- > 0) {
            *out++ = laneValue & 0xFF;
            laneValue >>= 8;
        }
    }
#endif
}
336 
337 /* ---------------------------------------------------------------- */
338 
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
/*
 * Serialise a 64-bit word into 8 bytes, least significant byte first.
 * Only compiled for non-little-endian hosts.
 */
void fromWordToBytes(UINT8 *bytes, const UINT64 word)
{
    UINT64 rest = word;
    UINT8 *out = bytes;
    UINT8 *const end = bytes + (64/8);

    while (out != end) {
        *out++ = rest & 0xFF;
        rest >>= 8;
    }
}
#endif
348 
/*
 * Copy the first `laneCount` lanes of the state into `data`, 8 bytes per
 * lane, least significant byte first.
 *
 * With lane complementing enabled, the output copies of lanes 1, 2, 8, 12,
 * 17 and 20 (those that fall below laneCount) are inverted afterwards so
 * that `data` holds the logical values.  The state is not modified.
 */
void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount)
{
#ifdef KeccakP1600_useLaneComplementing
    static const unsigned int complementedLanes[6] = { 1, 2, 8, 12, 17, 20 };
    unsigned int k;
#endif
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    memcpy(data, state, 8*laneCount);
#else
    {
        const UINT64 *lanes = (const UINT64*)state;
        unsigned int laneIndex;

        for (laneIndex = 0; laneIndex < laneCount; laneIndex++)
            fromWordToBytes(data+(laneIndex*8), lanes[laneIndex]);
    }
#endif
#ifdef KeccakP1600_useLaneComplementing
    /* The table is ascending, so stop at the first lane that was not extracted. */
    for (k = 0; (k < 6) && (complementedLanes[k] < laneCount); k++) {
        UINT64 *lane = (UINT64*)data + complementedLanes[k];
        *lane = ~*lane;
    }
#endif
}
380 
381 /* ---------------------------------------------------------------- */
382 
/*
 * Copy `length` bytes of the state, starting at byte position `offset`,
 * into `data`.
 *
 * The work is delegated to SnP_ExtractBytes, which receives the whole-lane
 * routine (KeccakP1600_ExtractLanes), the partial-lane routine
 * (KeccakP1600_ExtractBytesInLane) and the lane size in bytes (8).
 * NOTE(review): SnP_ExtractBytes is not defined in this file (presumably
 * SnP-Relaned.h); confirm its lane-splitting behaviour there.
 */
void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
{
    SnP_ExtractBytes(state, data, offset, length, KeccakP1600_ExtractLanes, KeccakP1600_ExtractBytesInLane, 8);
}
387 
388 /* ---------------------------------------------------------------- */
389 
/*
 * For `length` bytes of lane `lanePosition`, starting at byte `offset`
 * inside the lane, write input[i] XOR (state byte) to output[i].
 *
 * With lane complementing enabled, lanes 1, 2, 8, 12, 17 and 20 are
 * inverted first so the logical lane value is used.  The state is not
 * modified.
 */
void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
{
    UINT64 laneValue = ((const UINT64*)state)[lanePosition];
    unsigned int k;
#ifdef KeccakP1600_useLaneComplementing
    switch (lanePosition) {
    case 1: case 2: case 8: case 12: case 17: case 20:
        laneValue = ~laneValue;
        break;
    default:
        break;
    }
#endif
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    {
        /* View the local lane copy as bytes; on this host they are already in output order. */
        const UINT8 *laneBytes = (const UINT8*)&laneValue;

        for (k = 0; k < length; k++)
            output[k] = input[k] ^ laneBytes[offset+k];
    }
#else
    laneValue >>= offset*8;
    for (k = 0; k < length; k++, laneValue >>= 8)
        output[k] = input[k] ^ (laneValue & 0xFF);
#endif
}
414 
415 /* ---------------------------------------------------------------- */
416 
/*
 * For the first `laneCount` lanes, write (input lane XOR state lane) to
 * `output`, 8 bytes per lane.
 *
 * With lane complementing enabled, the output copies of lanes 1, 2, 8, 12,
 * 17 and 20 (those below laneCount) are inverted afterwards, which is
 * equivalent to XOR-ing the input with the logical state value.
 * The state is not modified.
 */
void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *input, unsigned char *output, unsigned int laneCount)
{
    const UINT64 *stateLanes = (const UINT64*)state;
    unsigned int laneIndex;
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
    unsigned char laneBytes[8];
    unsigned int byteIndex;
#endif
#ifdef KeccakP1600_useLaneComplementing
    static const unsigned int complementedLanes[6] = { 1, 2, 8, 12, 17, 20 };
    unsigned int k;
#endif

#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    for (laneIndex = 0; laneIndex < laneCount; laneIndex++)
        ((UINT64*)output)[laneIndex] = ((const UINT64*)input)[laneIndex] ^ stateLanes[laneIndex];
#else
    for (laneIndex = 0; laneIndex < laneCount; laneIndex++) {
        fromWordToBytes(laneBytes, stateLanes[laneIndex]);
        for (byteIndex = 0; byteIndex < 8; byteIndex++)
            output[laneIndex*8+byteIndex] = input[laneIndex*8+byteIndex] ^ laneBytes[byteIndex];
    }
#endif
#ifdef KeccakP1600_useLaneComplementing
    /* The table is ascending, so stop at the first lane that was not produced. */
    for (k = 0; (k < 6) && (complementedLanes[k] < laneCount); k++) {
        UINT64 *lane = (UINT64*)output + complementedLanes[k];
        *lane = ~*lane;
    }
#endif
}
455 
456 /* ---------------------------------------------------------------- */
457 
/*
 * For `length` bytes of the state starting at byte position `offset`,
 * write input XOR state to `output`.
 *
 * The work is delegated to SnP_ExtractAndAddBytes, which receives the
 * whole-lane routine (KeccakP1600_ExtractAndAddLanes), the partial-lane
 * routine (KeccakP1600_ExtractAndAddBytesInLane) and the lane size in
 * bytes (8).
 * NOTE(review): SnP_ExtractAndAddBytes is not defined in this file
 * (presumably SnP-Relaned.h); confirm its lane-splitting behaviour there.
 */
void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
{
    SnP_ExtractAndAddBytes(state, input, output, offset, length, KeccakP1600_ExtractAndAddLanes, KeccakP1600_ExtractAndAddBytesInLane, 8);
}
462 
463 /* ---------------------------------------------------------------- */
464 
/*
 * Absorb as many whole blocks of `laneCount` lanes (laneCount*8 bytes) from
 * `data` as fit in `dataByteLen`, running the 24-round permutation after
 * each block.
 *
 * The state is loaded into working variables once, processed for every
 * block, and written back once at the end.
 *
 * Returns the number of bytes consumed (a multiple of laneCount*8); any
 * trailing partial block is left for the caller.
 *
 * NOTE(review): laneCount == 0 would make the loop condition always true;
 * callers are assumed to pass a non-zero lane count.
 * NOTE(review): `data` is read through a UINT64 pointer, so this looks like
 * it expects suitably aligned input in little-endian lane layout -- confirm
 * against addInput in the macros file.
 */
size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen)
{
    /* Remember the starting length so the consumed byte count can be returned. */
    size_t originalDataByteLen = dataByteLen;
    declareABCDE
    /* The loop counter is only needed when the rounds are not fully unrolled. */
    #ifndef KeccakP1600_fullUnrolling
    unsigned int i;
    #endif
    UINT64 *stateAsLanes = (UINT64*)state;
    UINT64 *inDataAsLanes = (UINT64*)data;

    copyFromState(A, stateAsLanes)
    /* One iteration per complete block still available. */
    while(dataByteLen >= laneCount*8) {
        addInput(A, inDataAsLanes, laneCount)
        rounds24
        inDataAsLanes += laneCount;
        dataByteLen -= laneCount*8;
    }
    copyToState(stateAsLanes, A)
    return originalDataByteLen - dataByteLen;
}
485