1 /*
2 Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
3 Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
4 denoted as "the implementer".
5 
6 For more information, feedback or questions, please refer to our websites:
7 http://keccak.noekeon.org/
8 http://keyak.noekeon.org/
9 http://ketje.noekeon.org/
10 
11 To the extent possible under law, the implementer has waived all copyright
12 and related or neighboring rights to the source code in this file.
13 http://creativecommons.org/publicdomain/zero/1.0/
14 */
15 
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "brg_endian.h"
#include "KeccakP-1600-opt64-config.h"
/*
 * ALLOW_MISALIGNED_ACCESS is placed in front of functions whose unaligned
 * loads and stores are intentional. When the compiler provides __has_feature
 * and reports undefined_behavior_sanitizer, the macro expands to an attribute
 * that turns off the sanitizer's "alignment" check for that function.
 * In every other configuration it expands to nothing.
 */
#ifdef __has_feature
# if __has_feature(undefined_behavior_sanitizer)
#  define ALLOW_MISALIGNED_ACCESS __attribute__((no_sanitize("alignment")))
# endif
#endif
#ifndef ALLOW_MISALIGNED_ACCESS
# define ALLOW_MISALIGNED_ACCESS
#endif
28 
/* Byte type and lane type used by every function in this file. */
typedef unsigned char UINT8;
typedef unsigned long long int UINT64;

/*
 * Lane complementing also defines UseBebigokimisa. The symbol is not tested
 * in this file; presumably it is read by the .macros files included below.
 */
#if defined(KeccakP1600_useLaneComplementing)
#define UseBebigokimisa
#endif
35 
/*
 * ROL64(a, offset): rotate the 64-bit value a left by offset bits.
 *
 * Three variants are selected at compile time:
 *   - MSVC: the _rotl64 intrinsic;
 *   - KeccakP1600_useSHLD: an x86 "shld" instruction via a GCC statement
 *     expression (offset must be a compile-time constant, constraint "i");
 *   - otherwise: portable shifts. In this variant offset must be in 1..63,
 *     because a shift by 64 is undefined in C.
 *
 * Every macro argument is parenthesized so that compound arguments such as
 * ROL64(x ^ y, n + 1) expand with the intended precedence.
 */
#if defined(_MSC_VER)
#define ROL64(a, offset) _rotl64((a), (offset))
#elif defined(KeccakP1600_useSHLD)
    #define ROL64(x,N) ({ \
    register UINT64 __out; \
    register UINT64 __in = (x); \
    __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \
    __out; \
    })
#else
#define ROL64(a, offset) ((((UINT64)(a)) << (offset)) ^ (((UINT64)(a)) >> (64-(offset))))
#endif
48 
#include "KeccakP-1600-64.macros"
/*
 * Translate the configuration symbol into the name read by the unrolling
 * macros: FullUnrolling when KeccakP1600_fullUnrolling is set, otherwise
 * Unrolling takes the value of KeccakP1600_unrolling. This must happen
 * before KeccakP-1600-unrolling.macros is included.
 */
#ifdef KeccakP1600_fullUnrolling
#define FullUnrolling
#else
#define Unrolling KeccakP1600_unrolling
#endif
#include "KeccakP-1600-unrolling.macros"
#include "SnP-Relaned.h"
57 
/*
 * Table of 24 round constants, one 64-bit word per round.
 * The table is not referenced by name in the functions below; presumably it
 * is used by the round macros pulled in from the included .macros files.
 */
static const UINT64 KeccakF1600RoundConstants[24] = {
    0x0000000000000001ULL,
    0x0000000000008082ULL,
    0x800000000000808aULL,
    0x8000000080008000ULL,
    0x000000000000808bULL,
    0x0000000080000001ULL,
    0x8000000080008081ULL,
    0x8000000000008009ULL,
    0x000000000000008aULL,
    0x0000000000000088ULL,
    0x0000000080008009ULL,
    0x000000008000000aULL,
    0x000000008000808bULL,
    0x800000000000008bULL,
    0x8000000000008089ULL,
    0x8000000000008003ULL,
    0x8000000000008002ULL,
    0x8000000000000080ULL,
    0x000000000000800aULL,
    0x800000008000000aULL,
    0x8000000080008081ULL,
    0x8000000000008080ULL,
    0x0000000080000001ULL,
    0x8000000080008008ULL };
83 
84 /* ---------------------------------------------------------------- */
85 
/*
 * Reset the 200-byte state to all zeroes.
 *
 * With lane complementing enabled, lanes 1, 2, 8, 12, 17 and 20 are then set
 * to all ones, i.e. they hold the complement of zero.
 *
 * state: pointer to the 200-byte state; accessed as UINT64 lanes when lane
 *        complementing is enabled.
 */
void KeccakP1600_Initialize(void *state)
{
#ifdef KeccakP1600_useLaneComplementing
    static const unsigned int complementedLanes[6] = { 1, 2, 8, 12, 17, 20 };
    UINT64 *lanes = (UINT64*)state;
    unsigned int k;
#endif

    memset(state, 0, 200);
#ifdef KeccakP1600_useLaneComplementing
    for (k = 0; k < 6; k++)
        lanes[complementedLanes[k]] = ~(UINT64)0;
#endif
}
98 
99 /* ---------------------------------------------------------------- */
100 
KeccakP1600_AddBytesInLane(void * state,unsigned int lanePosition,const unsigned char * data,unsigned int offset,unsigned int length)101 void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
102 {
103 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
104     UINT64 lane;
105     if (length == 0)
106         return;
107     if (length == 1)
108         lane = data[0];
109     else {
110         lane = 0;
111         memcpy(&lane, data, length);
112     }
113     lane <<= offset*8;
114 #else
115     UINT64 lane = 0;
116     unsigned int i;
117     for(i=0; i<length; i++)
118         lane |= ((UINT64)data[i]) << ((i+offset)*8);
119 #endif
120     ((UINT64*)state)[lanePosition] ^= lane;
121 }
122 
123 /* ---------------------------------------------------------------- */
124 
125 ALLOW_MISALIGNED_ACCESS
KeccakP1600_AddLanes(void * state,const unsigned char * data,unsigned int laneCount)126 void KeccakP1600_AddLanes(void *state, const unsigned char *data, unsigned int laneCount)
127 {
128 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
129     unsigned int i = 0;
130 #ifdef NO_MISALIGNED_ACCESSES
131     /* If either pointer is misaligned, fall back to byte-wise xor. */
132     if (((((uintptr_t)state) & 7) != 0) || ((((uintptr_t)data) & 7) != 0)) {
133       for (i = 0; i < laneCount * 8; i++) {
134         ((unsigned char*)state)[i] ^= data[i];
135       }
136     }
137     else
138 #endif
139     {
140       /* Otherwise... */
141       for( ; (i+8)<=laneCount; i+=8) {
142           ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
143           ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1];
144           ((UINT64*)state)[i+2] ^= ((UINT64*)data)[i+2];
145           ((UINT64*)state)[i+3] ^= ((UINT64*)data)[i+3];
146           ((UINT64*)state)[i+4] ^= ((UINT64*)data)[i+4];
147           ((UINT64*)state)[i+5] ^= ((UINT64*)data)[i+5];
148           ((UINT64*)state)[i+6] ^= ((UINT64*)data)[i+6];
149           ((UINT64*)state)[i+7] ^= ((UINT64*)data)[i+7];
150       }
151       for( ; (i+4)<=laneCount; i+=4) {
152           ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
153           ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1];
154           ((UINT64*)state)[i+2] ^= ((UINT64*)data)[i+2];
155           ((UINT64*)state)[i+3] ^= ((UINT64*)data)[i+3];
156       }
157       for( ; (i+2)<=laneCount; i+=2) {
158           ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
159           ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1];
160       }
161       if (i<laneCount) {
162           ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
163       }
164     }
165 #else
166     unsigned int i;
167     UINT8 *curData = data;
168     for(i=0; i<laneCount; i++, curData+=8) {
169         UINT64 lane = (UINT64)curData[0]
170             | ((UINT64)curData[1] << 8)
171             | ((UINT64)curData[2] << 16)
172             | ((UINT64)curData[3] << 24)
173             | ((UINT64)curData[4] <<32)
174             | ((UINT64)curData[5] << 40)
175             | ((UINT64)curData[6] << 48)
176             | ((UINT64)curData[7] << 56);
177         ((UINT64*)state)[i] ^= lane;
178     }
179 #endif
180 }
181 
182 /* ---------------------------------------------------------------- */
183 
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
/*
 * XOR a single byte into the state at byte position offset.
 * Only compiled when the platform is not little-endian.
 *
 * The byte is shifted to position (offset % 8) within lane (offset / 8),
 * counting from the least significant byte.
 */
void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset)
{
    UINT64 *lanes = (UINT64*)state;
    const unsigned int shift = (offset%8)*8;

    lanes[offset/8] ^= ((UINT64)byte) << shift;
}
#endif
192 
193 /* ---------------------------------------------------------------- */
194 
/*
 * Entry point for XORing an arbitrary byte range into the state.
 *
 * All arguments are forwarded to SnP_AddBytes, which is not defined in this
 * file (presumably a macro from SnP-Relaned.h). It is given the whole-lane
 * helper KeccakP1600_AddLanes, the partial-lane helper
 * KeccakP1600_AddBytesInLane, and 8 as its last argument
 * (NOTE(review): presumably the lane size in bytes; confirm).
 */
void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
{
    SnP_AddBytes(state, data, offset, length, KeccakP1600_AddLanes, KeccakP1600_AddBytesInLane, 8);
}
199 
200 /* ---------------------------------------------------------------- */
201 
/*
 * Replace length bytes inside one lane of the state with the bytes in data.
 *
 * state:        state, addressed here as raw bytes.
 * lanePosition: index of the lane to modify.
 * data:         replacement bytes.
 * offset:       first byte inside the lane to overwrite.
 * length:       number of bytes to overwrite.
 *
 * With lane complementing enabled, lanes 1, 2, 8, 12, 17 and 20 store the
 * bitwise complement of each byte. Only little-endian platforms are
 * supported; other platforms stop the build with #error.
 */
void KeccakP1600_OverwriteBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    unsigned char *dest = (unsigned char*)state + lanePosition*8 + offset;

#ifdef KeccakP1600_useLaneComplementing
    switch (lanePosition) {
    case 1: case 2: case 8: case 12: case 17: case 20: {
        unsigned int k;

        for (k = 0; k < length; k++)
            dest[k] = ~data[k];
        return;
    }
    default:
        break;
    }
#endif
    memcpy(dest, data, length);
#else
#error "Not yet implemented"
#endif
}
220 
221 /* ---------------------------------------------------------------- */
222 
/*
 * Replace the first laneCount lanes (8 bytes each) of the state with the
 * bytes in data.
 *
 * state:     state; accessed as UINT64 lanes when lane complementing is on.
 * data:      laneCount*8 input bytes, with no alignment requirement.
 * laneCount: number of whole lanes to overwrite.
 *
 * The input is copied with memcpy, so it is never dereferenced through a
 * UINT64 pointer. With lane complementing enabled, lanes 1, 2, 8, 12, 17 and
 * 20 are complemented in the state afterwards if they fall below laneCount.
 * Only little-endian platforms are supported; others stop the build.
 */
void KeccakP1600_OverwriteLanes(void *state, const unsigned char *data, unsigned int laneCount)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    memcpy(state, data, laneCount*8);
#ifdef KeccakP1600_useLaneComplementing
    {
        /* Ascending order lets the loop stop at the first lane out of range. */
        static const unsigned int complementedLanes[6] = { 1, 2, 8, 12, 17, 20 };
        UINT64 *lanes = (UINT64*)state;
        unsigned int k;

        for (k = 0; (k < 6) && (complementedLanes[k] < laneCount); k++)
            lanes[complementedLanes[k]] = ~lanes[complementedLanes[k]];
    }
#endif
#else
#error "Not yet implemented"
#endif
}
241 
242 /* ---------------------------------------------------------------- */
243 
/*
 * Entry point for overwriting an arbitrary byte range of the state.
 *
 * All arguments are forwarded to SnP_OverwriteBytes, which is not defined in
 * this file (presumably a macro from SnP-Relaned.h). It is given the
 * whole-lane helper KeccakP1600_OverwriteLanes, the partial-lane helper
 * KeccakP1600_OverwriteBytesInLane, and 8 as its last argument
 * (NOTE(review): presumably the lane size in bytes; confirm).
 */
void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
{
    SnP_OverwriteBytes(state, data, offset, length, KeccakP1600_OverwriteLanes, KeccakP1600_OverwriteBytesInLane, 8);
}
248 
249 /* ---------------------------------------------------------------- */
250 
/*
 * Overwrite the first byteCount bytes of the state with the representation
 * of zero.
 *
 * Without lane complementing this is a plain memset to 0. With lane
 * complementing, bytes that belong to lanes 1, 2, 8, 12, 17 and 20 are set
 * to 0xFF (the complement of zero) and all other bytes to 0. The state is
 * walked lane by lane, and the last lane may be covered only partially.
 * Only little-endian platforms are supported; others stop the build.
 */
void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
#ifdef KeccakP1600_useLaneComplementing
    unsigned char *cursor = (unsigned char*)state;
    unsigned int remaining = byteCount;
    unsigned int lanePosition = 0;

    while (remaining != 0) {
        const unsigned int chunk = (remaining < 8) ? remaining : 8;
        int fill = 0;

        switch (lanePosition) {
        case 1: case 2: case 8: case 12: case 17: case 20:
            fill = 0xFF;
            break;
        default:
            break;
        }
        memset(cursor, fill, chunk);
        cursor += chunk;
        remaining -= chunk;
        lanePosition++;
    }
#else
    memset(state, 0, byteCount);
#endif
#else
#error "Not yet implemented"
#endif
}
276 
277 /* ---------------------------------------------------------------- */
278 
/*
 * Apply the rounds selected by roundsN(nr) to the state, in place.
 *
 * The body consists of macros from the included .macros files, which are not
 * visible here: declareABCDE (presumably declares the working variables),
 * copyFromState / copyToState (presumably load and store the lanes) and
 * roundsN (the rounds themselves).
 *
 * state: state, accessed as an array of UINT64 lanes.
 * nr:    round count handed to roundsN.
 *        NOTE(review): presumably must not exceed 24; confirm against the
 *        macro definition.
 */
void KeccakP1600_Permute_Nrounds(void *state, unsigned int nr)
{
    declareABCDE
    /* Not used directly here; presumably the loop counter inside roundsN. */
    unsigned int i;
    UINT64 *stateAsLanes = (UINT64*)state;

    copyFromState(A, stateAsLanes)
    roundsN(nr)
    copyToState(stateAsLanes, A)

}
290 
291 /* ---------------------------------------------------------------- */
292 
/*
 * Apply the rounds24 macro to the state, in place.
 *
 * declareABCDE, copyFromState, rounds24 and copyToState come from the
 * included .macros files and are not visible here. Presumably they declare
 * working variables, load the lanes, run 24 rounds and store the lanes back.
 *
 * state: state, accessed as an array of UINT64 lanes.
 */
void KeccakP1600_Permute_24rounds(void *state)
{
    declareABCDE
    /* Only declared when not fully unrolled; presumably the macro's loop counter. */
    #ifndef KeccakP1600_fullUnrolling
    unsigned int i;
    #endif
    UINT64 *stateAsLanes = (UINT64*)state;

    copyFromState(A, stateAsLanes)
    rounds24
    copyToState(stateAsLanes, A)
}
305 
306 /* ---------------------------------------------------------------- */
307 
/*
 * Apply the rounds12 macro to the state, in place.
 *
 * declareABCDE, copyFromState, rounds12 and copyToState come from the
 * included .macros files and are not visible here. Presumably they declare
 * working variables, load the lanes, run 12 rounds and store the lanes back.
 *
 * state: state, accessed as an array of UINT64 lanes.
 */
void KeccakP1600_Permute_12rounds(void *state)
{
    declareABCDE
    /* Only declared when not fully unrolled; presumably the macro's loop counter. */
    #ifndef KeccakP1600_fullUnrolling
    unsigned int i;
    #endif
    UINT64 *stateAsLanes = (UINT64*)state;

    copyFromState(A, stateAsLanes)
    rounds12
    copyToState(stateAsLanes, A)
}
320 
321 /* ---------------------------------------------------------------- */
322 
KeccakP1600_ExtractBytesInLane(const void * state,unsigned int lanePosition,unsigned char * data,unsigned int offset,unsigned int length)323 void KeccakP1600_ExtractBytesInLane(const void *state, unsigned int lanePosition, unsigned char *data, unsigned int offset, unsigned int length)
324 {
325     UINT64 lane = ((UINT64*)state)[lanePosition];
326 #ifdef KeccakP1600_useLaneComplementing
327     if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20))
328         lane = ~lane;
329 #endif
330 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
331     {
332         UINT64 lane1[1];
333         lane1[0] = lane;
334         memcpy(data, (UINT8*)lane1+offset, length);
335     }
336 #else
337     unsigned int i;
338     lane >>= offset*8;
339     for(i=0; i<length; i++) {
340         data[i] = lane & 0xFF;
341         lane >>= 8;
342     }
343 #endif
344 }
345 
346 /* ---------------------------------------------------------------- */
347 
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
/*
 * Store the low 64 bits of word into bytes[0..7], least significant byte
 * first. Only compiled when the platform is not little-endian.
 */
void fromWordToBytes(UINT8 *bytes, const UINT64 word)
{
    UINT64 rest = word;
    unsigned int k = 0;

    while (k < (64/8)) {
        bytes[k++] = rest & 0xFF;
        rest >>= 8;
    }
}
#endif
357 
/*
 * Copy the first laneCount lanes (8 bytes each) of the state into data.
 *
 * state:     state to read from.
 * data:      destination for laneCount*8 bytes, with no alignment requirement.
 * laneCount: number of whole lanes to extract.
 *
 * Little-endian builds copy the bytes directly; other builds serialize each
 * lane with fromWordToBytes. With lane complementing enabled, the output
 * bytes of lanes 1, 2, 8, 12, 17 and 20 are complemented if they fall below
 * laneCount. This is done byte by byte, so the output buffer is never
 * accessed through a UINT64 pointer.
 */
void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    memcpy(data, state, laneCount*8);
#else
    unsigned int i;

    for(i=0; i<laneCount; i++)
        fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]);
#endif
#ifdef KeccakP1600_useLaneComplementing
    {
        /* Ascending order lets the loop stop at the first lane out of range. */
        static const unsigned int complementedLanes[6] = { 1, 2, 8, 12, 17, 20 };
        unsigned int k;

        for (k = 0; (k < 6) && (complementedLanes[k] < laneCount); k++) {
            unsigned char *laneBytes = data + complementedLanes[k]*8;
            unsigned int b;

            for (b = 0; b < 8; b++)
                laneBytes[b] = (unsigned char)~laneBytes[b];
        }
    }
#endif
}
389 
390 /* ---------------------------------------------------------------- */
391 
/*
 * Entry point for copying an arbitrary byte range out of the state.
 *
 * All arguments are forwarded to SnP_ExtractBytes, which is not defined in
 * this file (presumably a macro from SnP-Relaned.h). It is given the
 * whole-lane helper KeccakP1600_ExtractLanes, the partial-lane helper
 * KeccakP1600_ExtractBytesInLane, and 8 as its last argument
 * (NOTE(review): presumably the lane size in bytes; confirm).
 */
void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
{
    SnP_ExtractBytes(state, data, offset, length, KeccakP1600_ExtractLanes, KeccakP1600_ExtractBytesInLane, 8);
}
396 
397 /* ---------------------------------------------------------------- */
398 
KeccakP1600_ExtractAndAddBytesInLane(const void * state,unsigned int lanePosition,const unsigned char * input,unsigned char * output,unsigned int offset,unsigned int length)399 void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
400 {
401     UINT64 lane = ((UINT64*)state)[lanePosition];
402 #ifdef KeccakP1600_useLaneComplementing
403     if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20))
404         lane = ~lane;
405 #endif
406 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
407     {
408         unsigned int i;
409         UINT64 lane1[1];
410         lane1[0] = lane;
411         for(i=0; i<length; i++)
412             output[i] = input[i] ^ ((UINT8*)lane1)[offset+i];
413     }
414 #else
415     unsigned int i;
416     lane >>= offset*8;
417     for(i=0; i<length; i++) {
418         output[i] = input[i] ^ (lane & 0xFF);
419         lane >>= 8;
420     }
421 #endif
422 }
423 
424 /* ---------------------------------------------------------------- */
425 
KeccakP1600_ExtractAndAddLanes(const void * state,const unsigned char * input,unsigned char * output,unsigned int laneCount)426 void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *input, unsigned char *output, unsigned int laneCount)
427 {
428     unsigned int i;
429 #if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
430     unsigned char temp[8];
431     unsigned int j;
432 #endif
433 
434     for(i=0; i<laneCount; i++) {
435 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
436         ((UINT64*)output)[i] = ((UINT64*)input)[i] ^ ((const UINT64*)state)[i];
437 #else
438         fromWordToBytes(temp, ((const UINT64*)state)[i]);
439         for(j=0; j<8; j++)
440             output[i*8+j] = input[i*8+j] ^ temp[j];
441 #endif
442     }
443 #ifdef KeccakP1600_useLaneComplementing
444     if (laneCount > 1) {
445         ((UINT64*)output)[ 1] = ~((UINT64*)output)[ 1];
446         if (laneCount > 2) {
447             ((UINT64*)output)[ 2] = ~((UINT64*)output)[ 2];
448             if (laneCount > 8) {
449                 ((UINT64*)output)[ 8] = ~((UINT64*)output)[ 8];
450                 if (laneCount > 12) {
451                     ((UINT64*)output)[12] = ~((UINT64*)output)[12];
452                     if (laneCount > 17) {
453                         ((UINT64*)output)[17] = ~((UINT64*)output)[17];
454                         if (laneCount > 20) {
455                             ((UINT64*)output)[20] = ~((UINT64*)output)[20];
456                         }
457                     }
458                 }
459             }
460         }
461     }
462 #endif
463 }
464 
465 /* ---------------------------------------------------------------- */
466 
/*
 * Entry point for XORing an arbitrary byte range of the state with input
 * and writing the result to output.
 *
 * All arguments are forwarded to SnP_ExtractAndAddBytes, which is not
 * defined in this file (presumably a macro from SnP-Relaned.h). It is given
 * the whole-lane helper KeccakP1600_ExtractAndAddLanes, the partial-lane
 * helper KeccakP1600_ExtractAndAddBytesInLane, and 8 as its last argument
 * (NOTE(review): presumably the lane size in bytes; confirm).
 */
void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
{
    SnP_ExtractAndAddBytes(state, input, output, offset, length, KeccakP1600_ExtractAndAddLanes, KeccakP1600_ExtractAndAddBytesInLane, 8);
}
471 
472 /* ---------------------------------------------------------------- */
473 
474 ALLOW_MISALIGNED_ACCESS
KeccakF1600_FastLoop_Absorb(void * state,unsigned int laneCount,const unsigned char * data,size_t dataByteLen)475 size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen)
476 {
477     size_t originalDataByteLen = dataByteLen;
478     declareABCDE
479     #ifndef KeccakP1600_fullUnrolling
480     unsigned int i;
481     #endif
482     UINT64 *stateAsLanes = (UINT64*)state;
483     UINT64 *inDataAsLanes = (UINT64*)data;
484 
485     copyFromState(A, stateAsLanes)
486     while(dataByteLen >= laneCount*8) {
487         addInput(A, inDataAsLanes, laneCount)
488         rounds24
489         inDataAsLanes += laneCount;
490         dataByteLen -= laneCount*8;
491     }
492     copyToState(stateAsLanes, A)
493     return originalDataByteLen - dataByteLen;
494 }
495