1 /*
2 * Copyright 2004-2024 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License 2.0 (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10 /*
11 * SHA512 low level APIs are deprecated for public use, but still ok for
12 * internal use.
13 */
14 #include "internal/deprecated.h"
15
16 #include <stdio.h>
17 #include <openssl/opensslconf.h>
18 /*-
19 * IMPLEMENTATION NOTES.
20 *
21 * As you might have noticed, 32-bit hash algorithms:
22 *
23 * - permit SHA_LONG to be wider than 32-bit
24 * - optimized versions implement two transform functions: one operating
25 * on [aligned] data in host byte order, and one operating on data in input
26 * stream byte order;
27 * - share common byte-order neutral collector and padding function
28 * implementations, crypto/md32_common.h;
29 *
 * None of the above applies to this SHA-512 implementation. Reasons
 * [in reverse order] are:
 *
 * - it's the only 64-bit hash algorithm at the time of this writing, so
 *   there is no need for common collector/padding implementation [yet];
35 * - by supporting only one transform function [which operates on
36 * *aligned* data in input stream byte order, big-endian in this case]
37 * we minimize burden of maintenance in two ways: a) collector/padding
38 * function is simpler; b) only one transform function to stare at;
39 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
40 * apply a number of optimizations to mitigate potential performance
41 * penalties caused by previous design decision;
42 *
43 * Caveat lector.
44 *
45 * Implementation relies on the fact that "long long" is 64-bit on
46 * both 32- and 64-bit platforms. If some compiler vendor comes up
47 * with 128-bit long long, adjustment to sha.h would be required.
48 * As this implementation relies on 64-bit integer type, it's totally
49 * inappropriate for platforms which don't support it, most notably
50 * 16-bit platforms.
51 */
52 #include <stdlib.h>
53 #include <string.h>
54
55 #include <openssl/crypto.h>
56 #include <openssl/sha.h>
57 #include <openssl/opensslv.h>
58
59 #include "internal/cryptlib.h"
60 #include "crypto/sha.h"
61
/*
 * SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA is defined for the targets listed
 * here and for every build that defines SHA512_ASM. When it is defined,
 * SHA512_Update() and SHA512_Transform() pass caller buffers straight to
 * sha512_block_data_order(); otherwise they first copy misaligned input
 * into the context buffer.
 */
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
    defined(__s390__) || defined(__s390x__) || \
    defined(__aarch64__) || \
    defined(SHA512_ASM)
# define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
#endif
69
/*
 * U64(C) pastes an unsigned integer-constant suffix onto C: UI64 on Windows
 * toolchains other than MinGW, UL when __arch64__ is defined, and ULL
 * everywhere else.
 */
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
# define U64(C) C##UI64
#elif defined(__arch64__)
# define U64(C) C##UL
#else
# define U64(C) C##ULL
#endif
77
/*
 * Prepare |c| for a SHA-512/224 computation: load the eight initial chaining
 * words, clear the bit counter and the buffered-byte count, and select a
 * SHA224_DIGEST_LENGTH byte output. Always returns 1.
 */
int sha512_224_init(SHA512_CTX *c)
{
    static const SHA_LONG64 iv[8] = {
        U64(0x8c3d37c819544da2), U64(0x73e1996689dcd4d6),
        U64(0x1dfab7ae32ff9c82), U64(0x679dd514582f9fcf),
        U64(0x0f6d2b697bd44da8), U64(0x77e36f7304c48942),
        U64(0x3f9d85a86a1d36c8), U64(0x1112e6ad91d692a1)
    };
    size_t i;

    for (i = 0; i < sizeof(iv) / sizeof(iv[0]); i++)
        c->h[i] = iv[i];

    c->Nl = 0;
    c->Nh = 0;
    c->num = 0;
    c->md_len = SHA224_DIGEST_LENGTH;

    return 1;
}
95
/*
 * Prepare |c| for a SHA-512/256 computation: load the eight initial chaining
 * words, clear the bit counter and the buffered-byte count, and select a
 * SHA256_DIGEST_LENGTH byte output. Always returns 1.
 */
int sha512_256_init(SHA512_CTX *c)
{
    static const SHA_LONG64 iv[8] = {
        U64(0x22312194fc2bf72c), U64(0x9f555fa3c84c64c2),
        U64(0x2393b86b6f53b151), U64(0x963877195940eabd),
        U64(0x96283ee2a88effe3), U64(0xbe5e1e2553863992),
        U64(0x2b0199fc2c85b8aa), U64(0x0eb72ddc81c52ca2)
    };
    size_t i;

    for (i = 0; i < sizeof(iv) / sizeof(iv[0]); i++)
        c->h[i] = iv[i];

    c->Nl = 0;
    c->Nh = 0;
    c->num = 0;
    c->md_len = SHA256_DIGEST_LENGTH;

    return 1;
}
113
/*
 * Prepare |c| for a SHA-384 computation: load the eight initial chaining
 * words, clear the bit counter and the buffered-byte count, and select a
 * SHA384_DIGEST_LENGTH byte output. Always returns 1.
 */
int SHA384_Init(SHA512_CTX *c)
{
    static const SHA_LONG64 iv[8] = {
        U64(0xcbbb9d5dc1059ed8), U64(0x629a292a367cd507),
        U64(0x9159015a3070dd17), U64(0x152fecd8f70e5939),
        U64(0x67332667ffc00b31), U64(0x8eb44a8768581511),
        U64(0xdb0c2e0d64f98fa7), U64(0x47b5481dbefa4fa4)
    };
    size_t i;

    for (i = 0; i < sizeof(iv) / sizeof(iv[0]); i++)
        c->h[i] = iv[i];

    c->Nl = 0;
    c->Nh = 0;
    c->num = 0;
    c->md_len = SHA384_DIGEST_LENGTH;

    return 1;
}
131
/*
 * Prepare |c| for a SHA-512 computation: load the eight initial chaining
 * words, clear the bit counter and the buffered-byte count, and select a
 * SHA512_DIGEST_LENGTH byte output. Always returns 1.
 */
int SHA512_Init(SHA512_CTX *c)
{
    static const SHA_LONG64 iv[8] = {
        U64(0x6a09e667f3bcc908), U64(0xbb67ae8584caa73b),
        U64(0x3c6ef372fe94f82b), U64(0xa54ff53a5f1d36f1),
        U64(0x510e527fade682d1), U64(0x9b05688c2b3e6c1f),
        U64(0x1f83d9abfb41bd6b), U64(0x5be0cd19137e2179)
    };
    size_t i;

    for (i = 0; i < sizeof(iv) / sizeof(iv[0]); i++)
        c->h[i] = iv[i];

    c->Nl = 0;
    c->Nh = 0;
    c->num = 0;
    c->md_len = SHA512_DIGEST_LENGTH;

    return 1;
}
149
/*
 * Forward declaration of the block transform used by the functions below.
 *
 * Without SHA512_ASM the lone `static` keyword emitted by the first branch
 * attaches to the prototype that follows the #endif, giving the transform
 * internal linkage. With SHA512_ASM the prototype is left non-static, and
 * the C variant sha512_block_data_order_c() is additionally declared when
 * INCLUDE_C_SHA512 is set.
 */
#ifndef SHA512_ASM
static
#else
# ifdef INCLUDE_C_SHA512
void sha512_block_data_order_c(SHA512_CTX *ctx, const void *in, size_t num);
# endif
#endif
void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
158
/*
 * Finish the hash held in |c| and write the digest to |md|.
 *
 * The buffered data is terminated with a 0x80 byte and zero padding; the
 * last 16 bytes of the final block carry the bit count (Nh then Nl) most
 * significant byte first. If the marker leaves fewer than 16 bytes free,
 * the current block is flushed and the count goes into an extra block.
 *
 * The digest is the first c->md_len bytes of the chaining words h[0..7],
 * each word emitted most significant byte first.
 *
 * Returns 1 on success. Returns 0 when |md| is NULL or c->md_len is not one
 * of the four supported lengths; in both cases the padded block(s) have
 * already been processed by then.
 */
int SHA512_Final(unsigned char *md, SHA512_CTX *c)
{
    unsigned char *buf = (unsigned char *)c->u.p;
    const size_t blk = sizeof(c->u);
    size_t used = c->num;
    size_t i;

    buf[used++] = 0x80;         /* There always is a room for one */
    if (used > blk - 16) {
        /* No room left for the 16-byte length: pad out and flush. */
        memset(buf + used, 0, blk - used);
        used = 0;
        sha512_block_data_order(c, buf, 1);
    }

    memset(buf + used, 0, blk - 16 - used);
#ifdef B_ENDIAN
    c->u.d[SHA_LBLOCK - 2] = c->Nh;
    c->u.d[SHA_LBLOCK - 1] = c->Nl;
#else
    /* Byte i (counted from the least significant) of Nl and of Nh. */
    for (i = 0; i < 8; i++) {
        buf[blk - 1 - i] = (unsigned char)(c->Nl >> (8 * i));
        buf[blk - 9 - i] = (unsigned char)(c->Nh >> (8 * i));
    }
#endif

    sha512_block_data_order(c, buf, 1);

    if (md == 0)
        return 0;

    /* Make sure md_len is not abused. */
    switch (c->md_len) {
    case SHA224_DIGEST_LENGTH:
    case SHA256_DIGEST_LENGTH:
    case SHA384_DIGEST_LENGTH:
    case SHA512_DIGEST_LENGTH:
        break;
    default:
        return 0;
    }

    /*
     * Output byte i is byte (i % 8), counted from the top, of word i / 8.
     * For the 224-bit variant this stops after the upper four bytes of h[3].
     */
    for (i = 0; i < c->md_len; i++)
        md[i] = (unsigned char)(c->h[i / 8] >> (56 - 8 * (i % 8)));

    return 1;
}
277
/*
 * SHA-384 finalisation. Forwards to SHA512_Final(), which sizes its output
 * from c->md_len, and returns that call's result unchanged.
 */
int SHA384_Final(unsigned char *md, SHA512_CTX *c)
{
    int rc = SHA512_Final(md, c);

    return rc;
}
282
/*
 * Absorb |len| bytes at |_data| into the hash state |c|.
 *
 * The 128-bit bit counter (Nh:Nl) is advanced first. Input then goes
 * through three stages: top up and flush a partially filled context
 * buffer, process as many whole blocks as possible, and stash any tail
 * bytes in the context buffer for the next call. Always returns 1.
 */
int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
{
    const unsigned char *src = (const unsigned char *)_data;
    unsigned char *buf = c->u.p;
    const size_t blk = sizeof(c->u);
    SHA_LONG64 bits;

    if (len == 0)
        return 1;

    /* Nl += len * 8, carrying into Nh on wrap-around. */
    bits = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
    if (bits < c->Nl)
        c->Nh++;
    /* The top three bits of a 64-bit len are shifted out of Nl above. */
    if (sizeof(len) >= 8)
        c->Nh += (((SHA_LONG64) len) >> 61);
    c->Nl = bits;

    if (c->num != 0) {
        size_t room = blk - c->num;

        if (len < room) {
            /* Still not a full block: just buffer the input. */
            memcpy(buf + c->num, src, len);
            c->num += (unsigned int)len;
            return 1;
        }

        memcpy(buf + c->num, src, room);
        c->num = 0;
        len -= room;
        src += room;
        sha512_block_data_order(c, buf, 1);
    }

    if (len >= blk) {
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
        if ((size_t)src % sizeof(c->u.d[0]) != 0) {
            /* Misaligned input: bounce each block through the buffer. */
            while (len >= blk) {
                memcpy(buf, src, blk);
                sha512_block_data_order(c, buf, 1);
                len -= blk;
                src += blk;
            }
        } else
#endif
        {
            size_t whole = len / blk;

            sha512_block_data_order(c, src, whole);
            src += whole * blk;
            len -= whole * blk;
        }
    }

    if (len != 0) {
        memcpy(buf, src, len);
        c->num = (int)len;
    }

    return 1;
}
330
/*
 * SHA-384 update. Forwards to SHA512_Update() and returns that call's
 * result unchanged.
 */
int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
{
    int rc = SHA512_Update(c, data, len);

    return rc;
}
335
/*
 * Run the block transform once over the block at |data|, updating c->h.
 * On builds where the transform cannot take unaligned input, a misaligned
 * block is first copied into the context buffer c->u.p (overwriting its
 * contents) and processed from there.
 */
void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
{
    const unsigned char *block = data;

#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
    if ((size_t)data % sizeof(c->u.d[0]) != 0) {
        memcpy(c->u.p, data, sizeof(c->u.p));
        block = c->u.p;
    }
#endif
    sha512_block_data_order(c, block, 1);
}
344
345 #if !defined(SHA512_ASM) || defined(INCLUDE_C_SHA512)
/*
 * The 80 SHA-512 round constants. Round i of every C block transform in
 * this file adds K512[i] into its per-round temporary.
 */
static const SHA_LONG64 K512[80] = {
    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
};
388
/*
 * Platform-specific overrides for the primitive operations used by the C
 * block transform. Unless PEDANTIC is defined, each branch may define any
 * of:
 *
 *   ROTR(a,n)            rotate a 64-bit value right by n bits
 *   PULL64(x)            read the 64-bit big-endian word stored at lvalue x
 *   Sigma0/Sigma1,
 *   sigma0/sigma1,
 *   Ch, Maj              the SHA-512 round functions
 *
 * Anything left undefined here is supplied by the portable definitions
 * that follow this section. Several of the macros take the address of
 * their argument (&(x)), so callers must pass lvalues.
 */
# ifndef PEDANTIC
#  if defined(__GNUC__) && __GNUC__>=2 && \
      !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#   if defined(__x86_64) || defined(__x86_64__)
/* x86-64: rotate with rorq, byte-swap a whole word with bswapq. */
#    define ROTR(a,n) ({ SHA_LONG64 ret; \
                        asm ("rorq %1,%0" \
                        : "=r"(ret) \
                        : "J"(n),"0"(a) \
                        : "cc"); ret; })
#    if !defined(B_ENDIAN)
#     define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x))); \
                        asm ("bswapq %0" \
                        : "=r"(ret) \
                        : "0"(ret)); ret; })
#    endif
#   elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
/*
 * 32-bit x86: load the word as two 32-bit halves, byte-swap each and
 * recombine with p[0] as the high half. The I386_ONLY variant swaps with
 * xchgb/roll instead of bswapl.
 */
#    if defined(I386_ONLY)
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                        unsigned int hi=p[0],lo=p[1]; \
                        asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                        "roll $16,%%eax; roll $16,%%edx; "\
                        "xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                        : "=a"(lo),"=d"(hi) \
                        : "0"(lo),"1"(hi) : "cc"); \
                        ((SHA_LONG64)hi)<<32|lo; })
#    else
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                        unsigned int hi=p[0],lo=p[1]; \
                        asm ("bswapl %0; bswapl %1;" \
                        : "=r"(lo),"=r"(hi) \
                        : "0"(lo),"1"(hi)); \
                        ((SHA_LONG64)hi)<<32|lo; })
#    endif
#   elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
/* 64-bit PowerPC: rotate with rotrdi. */
#    define ROTR(a,n) ({ SHA_LONG64 ret; \
                        asm ("rotrdi %0,%1,%2" \
                        : "=r"(ret) \
                        : "r"(a),"K"(n)); ret; })
#   elif defined(__aarch64__)
/* AArch64: rotate with ror; on little-endian builds byte-swap with rev. */
#    define ROTR(a,n) ({ SHA_LONG64 ret; \
                        asm ("ror %0,%1,%2" \
                        : "=r"(ret) \
                        : "r"(a),"I"(n)); ret; })
#    if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
        __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
#     define PULL64(x) ({ SHA_LONG64 ret; \
                        asm ("rev %0,%1" \
                        : "=r"(ret) \
                        : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
#    endif
#   elif (defined(__riscv_zbkb) || defined(__riscv_zbb)) && __riscv_xlen == 32
/* RV32 with Zbkb/Zbb: rev8 each 32-bit half and exchange the halves. */
#    define PULL64(x) ({ SHA_LONG64 ret; \
                        unsigned int *r = (unsigned int *)(&(ret)); \
                        const unsigned int *p = (const unsigned int *)(&(x)); \
                        asm ("rev8 %0, %1" \
                        : "=r"(r[0]) \
                        : "r" (p[1])); \
                        asm ("rev8 %0, %1" \
                        : "=r"(r[1]) \
                        : "r" (p[0])); ret; })
#   elif (defined(__riscv_zbkb) || defined(__riscv_zbb)) && __riscv_xlen == 64
/* RV64 with Zbkb/Zbb: a single rev8 on the whole word. */
#    define PULL64(x) ({ SHA_LONG64 ret; \
                        asm ("rev8 %0, %1" \
                        : "=r"(ret) \
                        : "r"(x)); ret; })
#   endif
/*
 * RISC-V with Zknh: the four sigma functions map onto the sha512*
 * instructions. The RV32 forms produce the 64-bit result as two 32-bit
 * halves, one instruction per half.
 */
#   if defined(__riscv_zknh) && __riscv_xlen == 32
#    define Sigma0(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
                        const unsigned int *p = (const unsigned int *)(&(x)); \
                        asm ("sha512sum0r %0, %1, %2" \
                        : "=r"(r[0]) \
                        : "r" (p[0]), "r" (p[1])); \
                        asm ("sha512sum0r %0, %2, %1" \
                        : "=r"(r[1]) \
                        : "r" (p[0]), "r" (p[1])); ret; })
#    define Sigma1(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
                        const unsigned int *p = (const unsigned int *)(&(x)); \
                        asm ("sha512sum1r %0, %1, %2" \
                        : "=r"(r[0]) \
                        : "r" (p[0]), "r" (p[1])); \
                        asm ("sha512sum1r %0, %2, %1" \
                        : "=r"(r[1]) \
                        : "r" (p[0]), "r" (p[1])); ret; })
#    define sigma0(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
                        const unsigned int *p = (const unsigned int *)(&(x)); \
                        asm ("sha512sig0l %0, %1, %2" \
                        : "=r"(r[0]) \
                        : "r" (p[0]), "r" (p[1])); \
                        asm ("sha512sig0h %0, %2, %1" \
                        : "=r"(r[1]) \
                        : "r" (p[0]), "r" (p[1])); ret; })
#    define sigma1(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
                        const unsigned int *p = (const unsigned int *)(&(x)); \
                        asm ("sha512sig1l %0, %1, %2" \
                        : "=r"(r[0]) \
                        : "r" (p[0]), "r" (p[1])); \
                        asm ("sha512sig1h %0, %2, %1" \
                        : "=r"(r[1]) \
                        : "r" (p[0]), "r" (p[1])); ret; })
#   elif defined(__riscv_zknh) && __riscv_xlen == 64
#    define Sigma0(x) ({ SHA_LONG64 ret; \
                        asm ("sha512sum0 %0, %1" \
                        : "=r"(ret) \
                        : "r"(x)); ret; })
#    define Sigma1(x) ({ SHA_LONG64 ret; \
                        asm ("sha512sum1 %0, %1" \
                        : "=r"(ret) \
                        : "r"(x)); ret; })
#    define sigma0(x) ({ SHA_LONG64 ret; \
                        asm ("sha512sig0 %0, %1" \
                        : "=r"(ret) \
                        : "r"(x)); ret; })
#    define sigma1(x) ({ SHA_LONG64 ret; \
                        asm ("sha512sig1 %0, %1" \
                        : "=r"(ret) \
                        : "r"(x)); ret; })
#   endif
/*
 * RISC-V with Zbt/Zpn: Ch and Maj are each a single four-operand
 * instruction emitted as a raw .insn r4 encoding (NOTE(review): presumably
 * the conditional-mix instruction -- confirm against the ISA manual). Maj
 * reuses the same encoding with x^z as the selector operand.
 */
#   if (defined(__riscv_zbt) || defined(__riscv_zpn)) && __riscv_xlen == 32
#    define Ch(x,y,z) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
                        const unsigned int *xp = (const unsigned int *)(&(x)); \
                        const unsigned int *yp = (const unsigned int *)(&(y)); \
                        const unsigned int *zp = (const unsigned int *)(&(z)); \
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t" \
                        : "=r"(r[0]) \
                        : "r"(xp[0]), "r"(yp[0]), "r"(zp[0])); \
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t" \
                        : "=r"(r[1]) \
                        : "r"(xp[1]), "r"(yp[1]), "r"(zp[1])); ret; })
#    define Maj(x,y,z) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
                        const unsigned int *xp = (const unsigned int *)(&(x)); \
                        const unsigned int *yp = (const unsigned int *)(&(y)); \
                        const unsigned int *zp = (const unsigned int *)(&(z)); \
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t" \
                        : "=r"(r[0]) \
                        : "r"(xp[0]^zp[0]), "r"(yp[0]), "r"(zp[0])); \
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t" \
                        : "=r"(r[1]) \
                        : "r"(xp[1]^zp[1]), "r"(yp[1]), "r"(zp[1])); ret; })
#   elif (defined(__riscv_zbt) || defined(__riscv_zpn)) && __riscv_xlen == 64
#    define Ch(x,y,z) ({ SHA_LONG64 ret; \
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3"\
                        : "=r"(ret) \
                        : "r"(x), "r"(y), "r"(z)); ret; })
#    define Maj(x,y,z) ({ SHA_LONG64 ret; \
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3"\
                        : "=r"(ret) \
                        : "r"(x^z), "r"(y), "r"(x)); ret; })
#   endif
#  elif defined(_MSC_VER)
/* MSVC: rotate through the _rotr64 intrinsic on 64-bit Windows. */
#   if defined(_WIN64)         /* applies to both IA-64 and AMD64 */
#    pragma intrinsic(_rotr64)
#    define ROTR(a,n) _rotr64((a),n)
#   endif
#   if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && \
       !defined(OPENSSL_NO_INLINE_ASM)
/*
 * 32-bit MSVC: __pull64be() reads the two 32-bit halves through ECX into
 * EDX and EAX and byte-swaps each. There is no return statement; the
 * result is presumably taken from EDX:EAX per the compiler's convention
 * for 64-bit return values. The I386_ONLY variant avoids bswap.
 */
#    if defined(I386_ONLY)
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov edx,[ecx + 0]
    _asm mov eax,[ecx + 4]
    _asm xchg dh, dl
    _asm xchg ah, al
    _asm rol edx, 16
    _asm rol eax, 16
    _asm xchg dh, dl
    _asm xchg ah, al
}
#    else
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov edx,[ecx + 0]
    _asm mov eax,[ecx + 4]
    _asm bswap edx
    _asm bswap eax
}
#    endif
#    define PULL64(x) __pull64be(&(x))
#   endif
#  endif
# endif
/*
 * Portable definitions for every primitive that no platform-specific
 * branch above has supplied.
 *
 * Macro parameters are fully parenthesised so that expression arguments
 * expand correctly; e.g. ROTR(v, a + b) must shift left by 64 - (a + b),
 * not by 64 - a + b.
 */
# ifndef PULL64
/* B(x,j): byte j of the object at lvalue x, moved to big-endian position. */
#  define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
/* PULL64(x): the 64-bit big-endian word stored at lvalue x. */
#  define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
# endif
# ifndef ROTR
/* ROTR(x,s): rotate the 64-bit value x right by s bits, 0 < s < 64. */
#  define ROTR(x,s)       (((x)>>(s)) | (x)<<(64-(s)))
# endif
/* The four SHA-512 sigma functions, expressed through ROTR and shifts. */
# ifndef Sigma0
#  define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
# endif
# ifndef Sigma1
#  define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
# endif
# ifndef sigma0
#  define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
# endif
# ifndef sigma1
#  define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
# endif
/* Ch: per bit, x selects between y (x bit set) and z (x bit clear). */
# ifndef Ch
#  define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
# endif
/* Maj: per bit, the majority value among x, y and z. */
# ifndef Maj
#  define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
# endif
594
595 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
596 /*
597 * This code should give better results on 32-bit CPU with less than
598 * ~24 registers, both size and performance wise...
599 */
600
/*
 * Block transform variant selected for 32-bit x86: processes |num|
 * consecutive blocks of SHA_LBLOCK 64-bit words starting at |in| and folds
 * each into ctx->h.
 *
 * Only A and E are kept in named variables; the other working values and
 * the message schedule live in X[], addressed through a pointer F that
 * moves down one slot per round. Relative to F:
 *
 *   F[0..7]   the eight working variables of the current round (F[0] and
 *             F[4] are written from A and E at the start of each round)
 *   F[8]      the message schedule word of the current round
 *
 * Because F is decremented after every round, the value stored at F[0]
 * in one round is read as F[1] in the next, and so on, so the variables
 * rotate without being copied. Likewise a schedule word stored at F[8]
 * k rounds ago is found at F[8 + k].
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *W = in;
    SHA_LONG64 A, E, T;
    /* 80 rounds of downward sliding plus the 9 slots F[0..8] visible at once */
    SHA_LONG64 X[9 + 80], *F;
    int i;

    while (num--) {

        /* Start at the top of X[]; seed the window from the chaining value. */
        F = X + 80;
        A = ctx->h[0];
        F[1] = ctx->h[1];
        F[2] = ctx->h[2];
        F[3] = ctx->h[3];
        E = ctx->h[4];
        F[5] = ctx->h[5];
        F[6] = ctx->h[6];
        F[7] = ctx->h[7];

        /* Rounds 0..15: schedule words come straight from the input block. */
        for (i = 0; i < 16; i++, F--) {
#  ifdef B_ENDIAN
            T = W[i];
#  else
            T = PULL64(W[i]);
#  endif
            F[0] = A;
            F[4] = E;
            F[8] = T;
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
            E = F[3] + T;
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
        }

        /*
         * Rounds 16..79: build the schedule word from the words stored
         * 15, 2, 16 and 7 rounds earlier (F[8 + 16 - 1], F[8 + 16 - 14],
         * F[8 + 16] and F[8 + 16 - 9] respectively).
         */
        for (; i < 80; i++, F--) {
            T = sigma0(F[8 + 16 - 1]);
            T += sigma1(F[8 + 16 - 14]);
            T += F[8 + 16] + F[8 + 16 - 9];

            F[0] = A;
            F[4] = E;
            F[8] = T;
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
            E = F[3] + T;
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
        }

        /* Fold the working variables back into the chaining value. */
        ctx->h[0] += A;
        ctx->h[1] += F[1];
        ctx->h[2] += F[2];
        ctx->h[3] += F[3];
        ctx->h[4] += E;
        ctx->h[5] += F[5];
        ctx->h[6] += F[6];
        ctx->h[7] += F[7];

        W += SHA_LBLOCK;
    }
}
660
661 # elif defined(OPENSSL_SMALL_FOOTPRINT)
662
/*
 * Block transform variant selected by OPENSSL_SMALL_FOOTPRINT: processes
 * |num| consecutive blocks of SHA_LBLOCK 64-bit words starting at |in| and
 * folds each into ctx->h.
 *
 * All 80 rounds run from one loop. The message schedule is kept in a
 * 16-word ring: rounds 0..15 fill it from the input block, later rounds
 * update slot (t mod 16) in place from the slots 1, 14 and 9 positions
 * ahead of it in the ring.
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *msg = in;
    SHA_LONG64 ring[16];
    SHA_LONG64 a, b, c, d, e, f, g, h;
    SHA_LONG64 s0, s1, t1, t2;
    int t;

    for (; num != 0; num--, msg += SHA_LBLOCK) {
        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

        for (t = 0; t < 80; t++) {
            if (t < 16) {
#  ifdef B_ENDIAN
                ring[t] = msg[t];
#  else
                ring[t] = PULL64(msg[t]);
#  endif
            } else {
                /* Copy to locals first: the sigma macros need an lvalue. */
                s0 = ring[(t + 1) & 0x0f];
                s0 = sigma0(s0);
                s1 = ring[(t + 14) & 0x0f];
                s1 = sigma1(s1);

                ring[t & 0xf] += s0 + s1 + ring[(t + 9) & 0xf];
            }

            t1 = ring[t & 0xf];
            t1 += h + Sigma1(e) + Ch(e, f, g) + K512[t];
            t2 = Sigma0(a) + Maj(a, b, c);

            /* Rotate the eight working variables by one position. */
            h = g;
            g = f;
            f = e;
            e = d + t1;
            d = c;
            c = b;
            b = a;
            a = t1 + t2;
        }

        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;
    }
}
731
732 # else
733 # define ROUND_00_15(i,a,b,c,d,e,f,g,h) do { \
734 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i]; \
735 h = Sigma0(a) + Maj(a,b,c); \
736 d += T1; h += T1; } while (0)
737
738 # define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X) do { \
739 s0 = X[(j+1)&0x0f]; s0 = sigma0(s0); \
740 s1 = X[(j+14)&0x0f]; s1 = sigma1(s1); \
741 T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f]; \
742 ROUND_00_15(i+j,a,b,c,d,e,f,g,h); } while (0)
743
744 #ifdef INCLUDE_C_SHA512
sha512_block_data_order_c(SHA512_CTX * ctx,const void * in,size_t num)745 void sha512_block_data_order_c(SHA512_CTX *ctx, const void *in, size_t num)
746 #else
747 static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
748 size_t num)
749 #endif
750 {
751 const SHA_LONG64 *W = in;
752 SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
753 SHA_LONG64 X[16];
754 int i;
755
756 while (num--) {
757
758 a = ctx->h[0];
759 b = ctx->h[1];
760 c = ctx->h[2];
761 d = ctx->h[3];
762 e = ctx->h[4];
763 f = ctx->h[5];
764 g = ctx->h[6];
765 h = ctx->h[7];
766
767 # ifdef B_ENDIAN
768 T1 = X[0] = W[0];
769 ROUND_00_15(0, a, b, c, d, e, f, g, h);
770 T1 = X[1] = W[1];
771 ROUND_00_15(1, h, a, b, c, d, e, f, g);
772 T1 = X[2] = W[2];
773 ROUND_00_15(2, g, h, a, b, c, d, e, f);
774 T1 = X[3] = W[3];
775 ROUND_00_15(3, f, g, h, a, b, c, d, e);
776 T1 = X[4] = W[4];
777 ROUND_00_15(4, e, f, g, h, a, b, c, d);
778 T1 = X[5] = W[5];
779 ROUND_00_15(5, d, e, f, g, h, a, b, c);
780 T1 = X[6] = W[6];
781 ROUND_00_15(6, c, d, e, f, g, h, a, b);
782 T1 = X[7] = W[7];
783 ROUND_00_15(7, b, c, d, e, f, g, h, a);
784 T1 = X[8] = W[8];
785 ROUND_00_15(8, a, b, c, d, e, f, g, h);
786 T1 = X[9] = W[9];
787 ROUND_00_15(9, h, a, b, c, d, e, f, g);
788 T1 = X[10] = W[10];
789 ROUND_00_15(10, g, h, a, b, c, d, e, f);
790 T1 = X[11] = W[11];
791 ROUND_00_15(11, f, g, h, a, b, c, d, e);
792 T1 = X[12] = W[12];
793 ROUND_00_15(12, e, f, g, h, a, b, c, d);
794 T1 = X[13] = W[13];
795 ROUND_00_15(13, d, e, f, g, h, a, b, c);
796 T1 = X[14] = W[14];
797 ROUND_00_15(14, c, d, e, f, g, h, a, b);
798 T1 = X[15] = W[15];
799 ROUND_00_15(15, b, c, d, e, f, g, h, a);
800 # else
801 T1 = X[0] = PULL64(W[0]);
802 ROUND_00_15(0, a, b, c, d, e, f, g, h);
803 T1 = X[1] = PULL64(W[1]);
804 ROUND_00_15(1, h, a, b, c, d, e, f, g);
805 T1 = X[2] = PULL64(W[2]);
806 ROUND_00_15(2, g, h, a, b, c, d, e, f);
807 T1 = X[3] = PULL64(W[3]);
808 ROUND_00_15(3, f, g, h, a, b, c, d, e);
809 T1 = X[4] = PULL64(W[4]);
810 ROUND_00_15(4, e, f, g, h, a, b, c, d);
811 T1 = X[5] = PULL64(W[5]);
812 ROUND_00_15(5, d, e, f, g, h, a, b, c);
813 T1 = X[6] = PULL64(W[6]);
814 ROUND_00_15(6, c, d, e, f, g, h, a, b);
815 T1 = X[7] = PULL64(W[7]);
816 ROUND_00_15(7, b, c, d, e, f, g, h, a);
817 T1 = X[8] = PULL64(W[8]);
818 ROUND_00_15(8, a, b, c, d, e, f, g, h);
819 T1 = X[9] = PULL64(W[9]);
820 ROUND_00_15(9, h, a, b, c, d, e, f, g);
821 T1 = X[10] = PULL64(W[10]);
822 ROUND_00_15(10, g, h, a, b, c, d, e, f);
823 T1 = X[11] = PULL64(W[11]);
824 ROUND_00_15(11, f, g, h, a, b, c, d, e);
825 T1 = X[12] = PULL64(W[12]);
826 ROUND_00_15(12, e, f, g, h, a, b, c, d);
827 T1 = X[13] = PULL64(W[13]);
828 ROUND_00_15(13, d, e, f, g, h, a, b, c);
829 T1 = X[14] = PULL64(W[14]);
830 ROUND_00_15(14, c, d, e, f, g, h, a, b);
831 T1 = X[15] = PULL64(W[15]);
832 ROUND_00_15(15, b, c, d, e, f, g, h, a);
833 # endif
834
835 for (i = 16; i < 80; i += 16) {
836 ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
837 ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
838 ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
839 ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
840 ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
841 ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
842 ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
843 ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
844 ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
845 ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
846 ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
847 ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
848 ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
849 ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
850 ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
851 ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
852 }
853
854 ctx->h[0] += a;
855 ctx->h[1] += b;
856 ctx->h[2] += c;
857 ctx->h[3] += d;
858 ctx->h[4] += e;
859 ctx->h[5] += f;
860 ctx->h[6] += g;
861 ctx->h[7] += h;
862
863 W += SHA_LBLOCK;
864 }
865 }
866
867 # endif
868
869 #endif /* SHA512_ASM */
870