1 /*
2 * Copyright 2010-2024 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License 2.0 (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10 #include <string.h>
11 #include <openssl/crypto.h>
12 #include "internal/cryptlib.h"
13 #include "internal/endian.h"
14 #include "crypto/modes.h"
15
16 #if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
17 typedef size_t size_t_aX __attribute((__aligned__(1)));
18 #else
19 typedef size_t size_t_aX;
20 #endif
21
22 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
23 /* redefine, because alignment is ensured */
24 # undef GETU32
25 # define GETU32(p) BSWAP4(*(const u32 *)(p))
26 # undef PUTU32
27 # define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
28 #endif
29
30 /* RISC-V uses C implementation as a fallback. */
31 #if defined(__riscv)
32 # define INCLUDE_C_GMULT_4BIT
33 # define INCLUDE_C_GHASH_4BIT
34 #endif
35
36 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
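/*
 * REDUCE1BIT(V) multiplies V by x in GF(2^128) using GCM's bit-reflected
 * convention: the 128-bit value is shifted right by one and, when the bit
 * shifted out was set, the constant 0xE1 || 0^120 (encoding the reduction
 * polynomial x^128 + x^7 + x^2 + x + 1) is folded into the top bits.
 */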
37 #define REDUCE1BIT(V) do { \
38 if (sizeof(size_t)==8) { \
39 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
40 V.lo = (V.hi<<63)|(V.lo>>1); \
41 V.hi = (V.hi>>1 )^T; \
42 } \
43 else { \
44 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
45 V.lo = (V.hi<<63)|(V.lo>>1); \
46 V.hi = (V.hi>>1 )^((u64)T<<32); \
47 } \
48 } while(0)
49
50 /*-
51 *
52 * NOTE: TABLE_BITS and all non-4-bit implementations were removed in 3.1.
53 *
54 * Even though the permitted values for TABLE_BITS were 8, 4 and 1, it was
55 * never meant to be set to 8; 8 was effectively reserved for testing purposes.
56 * TABLE_BITS>1 selected lookup-table-driven implementations referred to as
57 * "Shoup's" in the GCM specification. In other words, OpenSSL does not cover
58 * the whole spectrum of possible table-driven implementations. Why? In the
59 * non-"Shoup's" case the memory access pattern is segmented in such a manner
60 * that it is trivial to see that cache-timing information can reveal a
61 * fair portion of the intermediate hash value. Given that the ciphertext is
62 * always available to an attacker, it is possible for them to attempt to
63 * deduce the secret parameter H and, if successful, tamper with messages
64 * [which is trivial in CTR mode]. In the "Shoup's" case it is
65 * not as trivial, but there is no reason to believe that it is resistant
66 * to cache-timing attacks. The catch with the "8-bit" implementation is
67 * that it consumes 16 (sixteen) times more memory, 4KB per individual
68 * key + 1KB shared. On the pro side, it should be about twice as fast as the
69 * "4-bit" version; for gcc-generated x86[_64] code the "8-bit" version
70 * was observed to run ~75% faster, closer to 100% for commercial
71 * compilers... Yet the "4-bit" procedure is preferred, because it is
72 * believed to provide a better security/performance balance and adequate
73 * all-round performance. "All-round" refers to things like:
74 *
75 * - shorter setup time effectively improves overall timing for
76 *   handling of short messages;
77 * - larger table allocation can become unbearable because of VM
78 *   subsystem penalties (for example on Windows a large enough free()
79 *   results in VM working-set trimming, meaning that a subsequent
80 *   malloc() would immediately incur working-set expansion);
81 * - a larger table has a larger cache footprint, which can affect the
82 *   performance of other code paths (not necessarily even in the same
83 *   thread in a Hyper-Threading world);
84 *
85 * A value of 1 was not appropriate for performance reasons.
86 */
87
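/*
 * gcm_init_4bit precomputes the 16-entry "Shoup" table for the hash key H:
 * in GCM's bit-reflected convention Htable[i] is, in effect, the GF(2^128)
 * product of H with the 4-bit value i, so the 4-bit GHASH routines below
 * can process one nibble of input per table lookup.
 */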
88 static void gcm_init_4bit(u128 Htable[16], const u64 H[2])
89 {
90 u128 V;
91 # if defined(OPENSSL_SMALL_FOOTPRINT)
92 int i;
93 # endif
94
95 Htable[0].hi = 0;
96 Htable[0].lo = 0;
97 V.hi = H[0];
98 V.lo = H[1];
99
100 # if defined(OPENSSL_SMALL_FOOTPRINT)
101 for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
102 REDUCE1BIT(V);
103 Htable[i] = V;
104 }
105
106 for (i = 2; i < 16; i <<= 1) {
107 u128 *Hi = Htable + i;
108 int j;
109 for (V = *Hi, j = 1; j < i; ++j) {
110 Hi[j].hi = V.hi ^ Htable[j].hi;
111 Hi[j].lo = V.lo ^ Htable[j].lo;
112 }
113 }
114 # else
115 Htable[8] = V;
116 REDUCE1BIT(V);
117 Htable[4] = V;
118 REDUCE1BIT(V);
119 Htable[2] = V;
120 REDUCE1BIT(V);
121 Htable[1] = V;
122 Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
123 V = Htable[4];
124 Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
125 Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
126 Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
127 V = Htable[8];
128 Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
129 Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
130 Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
131 Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
132 Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
133 Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
134 Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
135 # endif
136 # if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
137 /*
138 * ARM assembler expects specific dword order in Htable.
139 */
140 {
141 int j;
142 DECLARE_IS_ENDIAN;
143
144 if (IS_LITTLE_ENDIAN)
145 for (j = 0; j < 16; ++j) {
146 V = Htable[j];
147 Htable[j].hi = V.lo;
148 Htable[j].lo = V.hi;
149 } else
150 for (j = 0; j < 16; ++j) {
151 V = Htable[j];
152 Htable[j].hi = V.lo << 32 | V.lo >> 32;
153 Htable[j].lo = V.hi << 32 | V.hi >> 32;
154 }
155 }
156 # endif
157 }
158
159 # if !defined(GHASH_ASM) || defined(INCLUDE_C_GMULT_4BIT)
160 static const size_t rem_4bit[16] = {
161 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
162 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
163 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
164 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
165 };
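/*
 * rem_4bit[i] is the reduction term contributed by the 4-bit value i that
 * drops off the low end of the accumulator when it is shifted right by four
 * bits; PACK() keeps the 16 significant bits in the top of a size_t so the
 * same table serves both 32- and 64-bit builds.
 */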
166
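/*
 * gcm_gmult_4bit computes Xi := Xi * H one nibble at a time, walking Xi
 * from byte 15 down to byte 0: each step shifts the accumulator right by
 * four bits, folds in the matching rem_4bit term and XORs in the Htable
 * entry selected by the next nibble. Xi is kept in big-endian byte order,
 * hence the byte swapping on little-endian targets when storing the result.
 */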
167 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
168 {
169 u128 Z;
170 int cnt = 15;
171 size_t rem, nlo, nhi;
172 DECLARE_IS_ENDIAN;
173
174 nlo = ((const u8 *)Xi)[15];
175 nhi = nlo >> 4;
176 nlo &= 0xf;
177
178 Z.hi = Htable[nlo].hi;
179 Z.lo = Htable[nlo].lo;
180
181 while (1) {
182 rem = (size_t)Z.lo & 0xf;
183 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
184 Z.hi = (Z.hi >> 4);
185 if (sizeof(size_t) == 8)
186 Z.hi ^= rem_4bit[rem];
187 else
188 Z.hi ^= (u64)rem_4bit[rem] << 32;
189
190 Z.hi ^= Htable[nhi].hi;
191 Z.lo ^= Htable[nhi].lo;
192
193 if (--cnt < 0)
194 break;
195
196 nlo = ((const u8 *)Xi)[cnt];
197 nhi = nlo >> 4;
198 nlo &= 0xf;
199
200 rem = (size_t)Z.lo & 0xf;
201 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
202 Z.hi = (Z.hi >> 4);
203 if (sizeof(size_t) == 8)
204 Z.hi ^= rem_4bit[rem];
205 else
206 Z.hi ^= (u64)rem_4bit[rem] << 32;
207
208 Z.hi ^= Htable[nlo].hi;
209 Z.lo ^= Htable[nlo].lo;
210 }
211
212 if (IS_LITTLE_ENDIAN) {
213 # ifdef BSWAP8
214 Xi[0] = BSWAP8(Z.hi);
215 Xi[1] = BSWAP8(Z.lo);
216 # else
217 u8 *p = (u8 *)Xi;
218 u32 v;
219 v = (u32)(Z.hi >> 32);
220 PUTU32(p, v);
221 v = (u32)(Z.hi);
222 PUTU32(p + 4, v);
223 v = (u32)(Z.lo >> 32);
224 PUTU32(p + 8, v);
225 v = (u32)(Z.lo);
226 PUTU32(p + 12, v);
227 # endif
228 } else {
229 Xi[0] = Z.hi;
230 Xi[1] = Z.lo;
231 }
232 }
233
234 # endif
235
236 # if !defined(GHASH_ASM) || defined(INCLUDE_C_GHASH_4BIT)
237 # if !defined(OPENSSL_SMALL_FOOTPRINT)
238 /*
239 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
240 * details... Compiler-generated code doesn't seem to give any
241 * performance improvement, at least not on x86[_64]. It's here
242 * mostly as reference and a placeholder for possible future
243 * non-trivial optimization[s]...
244 */
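/*
 * For each 16-byte block of inp this amounts to one GHASH iteration,
 * Xi := (Xi ^ block) * H, using the same nibble-at-a-time scheme as
 * gcm_gmult_4bit but with the input block XORed in on the fly.
 */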
245 static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
246 const u8 *inp, size_t len)
247 {
248 u128 Z;
249 int cnt;
250 size_t rem, nlo, nhi;
251 DECLARE_IS_ENDIAN;
252
253 do {
254 cnt = 15;
255 nlo = ((const u8 *)Xi)[15];
256 nlo ^= inp[15];
257 nhi = nlo >> 4;
258 nlo &= 0xf;
259
260 Z.hi = Htable[nlo].hi;
261 Z.lo = Htable[nlo].lo;
262
263 while (1) {
264 rem = (size_t)Z.lo & 0xf;
265 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
266 Z.hi = (Z.hi >> 4);
267 if (sizeof(size_t) == 8)
268 Z.hi ^= rem_4bit[rem];
269 else
270 Z.hi ^= (u64)rem_4bit[rem] << 32;
271
272 Z.hi ^= Htable[nhi].hi;
273 Z.lo ^= Htable[nhi].lo;
274
275 if (--cnt < 0)
276 break;
277
278 nlo = ((const u8 *)Xi)[cnt];
279 nlo ^= inp[cnt];
280 nhi = nlo >> 4;
281 nlo &= 0xf;
282
283 rem = (size_t)Z.lo & 0xf;
284 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
285 Z.hi = (Z.hi >> 4);
286 if (sizeof(size_t) == 8)
287 Z.hi ^= rem_4bit[rem];
288 else
289 Z.hi ^= (u64)rem_4bit[rem] << 32;
290
291 Z.hi ^= Htable[nlo].hi;
292 Z.lo ^= Htable[nlo].lo;
293 }
294
295 if (IS_LITTLE_ENDIAN) {
296 # ifdef BSWAP8
297 Xi[0] = BSWAP8(Z.hi);
298 Xi[1] = BSWAP8(Z.lo);
299 # else
300 u8 *p = (u8 *)Xi;
301 u32 v;
302 v = (u32)(Z.hi >> 32);
303 PUTU32(p, v);
304 v = (u32)(Z.hi);
305 PUTU32(p + 4, v);
306 v = (u32)(Z.lo >> 32);
307 PUTU32(p + 8, v);
308 v = (u32)(Z.lo);
309 PUTU32(p + 12, v);
310 # endif
311 } else {
312 Xi[0] = Z.hi;
313 Xi[1] = Z.lo;
314 }
315
316 inp += 16;
317 /* Block size is 128 bits so len is a multiple of 16 */
318 len -= 16;
319 } while (len > 0);
320 }
321 # endif
322 # else
323 void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
324 void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
325 size_t len);
326 # endif
327
328 # define GCM_MUL(ctx) ctx->funcs.gmult(ctx->Xi.u,ctx->Htable)
329 # if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
330 # define GHASH(ctx,in,len) ctx->funcs.ghash((ctx)->Xi.u,(ctx)->Htable,in,len)
331 /*
332 * GHASH_CHUNK is a "stride parameter" meant to mitigate the cache-thrashing
333 * effect. In other words, the idea is to hash data while it is still in the
334 * L1 cache after the encryption pass...
335 */
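/* 3*1024 bytes per stride, i.e. 192 16-byte blocks hashed at a time */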
336 # define GHASH_CHUNK (3*1024)
337 # endif
338
339 #if (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
340 # if !defined(I386_ONLY) && \
341 (defined(__i386) || defined(__i386__) || \
342 defined(__x86_64) || defined(__x86_64__) || \
343 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
344 # define GHASH_ASM_X86_OR_64
345
346 void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
347 void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
348 void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
349 size_t len);
350
351 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
352 # define gcm_init_avx gcm_init_clmul
353 # define gcm_gmult_avx gcm_gmult_clmul
354 # define gcm_ghash_avx gcm_ghash_clmul
355 # else
356 void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
357 void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
358 void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
359 size_t len);
360 # endif
361
362 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
363 # define GHASH_ASM_X86
364 void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
365 void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
366 size_t len);
367
368 void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
369 void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
370 size_t len);
371 # endif
372 # elif defined(__arm__) || defined(__arm) || defined(__aarch64__) || defined(_M_ARM64)
373 # include "arm_arch.h"
374 # if __ARM_MAX_ARCH__>=7
375 # define GHASH_ASM_ARM
376 # define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)
377 # if defined(__arm__) || defined(__arm)
378 # define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
379 # endif
380 void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
381 void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
382 void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
383 size_t len);
384 void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
385 void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
386 void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
387 size_t len);
388 # endif
389 # elif defined(__sparc__) || defined(__sparc)
390 # include "crypto/sparc_arch.h"
391 # define GHASH_ASM_SPARC
392 void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
393 void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
394 void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
395 size_t len);
396 # elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__POWERPC__) || defined(_ARCH_PPC))
397 # include "crypto/ppc_arch.h"
398 # define GHASH_ASM_PPC
399 void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
400 void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
401 void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
402 size_t len);
403 # elif defined(OPENSSL_CPUID_OBJ) && defined(__riscv) && __riscv_xlen == 64
404 # include "crypto/riscv_arch.h"
405 # define GHASH_ASM_RV64I
406 /* Zbc/Zbkc (scalar crypto with clmul) based routines. */
407 void gcm_init_rv64i_zbc(u128 Htable[16], const u64 Xi[2]);
408 void gcm_init_rv64i_zbc__zbb(u128 Htable[16], const u64 Xi[2]);
409 void gcm_init_rv64i_zbc__zbkb(u128 Htable[16], const u64 Xi[2]);
410 void gcm_gmult_rv64i_zbc(u64 Xi[2], const u128 Htable[16]);
411 void gcm_gmult_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16]);
412 void gcm_ghash_rv64i_zbc(u64 Xi[2], const u128 Htable[16],
413 const u8 *inp, size_t len);
414 void gcm_ghash_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16],
415 const u8 *inp, size_t len);
416 /* Zvkb/Zvbc (vector crypto with vclmul) based routines. */
417 void gcm_init_rv64i_zvkb_zvbc(u128 Htable[16], const u64 Xi[2]);
418 void gcm_gmult_rv64i_zvkb_zvbc(u64 Xi[2], const u128 Htable[16]);
419 void gcm_ghash_rv64i_zvkb_zvbc(u64 Xi[2], const u128 Htable[16],
420 const u8 *inp, size_t len);
421 /* Zvkg (vector crypto with vgmul.vv and vghsh.vv). */
422 void gcm_init_rv64i_zvkg(u128 Htable[16], const u64 Xi[2]);
423 void gcm_init_rv64i_zvkg_zvkb(u128 Htable[16], const u64 Xi[2]);
424 void gcm_gmult_rv64i_zvkg(u64 Xi[2], const u128 Htable[16]);
425 void gcm_ghash_rv64i_zvkg(u64 Xi[2], const u128 Htable[16],
426 const u8 *inp, size_t len);
427 # endif
428 #endif
429
430 static void gcm_get_funcs(struct gcm_funcs_st *ctx)
431 {
432 /* set defaults -- overridden below as needed */
433 ctx->ginit = gcm_init_4bit;
434 #if !defined(GHASH_ASM)
435 ctx->gmult = gcm_gmult_4bit;
436 #else
437 ctx->gmult = NULL;
438 #endif
439 #if !defined(GHASH_ASM) && !defined(OPENSSL_SMALL_FOOTPRINT)
440 ctx->ghash = gcm_ghash_4bit;
441 #else
442 ctx->ghash = NULL;
443 #endif
444
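/* The runtime CPU checks below select assembler routines where available */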
445 #if defined(GHASH_ASM_X86_OR_64)
446 # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
447 /* x86_64 */
448 if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
449 if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
450 ctx->ginit = gcm_init_avx;
451 ctx->gmult = gcm_gmult_avx;
452 ctx->ghash = gcm_ghash_avx;
453 } else {
454 ctx->ginit = gcm_init_clmul;
455 ctx->gmult = gcm_gmult_clmul;
456 ctx->ghash = gcm_ghash_clmul;
457 }
458 return;
459 }
460 # endif
461 # if defined(GHASH_ASM_X86)
462 /* x86 only */
463 # if defined(OPENSSL_IA32_SSE2)
464 if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
465 ctx->gmult = gcm_gmult_4bit_mmx;
466 ctx->ghash = gcm_ghash_4bit_mmx;
467 return;
468 }
469 # else
470 if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
471 ctx->gmult = gcm_gmult_4bit_mmx;
472 ctx->ghash = gcm_ghash_4bit_mmx;
473 return;
474 }
475 # endif
476 ctx->gmult = gcm_gmult_4bit_x86;
477 ctx->ghash = gcm_ghash_4bit_x86;
478 return;
479 # else
480 /* x86_64 fallback defaults */
481 ctx->gmult = gcm_gmult_4bit;
482 ctx->ghash = gcm_ghash_4bit;
483 return;
484 # endif
485 #elif defined(GHASH_ASM_ARM)
486 /* ARM defaults */
487 ctx->gmult = gcm_gmult_4bit;
488 # if !defined(OPENSSL_SMALL_FOOTPRINT)
489 ctx->ghash = gcm_ghash_4bit;
490 # else
491 ctx->ghash = NULL;
492 # endif
493 # ifdef PMULL_CAPABLE
494 if (PMULL_CAPABLE) {
495 ctx->ginit = (gcm_init_fn)gcm_init_v8;
496 ctx->gmult = gcm_gmult_v8;
497 ctx->ghash = gcm_ghash_v8;
498 }
499 # elif defined(NEON_CAPABLE)
500 if (NEON_CAPABLE) {
501 ctx->ginit = gcm_init_neon;
502 ctx->gmult = gcm_gmult_neon;
503 ctx->ghash = gcm_ghash_neon;
504 }
505 # endif
506 return;
507 #elif defined(GHASH_ASM_SPARC)
508 /* SPARC defaults */
509 ctx->gmult = gcm_gmult_4bit;
510 ctx->ghash = gcm_ghash_4bit;
511 if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
512 ctx->ginit = gcm_init_vis3;
513 ctx->gmult = gcm_gmult_vis3;
514 ctx->ghash = gcm_ghash_vis3;
515 }
516 return;
517 #elif defined(GHASH_ASM_PPC)
518 /* PowerPC does not define GHASH_ASM; defaults set above */
519 if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
520 ctx->ginit = gcm_init_p8;
521 ctx->gmult = gcm_gmult_p8;
522 ctx->ghash = gcm_ghash_p8;
523 }
524 return;
525 #elif defined(GHASH_ASM_RV64I)
526 /* RISCV defaults */
527 ctx->gmult = gcm_gmult_4bit;
528 ctx->ghash = gcm_ghash_4bit;
529
530 if (RISCV_HAS_ZVKG() && riscv_vlen() >= 128) {
531 if (RISCV_HAS_ZVKB())
532 ctx->ginit = gcm_init_rv64i_zvkg_zvkb;
533 else
534 ctx->ginit = gcm_init_rv64i_zvkg;
535 ctx->gmult = gcm_gmult_rv64i_zvkg;
536 ctx->ghash = gcm_ghash_rv64i_zvkg;
537 } else if (RISCV_HAS_ZVKB() && RISCV_HAS_ZVBC() && riscv_vlen() >= 128) {
538 ctx->ginit = gcm_init_rv64i_zvkb_zvbc;
539 ctx->gmult = gcm_gmult_rv64i_zvkb_zvbc;
540 ctx->ghash = gcm_ghash_rv64i_zvkb_zvbc;
541 } else if (RISCV_HAS_ZBC()) {
542 if (RISCV_HAS_ZBKB()) {
543 ctx->ginit = gcm_init_rv64i_zbc__zbkb;
544 ctx->gmult = gcm_gmult_rv64i_zbc__zbkb;
545 ctx->ghash = gcm_ghash_rv64i_zbc__zbkb;
546 } else if (RISCV_HAS_ZBB()) {
547 ctx->ginit = gcm_init_rv64i_zbc__zbb;
548 ctx->gmult = gcm_gmult_rv64i_zbc;
549 ctx->ghash = gcm_ghash_rv64i_zbc;
550 } else {
551 ctx->ginit = gcm_init_rv64i_zbc;
552 ctx->gmult = gcm_gmult_rv64i_zbc;
553 ctx->ghash = gcm_ghash_rv64i_zbc;
554 }
555 }
556 return;
557 #elif defined(GHASH_ASM)
558 /* all other architectures use the generic names */
559 ctx->gmult = gcm_gmult_4bit;
560 ctx->ghash = gcm_ghash_4bit;
561 return;
562 #endif
563 }
564
565 void ossl_gcm_init_4bit(u128 Htable[16], const u64 H[2])
566 {
567 struct gcm_funcs_st funcs;
568
569 gcm_get_funcs(&funcs);
570 funcs.ginit(Htable, H);
571 }
572
573 void ossl_gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
574 {
575 struct gcm_funcs_st funcs;
576
577 gcm_get_funcs(&funcs);
578 funcs.gmult(Xi, Htable);
579 }
580
581 void ossl_gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
582 const u8 *inp, size_t len)
583 {
584 struct gcm_funcs_st funcs;
585 u64 tmp[2];
586 size_t i;
587
588 gcm_get_funcs(&funcs);
589 if (funcs.ghash != NULL) {
590 funcs.ghash(Xi, Htable, inp, len);
591 } else {
592 /* Emulate ghash if needed */
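/* One GHASH step per 16-byte block: Xi ^= block, then Xi := Xi * H */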
593 for (i = 0; i < len; i += 16) {
594 memcpy(tmp, &inp[i], sizeof(tmp));
595 Xi[0] ^= tmp[0];
596 Xi[1] ^= tmp[1];
597 funcs.gmult(Xi, Htable);
598 }
599 }
600 }
601
602 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
603 {
604 DECLARE_IS_ENDIAN;
605
606 memset(ctx, 0, sizeof(*ctx));
607 ctx->block = block;
608 ctx->key = key;
609
610 (*block) (ctx->H.c, ctx->H.c, key);
611
612 if (IS_LITTLE_ENDIAN) {
613 /* H is stored in host byte order */
614 #ifdef BSWAP8
615 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
616 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
617 #else
618 u8 *p = ctx->H.c;
619 u64 hi, lo;
620 hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
621 lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
622 ctx->H.u[0] = hi;
623 ctx->H.u[1] = lo;
624 #endif
625 }
626
627 gcm_get_funcs(&ctx->funcs);
628 ctx->funcs.ginit(ctx->Htable, ctx->H.u);
629 }
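/*
 * A minimal usage sketch (illustrative only; any block cipher exposed as a
 * block128_f works, the AES_KEY/AES_encrypt pairing below is just one
 * example):
 *
 *     GCM128_CONTEXT gcm;
 *
 *     CRYPTO_gcm128_init(&gcm, &aes_key, (block128_f)AES_encrypt);
 *     CRYPTO_gcm128_setiv(&gcm, iv, iv_len);
 *     CRYPTO_gcm128_aad(&gcm, aad, aad_len);
 *     CRYPTO_gcm128_encrypt(&gcm, plaintext, ciphertext, pt_len);
 *     CRYPTO_gcm128_tag(&gcm, tag, 16);
 */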
630
631 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
632 size_t len)
633 {
634 DECLARE_IS_ENDIAN;
635 unsigned int ctr;
636
637 ctx->len.u[0] = 0; /* AAD length */
638 ctx->len.u[1] = 0; /* message length */
639 ctx->ares = 0;
640 ctx->mres = 0;
641
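/*
 * A 96-bit IV is used directly as Yi[0..11] with the 32-bit counter set to
 * 1; any other length is GHASHed together with its bit length, as per
 * NIST SP 800-38D.
 */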
642 if (len == 12) {
643 memcpy(ctx->Yi.c, iv, 12);
644 ctx->Yi.c[12] = 0;
645 ctx->Yi.c[13] = 0;
646 ctx->Yi.c[14] = 0;
647 ctx->Yi.c[15] = 1;
648 ctr = 1;
649 } else {
650 size_t i;
651 u64 len0 = len;
652
653 /* Borrow ctx->Xi to calculate initial Yi */
654 ctx->Xi.u[0] = 0;
655 ctx->Xi.u[1] = 0;
656
657 while (len >= 16) {
658 for (i = 0; i < 16; ++i)
659 ctx->Xi.c[i] ^= iv[i];
660 GCM_MUL(ctx);
661 iv += 16;
662 len -= 16;
663 }
664 if (len) {
665 for (i = 0; i < len; ++i)
666 ctx->Xi.c[i] ^= iv[i];
667 GCM_MUL(ctx);
668 }
669 len0 <<= 3;
670 if (IS_LITTLE_ENDIAN) {
671 #ifdef BSWAP8
672 ctx->Xi.u[1] ^= BSWAP8(len0);
673 #else
674 ctx->Xi.c[8] ^= (u8)(len0 >> 56);
675 ctx->Xi.c[9] ^= (u8)(len0 >> 48);
676 ctx->Xi.c[10] ^= (u8)(len0 >> 40);
677 ctx->Xi.c[11] ^= (u8)(len0 >> 32);
678 ctx->Xi.c[12] ^= (u8)(len0 >> 24);
679 ctx->Xi.c[13] ^= (u8)(len0 >> 16);
680 ctx->Xi.c[14] ^= (u8)(len0 >> 8);
681 ctx->Xi.c[15] ^= (u8)(len0);
682 #endif
683 } else {
684 ctx->Xi.u[1] ^= len0;
685 }
686
687 GCM_MUL(ctx);
688
689 if (IS_LITTLE_ENDIAN)
690 #ifdef BSWAP4
691 ctr = BSWAP4(ctx->Xi.d[3]);
692 #else
693 ctr = GETU32(ctx->Xi.c + 12);
694 #endif
695 else
696 ctr = ctx->Xi.d[3];
697
698 /* Copy borrowed Xi to Yi */
699 ctx->Yi.u[0] = ctx->Xi.u[0];
700 ctx->Yi.u[1] = ctx->Xi.u[1];
701 }
702
703 ctx->Xi.u[0] = 0;
704 ctx->Xi.u[1] = 0;
705
706 (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
707 ++ctr;
708 if (IS_LITTLE_ENDIAN)
709 #ifdef BSWAP4
710 ctx->Yi.d[3] = BSWAP4(ctr);
711 #else
712 PUTU32(ctx->Yi.c + 12, ctr);
713 #endif
714 else
715 ctx->Yi.d[3] = ctr;
716 }
717
718 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
719 size_t len)
720 {
721 size_t i;
722 unsigned int n;
723 u64 alen = ctx->len.u[0];
724
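/* AAD must precede any message data and is limited to 2^61 bytes in total */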
725 if (ctx->len.u[1])
726 return -2;
727
728 alen += len;
729 if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
730 return -1;
731 ctx->len.u[0] = alen;
732
733 n = ctx->ares;
734 if (n) {
735 while (n && len) {
736 ctx->Xi.c[n] ^= *(aad++);
737 --len;
738 n = (n + 1) % 16;
739 }
740 if (n == 0)
741 GCM_MUL(ctx);
742 else {
743 ctx->ares = n;
744 return 0;
745 }
746 }
747 #ifdef GHASH
748 if ((i = (len & (size_t)-16))) {
749 GHASH(ctx, aad, i);
750 aad += i;
751 len -= i;
752 }
753 #else
754 while (len >= 16) {
755 for (i = 0; i < 16; ++i)
756 ctx->Xi.c[i] ^= aad[i];
757 GCM_MUL(ctx);
758 aad += 16;
759 len -= 16;
760 }
761 #endif
762 if (len) {
763 n = (unsigned int)len;
764 for (i = 0; i < len; ++i)
765 ctx->Xi.c[i] ^= aad[i];
766 }
767
768 ctx->ares = n;
769 return 0;
770 }
771
772 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
773 const unsigned char *in, unsigned char *out,
774 size_t len)
775 {
776 DECLARE_IS_ENDIAN;
777 unsigned int n, ctr, mres;
778 size_t i;
779 u64 mlen = ctx->len.u[1];
780 block128_f block = ctx->block;
781 void *key = ctx->key;
782
783 mlen += len;
784 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
785 return -1;
786 ctx->len.u[1] = mlen;
787
788 mres = ctx->mres;
789
790 if (ctx->ares) {
791 /* First call to encrypt finalizes GHASH(AAD) */
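/*
 * On the streaming GHASH() path the not-yet-multiplied AAD block is parked
 * in ctx->Xn (mres counts the bytes buffered there) so that AAD and
 * ciphertext can be hashed together in one deferred pass instead of doing a
 * standalone GCM_MUL here.
 */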
792 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
793 if (len == 0) {
794 GCM_MUL(ctx);
795 ctx->ares = 0;
796 return 0;
797 }
798 memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
799 ctx->Xi.u[0] = 0;
800 ctx->Xi.u[1] = 0;
801 mres = sizeof(ctx->Xi);
802 #else
803 GCM_MUL(ctx);
804 #endif
805 ctx->ares = 0;
806 }
807
808 if (IS_LITTLE_ENDIAN)
809 #ifdef BSWAP4
810 ctr = BSWAP4(ctx->Yi.d[3]);
811 #else
812 ctr = GETU32(ctx->Yi.c + 12);
813 #endif
814 else
815 ctr = ctx->Yi.d[3];
816
817 n = mres % 16;
818 #if !defined(OPENSSL_SMALL_FOOTPRINT)
819 if (16 % sizeof(size_t) == 0) { /* always true actually */
820 do {
821 if (n) {
822 # if defined(GHASH)
823 while (n && len) {
824 ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
825 --len;
826 n = (n + 1) % 16;
827 }
828 if (n == 0) {
829 GHASH(ctx, ctx->Xn, mres);
830 mres = 0;
831 } else {
832 ctx->mres = mres;
833 return 0;
834 }
835 # else
836 while (n && len) {
837 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
838 --len;
839 n = (n + 1) % 16;
840 }
841 if (n == 0) {
842 GCM_MUL(ctx);
843 mres = 0;
844 } else {
845 ctx->mres = n;
846 return 0;
847 }
848 # endif
849 }
850 # if defined(STRICT_ALIGNMENT)
851 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
852 break;
853 # endif
854 # if defined(GHASH)
855 if (len >= 16 && mres) {
856 GHASH(ctx, ctx->Xn, mres);
857 mres = 0;
858 }
859 # if defined(GHASH_CHUNK)
860 while (len >= GHASH_CHUNK) {
861 size_t j = GHASH_CHUNK;
862
863 while (j) {
864 size_t_aX *out_t = (size_t_aX *)out;
865 const size_t_aX *in_t = (const size_t_aX *)in;
866
867 (*block) (ctx->Yi.c, ctx->EKi.c, key);
868 ++ctr;
869 if (IS_LITTLE_ENDIAN)
870 # ifdef BSWAP4
871 ctx->Yi.d[3] = BSWAP4(ctr);
872 # else
873 PUTU32(ctx->Yi.c + 12, ctr);
874 # endif
875 else
876 ctx->Yi.d[3] = ctr;
877 for (i = 0; i < 16 / sizeof(size_t); ++i)
878 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
879 out += 16;
880 in += 16;
881 j -= 16;
882 }
883 GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
884 len -= GHASH_CHUNK;
885 }
886 # endif
887 if ((i = (len & (size_t)-16))) {
888 size_t j = i;
889
890 while (len >= 16) {
891 size_t_aX *out_t = (size_t_aX *)out;
892 const size_t_aX *in_t = (const size_t_aX *)in;
893
894 (*block) (ctx->Yi.c, ctx->EKi.c, key);
895 ++ctr;
896 if (IS_LITTLE_ENDIAN)
897 # ifdef BSWAP4
898 ctx->Yi.d[3] = BSWAP4(ctr);
899 # else
900 PUTU32(ctx->Yi.c + 12, ctr);
901 # endif
902 else
903 ctx->Yi.d[3] = ctr;
904 for (i = 0; i < 16 / sizeof(size_t); ++i)
905 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
906 out += 16;
907 in += 16;
908 len -= 16;
909 }
910 GHASH(ctx, out - j, j);
911 }
912 # else
913 while (len >= 16) {
914 size_t *out_t = (size_t *)out;
915 const size_t *in_t = (const size_t *)in;
916
917 (*block) (ctx->Yi.c, ctx->EKi.c, key);
918 ++ctr;
919 if (IS_LITTLE_ENDIAN)
920 # ifdef BSWAP4
921 ctx->Yi.d[3] = BSWAP4(ctr);
922 # else
923 PUTU32(ctx->Yi.c + 12, ctr);
924 # endif
925 else
926 ctx->Yi.d[3] = ctr;
927 for (i = 0; i < 16 / sizeof(size_t); ++i)
928 ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
929 GCM_MUL(ctx);
930 out += 16;
931 in += 16;
932 len -= 16;
933 }
934 # endif
935 if (len) {
936 (*block) (ctx->Yi.c, ctx->EKi.c, key);
937 ++ctr;
938 if (IS_LITTLE_ENDIAN)
939 # ifdef BSWAP4
940 ctx->Yi.d[3] = BSWAP4(ctr);
941 # else
942 PUTU32(ctx->Yi.c + 12, ctr);
943 # endif
944 else
945 ctx->Yi.d[3] = ctr;
946 # if defined(GHASH)
947 while (len--) {
948 ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
949 ++n;
950 }
951 # else
952 while (len--) {
953 ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
954 ++n;
955 }
956 mres = n;
957 # endif
958 }
959
960 ctx->mres = mres;
961 return 0;
962 } while (0);
963 }
964 #endif
965 for (i = 0; i < len; ++i) {
966 if (n == 0) {
967 (*block) (ctx->Yi.c, ctx->EKi.c, key);
968 ++ctr;
969 if (IS_LITTLE_ENDIAN)
970 #ifdef BSWAP4
971 ctx->Yi.d[3] = BSWAP4(ctr);
972 #else
973 PUTU32(ctx->Yi.c + 12, ctr);
974 #endif
975 else
976 ctx->Yi.d[3] = ctr;
977 }
978 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
979 ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
980 n = (n + 1) % 16;
981 if (mres == sizeof(ctx->Xn)) {
982 GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
983 mres = 0;
984 }
985 #else
986 ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
987 mres = n = (n + 1) % 16;
988 if (n == 0)
989 GCM_MUL(ctx);
990 #endif
991 }
992
993 ctx->mres = mres;
994 return 0;
995 }
996
997 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
998 const unsigned char *in, unsigned char *out,
999 size_t len)
1000 {
1001 DECLARE_IS_ENDIAN;
1002 unsigned int n, ctr, mres;
1003 size_t i;
1004 u64 mlen = ctx->len.u[1];
1005 block128_f block = ctx->block;
1006 void *key = ctx->key;
1007
1008 mlen += len;
1009 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1010 return -1;
1011 ctx->len.u[1] = mlen;
1012
1013 mres = ctx->mres;
1014
1015 if (ctx->ares) {
1016 /* First call to decrypt finalizes GHASH(AAD) */
1017 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1018 if (len == 0) {
1019 GCM_MUL(ctx);
1020 ctx->ares = 0;
1021 return 0;
1022 }
1023 memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1024 ctx->Xi.u[0] = 0;
1025 ctx->Xi.u[1] = 0;
1026 mres = sizeof(ctx->Xi);
1027 #else
1028 GCM_MUL(ctx);
1029 #endif
1030 ctx->ares = 0;
1031 }
1032
1033 if (IS_LITTLE_ENDIAN)
1034 #ifdef BSWAP4
1035 ctr = BSWAP4(ctx->Yi.d[3]);
1036 #else
1037 ctr = GETU32(ctx->Yi.c + 12);
1038 #endif
1039 else
1040 ctr = ctx->Yi.d[3];
1041
1042 n = mres % 16;
1043 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1044 if (16 % sizeof(size_t) == 0) { /* always true actually */
1045 do {
1046 if (n) {
1047 # if defined(GHASH)
1048 while (n && len) {
1049 *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1050 --len;
1051 n = (n + 1) % 16;
1052 }
1053 if (n == 0) {
1054 GHASH(ctx, ctx->Xn, mres);
1055 mres = 0;
1056 } else {
1057 ctx->mres = mres;
1058 return 0;
1059 }
1060 # else
1061 while (n && len) {
1062 u8 c = *(in++);
1063 *(out++) = c ^ ctx->EKi.c[n];
1064 ctx->Xi.c[n] ^= c;
1065 --len;
1066 n = (n + 1) % 16;
1067 }
1068 if (n == 0) {
1069 GCM_MUL(ctx);
1070 mres = 0;
1071 } else {
1072 ctx->mres = n;
1073 return 0;
1074 }
1075 # endif
1076 }
1077 # if defined(STRICT_ALIGNMENT)
1078 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1079 break;
1080 # endif
1081 # if defined(GHASH)
1082 if (len >= 16 && mres) {
1083 GHASH(ctx, ctx->Xn, mres);
1084 mres = 0;
1085 }
1086 # if defined(GHASH_CHUNK)
1087 while (len >= GHASH_CHUNK) {
1088 size_t j = GHASH_CHUNK;
1089
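/* Decryption hashes the ciphertext, so GHASH runs over the input buffer */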
1090 GHASH(ctx, in, GHASH_CHUNK);
1091 while (j) {
1092 size_t_aX *out_t = (size_t_aX *)out;
1093 const size_t_aX *in_t = (const size_t_aX *)in;
1094
1095 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1096 ++ctr;
1097 if (IS_LITTLE_ENDIAN)
1098 # ifdef BSWAP4
1099 ctx->Yi.d[3] = BSWAP4(ctr);
1100 # else
1101 PUTU32(ctx->Yi.c + 12, ctr);
1102 # endif
1103 else
1104 ctx->Yi.d[3] = ctr;
1105 for (i = 0; i < 16 / sizeof(size_t); ++i)
1106 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1107 out += 16;
1108 in += 16;
1109 j -= 16;
1110 }
1111 len -= GHASH_CHUNK;
1112 }
1113 # endif
1114 if ((i = (len & (size_t)-16))) {
1115 GHASH(ctx, in, i);
1116 while (len >= 16) {
1117 size_t_aX *out_t = (size_t_aX *)out;
1118 const size_t_aX *in_t = (const size_t_aX *)in;
1119
1120 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1121 ++ctr;
1122 if (IS_LITTLE_ENDIAN)
1123 # ifdef BSWAP4
1124 ctx->Yi.d[3] = BSWAP4(ctr);
1125 # else
1126 PUTU32(ctx->Yi.c + 12, ctr);
1127 # endif
1128 else
1129 ctx->Yi.d[3] = ctr;
1130 for (i = 0; i < 16 / sizeof(size_t); ++i)
1131 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1132 out += 16;
1133 in += 16;
1134 len -= 16;
1135 }
1136 }
1137 # else
1138 while (len >= 16) {
1139 size_t *out_t = (size_t *)out;
1140 const size_t *in_t = (const size_t *)in;
1141
1142 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1143 ++ctr;
1144 if (IS_LITTLE_ENDIAN)
1145 # ifdef BSWAP4
1146 ctx->Yi.d[3] = BSWAP4(ctr);
1147 # else
1148 PUTU32(ctx->Yi.c + 12, ctr);
1149 # endif
1150 else
1151 ctx->Yi.d[3] = ctr;
1152 for (i = 0; i < 16 / sizeof(size_t); ++i) {
1153 size_t c = in_t[i];
1154 out_t[i] = c ^ ctx->EKi.t[i];
1155 ctx->Xi.t[i] ^= c;
1156 }
1157 GCM_MUL(ctx);
1158 out += 16;
1159 in += 16;
1160 len -= 16;
1161 }
1162 # endif
1163 if (len) {
1164 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1165 ++ctr;
1166 if (IS_LITTLE_ENDIAN)
1167 # ifdef BSWAP4
1168 ctx->Yi.d[3] = BSWAP4(ctr);
1169 # else
1170 PUTU32(ctx->Yi.c + 12, ctr);
1171 # endif
1172 else
1173 ctx->Yi.d[3] = ctr;
1174 # if defined(GHASH)
1175 while (len--) {
1176 out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1177 ++n;
1178 }
1179 # else
1180 while (len--) {
1181 u8 c = in[n];
1182 ctx->Xi.c[n] ^= c;
1183 out[n] = c ^ ctx->EKi.c[n];
1184 ++n;
1185 }
1186 mres = n;
1187 # endif
1188 }
1189
1190 ctx->mres = mres;
1191 return 0;
1192 } while (0);
1193 }
1194 #endif
1195 for (i = 0; i < len; ++i) {
1196 u8 c;
1197 if (n == 0) {
1198 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1199 ++ctr;
1200 if (IS_LITTLE_ENDIAN)
1201 #ifdef BSWAP4
1202 ctx->Yi.d[3] = BSWAP4(ctr);
1203 #else
1204 PUTU32(ctx->Yi.c + 12, ctr);
1205 #endif
1206 else
1207 ctx->Yi.d[3] = ctr;
1208 }
1209 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1210 out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
1211 n = (n + 1) % 16;
1212 if (mres == sizeof(ctx->Xn)) {
1213 GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
1214 mres = 0;
1215 }
1216 #else
1217 c = in[i];
1218 out[i] = c ^ ctx->EKi.c[n];
1219 ctx->Xi.c[n] ^= c;
1220 mres = n = (n + 1) % 16;
1221 if (n == 0)
1222 GCM_MUL(ctx);
1223 #endif
1224 }
1225
1226 ctx->mres = mres;
1227 return 0;
1228 }
1229
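/*
 * The *_ctr32 variants take a ctr128_f that produces keystream for a whole
 * run of blocks using the 32-bit big-endian counter in Yi[12..15], so both
 * keystream generation and GHASH operate on multi-block chunks.
 */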
1230 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1231 const unsigned char *in, unsigned char *out,
1232 size_t len, ctr128_f stream)
1233 {
1234 #if defined(OPENSSL_SMALL_FOOTPRINT)
1235 return CRYPTO_gcm128_encrypt(ctx, in, out, len);
1236 #else
1237 DECLARE_IS_ENDIAN;
1238 unsigned int n, ctr, mres;
1239 size_t i;
1240 u64 mlen = ctx->len.u[1];
1241 void *key = ctx->key;
1242
1243 mlen += len;
1244 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1245 return -1;
1246 ctx->len.u[1] = mlen;
1247
1248 mres = ctx->mres;
1249
1250 if (ctx->ares) {
1251 /* First call to encrypt finalizes GHASH(AAD) */
1252 #if defined(GHASH)
1253 if (len == 0) {
1254 GCM_MUL(ctx);
1255 ctx->ares = 0;
1256 return 0;
1257 }
1258 memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1259 ctx->Xi.u[0] = 0;
1260 ctx->Xi.u[1] = 0;
1261 mres = sizeof(ctx->Xi);
1262 #else
1263 GCM_MUL(ctx);
1264 #endif
1265 ctx->ares = 0;
1266 }
1267
1268 if (IS_LITTLE_ENDIAN)
1269 # ifdef BSWAP4
1270 ctr = BSWAP4(ctx->Yi.d[3]);
1271 # else
1272 ctr = GETU32(ctx->Yi.c + 12);
1273 # endif
1274 else
1275 ctr = ctx->Yi.d[3];
1276
1277 n = mres % 16;
1278 if (n) {
1279 # if defined(GHASH)
1280 while (n && len) {
1281 ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
1282 --len;
1283 n = (n + 1) % 16;
1284 }
1285 if (n == 0) {
1286 GHASH(ctx, ctx->Xn, mres);
1287 mres = 0;
1288 } else {
1289 ctx->mres = mres;
1290 return 0;
1291 }
1292 # else
1293 while (n && len) {
1294 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1295 --len;
1296 n = (n + 1) % 16;
1297 }
1298 if (n == 0) {
1299 GCM_MUL(ctx);
1300 mres = 0;
1301 } else {
1302 ctx->mres = n;
1303 return 0;
1304 }
1305 # endif
1306 }
1307 # if defined(GHASH)
1308 if (len >= 16 && mres) {
1309 GHASH(ctx, ctx->Xn, mres);
1310 mres = 0;
1311 }
1312 # if defined(GHASH_CHUNK)
1313 while (len >= GHASH_CHUNK) {
1314 (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1315 ctr += GHASH_CHUNK / 16;
1316 if (IS_LITTLE_ENDIAN)
1317 # ifdef BSWAP4
1318 ctx->Yi.d[3] = BSWAP4(ctr);
1319 # else
1320 PUTU32(ctx->Yi.c + 12, ctr);
1321 # endif
1322 else
1323 ctx->Yi.d[3] = ctr;
1324 GHASH(ctx, out, GHASH_CHUNK);
1325 out += GHASH_CHUNK;
1326 in += GHASH_CHUNK;
1327 len -= GHASH_CHUNK;
1328 }
1329 # endif
1330 # endif
1331 if ((i = (len & (size_t)-16))) {
1332 size_t j = i / 16;
1333
1334 (*stream) (in, out, j, key, ctx->Yi.c);
1335 ctr += (unsigned int)j;
1336 if (IS_LITTLE_ENDIAN)
1337 # ifdef BSWAP4
1338 ctx->Yi.d[3] = BSWAP4(ctr);
1339 # else
1340 PUTU32(ctx->Yi.c + 12, ctr);
1341 # endif
1342 else
1343 ctx->Yi.d[3] = ctr;
1344 in += i;
1345 len -= i;
1346 # if defined(GHASH)
1347 GHASH(ctx, out, i);
1348 out += i;
1349 # else
1350 while (j--) {
1351 for (i = 0; i < 16; ++i)
1352 ctx->Xi.c[i] ^= out[i];
1353 GCM_MUL(ctx);
1354 out += 16;
1355 }
1356 # endif
1357 }
1358 if (len) {
1359 (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1360 ++ctr;
1361 if (IS_LITTLE_ENDIAN)
1362 # ifdef BSWAP4
1363 ctx->Yi.d[3] = BSWAP4(ctr);
1364 # else
1365 PUTU32(ctx->Yi.c + 12, ctr);
1366 # endif
1367 else
1368 ctx->Yi.d[3] = ctr;
1369 while (len--) {
1370 # if defined(GHASH)
1371 ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
1372 # else
1373 ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1374 # endif
1375 ++n;
1376 }
1377 }
1378
1379 ctx->mres = mres;
1380 return 0;
1381 #endif
1382 }
1383
1384 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1385 const unsigned char *in, unsigned char *out,
1386 size_t len, ctr128_f stream)
1387 {
1388 #if defined(OPENSSL_SMALL_FOOTPRINT)
1389 return CRYPTO_gcm128_decrypt(ctx, in, out, len);
1390 #else
1391 DECLARE_IS_ENDIAN;
1392 unsigned int n, ctr, mres;
1393 size_t i;
1394 u64 mlen = ctx->len.u[1];
1395 void *key = ctx->key;
1396
1397 mlen += len;
1398 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1399 return -1;
1400 ctx->len.u[1] = mlen;
1401
1402 mres = ctx->mres;
1403
1404 if (ctx->ares) {
1405 /* First call to decrypt finalizes GHASH(AAD) */
1406 # if defined(GHASH)
1407 if (len == 0) {
1408 GCM_MUL(ctx);
1409 ctx->ares = 0;
1410 return 0;
1411 }
1412 memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1413 ctx->Xi.u[0] = 0;
1414 ctx->Xi.u[1] = 0;
1415 mres = sizeof(ctx->Xi);
1416 # else
1417 GCM_MUL(ctx);
1418 # endif
1419 ctx->ares = 0;
1420 }
1421
1422 if (IS_LITTLE_ENDIAN)
1423 # ifdef BSWAP4
1424 ctr = BSWAP4(ctx->Yi.d[3]);
1425 # else
1426 ctr = GETU32(ctx->Yi.c + 12);
1427 # endif
1428 else
1429 ctr = ctx->Yi.d[3];
1430
1431 n = mres % 16;
1432 if (n) {
1433 # if defined(GHASH)
1434 while (n && len) {
1435 *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1436 --len;
1437 n = (n + 1) % 16;
1438 }
1439 if (n == 0) {
1440 GHASH(ctx, ctx->Xn, mres);
1441 mres = 0;
1442 } else {
1443 ctx->mres = mres;
1444 return 0;
1445 }
1446 # else
1447 while (n && len) {
1448 u8 c = *(in++);
1449 *(out++) = c ^ ctx->EKi.c[n];
1450 ctx->Xi.c[n] ^= c;
1451 --len;
1452 n = (n + 1) % 16;
1453 }
1454 if (n == 0) {
1455 GCM_MUL(ctx);
1456 mres = 0;
1457 } else {
1458 ctx->mres = n;
1459 return 0;
1460 }
1461 # endif
1462 }
1463 # if defined(GHASH)
1464 if (len >= 16 && mres) {
1465 GHASH(ctx, ctx->Xn, mres);
1466 mres = 0;
1467 }
1468 # if defined(GHASH_CHUNK)
1469 while (len >= GHASH_CHUNK) {
1470 GHASH(ctx, in, GHASH_CHUNK);
1471 (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1472 ctr += GHASH_CHUNK / 16;
1473 if (IS_LITTLE_ENDIAN)
1474 # ifdef BSWAP4
1475 ctx->Yi.d[3] = BSWAP4(ctr);
1476 # else
1477 PUTU32(ctx->Yi.c + 12, ctr);
1478 # endif
1479 else
1480 ctx->Yi.d[3] = ctr;
1481 out += GHASH_CHUNK;
1482 in += GHASH_CHUNK;
1483 len -= GHASH_CHUNK;
1484 }
1485 # endif
1486 # endif
1487 if ((i = (len & (size_t)-16))) {
1488 size_t j = i / 16;
1489
1490 # if defined(GHASH)
1491 GHASH(ctx, in, i);
1492 # else
1493 while (j--) {
1494 size_t k;
1495 for (k = 0; k < 16; ++k)
1496 ctx->Xi.c[k] ^= in[k];
1497 GCM_MUL(ctx);
1498 in += 16;
1499 }
1500 j = i / 16;
1501 in -= i;
1502 # endif
1503 (*stream) (in, out, j, key, ctx->Yi.c);
1504 ctr += (unsigned int)j;
1505 if (IS_LITTLE_ENDIAN)
1506 # ifdef BSWAP4
1507 ctx->Yi.d[3] = BSWAP4(ctr);
1508 # else
1509 PUTU32(ctx->Yi.c + 12, ctr);
1510 # endif
1511 else
1512 ctx->Yi.d[3] = ctr;
1513 out += i;
1514 in += i;
1515 len -= i;
1516 }
1517 if (len) {
1518 (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1519 ++ctr;
1520 if (IS_LITTLE_ENDIAN)
1521 # ifdef BSWAP4
1522 ctx->Yi.d[3] = BSWAP4(ctr);
1523 # else
1524 PUTU32(ctx->Yi.c + 12, ctr);
1525 # endif
1526 else
1527 ctx->Yi.d[3] = ctr;
1528 while (len--) {
1529 # if defined(GHASH)
1530 out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1531 # else
1532 u8 c = in[n];
1533 ctx->Xi.c[mres++] ^= c;
1534 out[n] = c ^ ctx->EKi.c[n];
1535 # endif
1536 ++n;
1537 }
1538 }
1539
1540 ctx->mres = mres;
1541 return 0;
1542 #endif
1543 }
1544
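/*
 * Finalisation: fold the 128-bit block of bit lengths (AAD || ciphertext)
 * into the hash, XOR with EK0 = E(K, Y0) to form the tag, and compare it
 * against the caller's tag in constant time with CRYPTO_memcmp.
 */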
1545 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1546 size_t len)
1547 {
1548 DECLARE_IS_ENDIAN;
1549 u64 alen = ctx->len.u[0] << 3;
1550 u64 clen = ctx->len.u[1] << 3;
1551
1552 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1553 u128 bitlen;
1554 unsigned int mres = ctx->mres;
1555
1556 if (mres) {
1557 unsigned blocks = (mres + 15) & -16;
1558
1559 memset(ctx->Xn + mres, 0, blocks - mres);
1560 mres = blocks;
1561 if (mres == sizeof(ctx->Xn)) {
1562 GHASH(ctx, ctx->Xn, mres);
1563 mres = 0;
1564 }
1565 } else if (ctx->ares) {
1566 GCM_MUL(ctx);
1567 }
1568 #else
1569 if (ctx->mres || ctx->ares)
1570 GCM_MUL(ctx);
1571 #endif
1572
1573 if (IS_LITTLE_ENDIAN) {
1574 #ifdef BSWAP8
1575 alen = BSWAP8(alen);
1576 clen = BSWAP8(clen);
1577 #else
1578 u8 *p = ctx->len.c;
1579
1580 ctx->len.u[0] = alen;
1581 ctx->len.u[1] = clen;
1582
1583 alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1584 clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
1585 #endif
1586 }
1587
1588 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1589 bitlen.hi = alen;
1590 bitlen.lo = clen;
1591 memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
1592 mres += sizeof(bitlen);
1593 GHASH(ctx, ctx->Xn, mres);
1594 #else
1595 ctx->Xi.u[0] ^= alen;
1596 ctx->Xi.u[1] ^= clen;
1597 GCM_MUL(ctx);
1598 #endif
1599
1600 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1601 ctx->Xi.u[1] ^= ctx->EK0.u[1];
1602
1603 if (tag && len <= sizeof(ctx->Xi))
1604 return CRYPTO_memcmp(ctx->Xi.c, tag, len);
1605 else
1606 return -1;
1607 }
1608
1609 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1610 {
1611 CRYPTO_gcm128_finish(ctx, NULL, 0);
1612 memcpy(tag, ctx->Xi.c,
1613 len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
1614 }
1615
1616 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1617 {
1618 GCM128_CONTEXT *ret;
1619
1620 if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
1621 CRYPTO_gcm128_init(ret, key, block);
1622
1623 return ret;
1624 }
1625
1626 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1627 {
1628 OPENSSL_clear_free(ctx, sizeof(*ctx));
1629 }
1630