/*
 * Copyright 2010-2022 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License"). You may not use
 * this file except in compliance with the License. You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

#include <string.h>
#include <openssl/crypto.h>
#include "internal/cryptlib.h"
#include "internal/endian.h"
#include "crypto/modes.h"

#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
typedef size_t size_t_aX __attribute((__aligned__(1)));
#else
typedef size_t size_t_aX;
#endif
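/*-
 * Illustration (not part of the build): the aligned(1) typedef above lets
 * the bulk loops below load and store a machine word at a time from
 * possibly unaligned buffers without undefined behaviour on GCC/Clang.
 * A minimal sketch of the access pattern the CTR loops rely on:
 */
#if 0
static void xor_block_demo(unsigned char *out, const unsigned char *in,
                           const unsigned char keystream[16])
{
    size_t i;

    /* word-at-a-time XOR of one 16-byte block, alignment-agnostic */
    for (i = 0; i < 16 / sizeof(size_t); ++i)
        ((size_t_aX *)out)[i] =
            ((const size_t_aX *)in)[i] ^ ((const size_t_aX *)keystream)[i];
}
#endif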

#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/* redefine, because alignment is ensured */
# undef GETU32
# define GETU32(p)      BSWAP4(*(const u32 *)(p))
# undef PUTU32
# define PUTU32(p,v)    *(u32 *)(p) = BSWAP4(v)
#endif

/* RISC-V uses C implementation of gmult as a fallback. */
#if defined(__riscv)
# define INCLUDE_C_GMULT_4BIT
#endif

#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
#define REDUCE1BIT(V)   do { \
        if (sizeof(size_t)==8) { \
                u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
                V.lo  = (V.hi<<63)|(V.lo>>1); \
                V.hi  = (V.hi>>1 )^T; \
        } \
        else { \
                u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
                V.lo  = (V.hi<<63)|(V.lo>>1); \
                V.hi  = (V.hi>>1 )^((u64)T<<32); \
        } \
} while(0)
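
/*-
 * REDUCE1BIT computes V := V * x in GF(2^128) with GCM's "reflected" bit
 * order: the 128-bit value is shifted right by one, and the bit that falls
 * off the low end is folded back in with the reduction polynomial
 * x^128 + x^7 + x^2 + x + 1, whose reflected representation is
 * 0xE1 followed by fifteen zero bytes. A branch-free sketch of the same
 * computation on a plain pair of 64-bit words (illustration only):
 */
#if 0
static void mul_by_x_reflected(u64 *hi, u64 *lo)
{
    u64 mask = 0 - (*lo & 1);   /* all-ones iff the dropped bit is set */

    *lo = (*hi << 63) | (*lo >> 1);
    *hi = (*hi >> 1) ^ (U64(0xe100000000000000) & mask);
}
#endif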

/*-
 *
 * NOTE: TABLE_BITS and all non-4bit implementations have been removed in 3.1.
 *
 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
 * never be set to 8. 8 is effectively reserved for testing purposes.
 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
 * "Shoup's" in the GCM specification. In other words OpenSSL does not
 * cover the whole spectrum of possible table-driven implementations. Why?
 * In the non-"Shoup's" case the memory access pattern is segmented in such
 * a manner that it's trivial to see that cache-timing information can
 * reveal a fair portion of the intermediate hash value. Given that the
 * ciphertext is always available to an attacker, it's possible to attempt
 * to deduce the secret parameter H and, if successful, tamper with
 * messages [which is nothing but trivial in CTR mode]. In the "Shoup's"
 * case it's not as trivial, but there is no reason to believe that it's
 * resistant to cache-timing attacks. And the thing about the "8-bit"
 * implementation is that it consumes 16 (sixteen) times more memory, 4KB
 * per individual key + 1KB shared. On the pros side, it should be twice
 * as fast as the "4-bit" version, and for gcc-generated x86[_64] code the
 * "8-bit" version was observed to run ~75% faster, closer to 100% for
 * commercial compilers... Yet the "4-bit" procedure is preferred, because
 * it's believed to provide a better security-performance balance and
 * adequate all-round performance. "All-round" refers to things like:
 *
 * - shorter setup time effectively improves overall timing for
 *   handling short messages;
 * - larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example on Windows a large enough free
 *   results in VM working set trimming, meaning that a consequent
 *   malloc would immediately incur working set expansion);
 * - a larger table has a larger cache footprint, which can affect
 *   performance of other code paths (not necessarily even from the
 *   same thread in a Hyper-Threading world);
 *
 * Value of 1 is not appropriate for performance reasons.
 */
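
/*-
 * The 4-bit table method precomputes Htable[i] = i * H in GF(2^128) for
 * every 4-bit multiplier i in 0..15, so that a full multiplication by H
 * can be assembled from 32 table lookups, one per nibble of Xi. A minimal
 * sketch of the invariant the precomputation below establishes
 * (illustration only):
 */
#if 0
# include <assert.h>
static void htable_invariant_demo(const u128 Htable[16])
{
    int i, j;

    /*
     * The table is built from Htable[1], [2], [4] and [8] by XOR, so for
     * i a power of two and j < i: Htable[i + j] == Htable[i] ^ Htable[j].
     */
    for (i = 2; i < 16; i <<= 1)
        for (j = 1; j < i; ++j) {
            assert(Htable[i + j].hi == (Htable[i].hi ^ Htable[j].hi));
            assert(Htable[i + j].lo == (Htable[i].lo ^ Htable[j].lo));
        }
}
#endif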

static void gcm_init_4bit(u128 Htable[16], const u64 H[2])
{
    u128 V;
# if defined(OPENSSL_SMALL_FOOTPRINT)
    int i;
# endif

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

# if defined(OPENSSL_SMALL_FOOTPRINT)
    for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i = 2; i < 16; i <<= 1) {
        u128 *Hi = Htable + i;
        int j;
        for (V = *Hi, j = 1; j < i; ++j) {
            Hi[j].hi = V.hi ^ Htable[j].hi;
            Hi[j].lo = V.lo ^ Htable[j].lo;
        }
    }
# else
    Htable[8] = V;
    REDUCE1BIT(V);
    Htable[4] = V;
    REDUCE1BIT(V);
    Htable[2] = V;
    REDUCE1BIT(V);
    Htable[1] = V;
    Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
    V = Htable[4];
    Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
    Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
    Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
    V = Htable[8];
    Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
    Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
    Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
    Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
    Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
    Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
    Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
# endif
# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
    /*
     * ARM assembler expects specific dword order in Htable.
     */
    {
        int j;
        DECLARE_IS_ENDIAN;

        if (IS_LITTLE_ENDIAN)
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo;
                Htable[j].lo = V.hi;
        } else
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo << 32 | V.lo >> 32;
                Htable[j].lo = V.hi << 32 | V.hi >> 32;
            }
    }
# endif
}

# if !defined(GHASH_ASM) || defined(INCLUDE_C_GMULT_4BIT)
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
};
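
/*-
 * rem_4bit[i] is the constant that must be XOR-ed into the top of Z after
 * its four low bits i have been shifted out: the reflected reduction of
 * those bits modulo x^128 + x^7 + x^2 + x + 1, a 16-bit value that PACK()
 * positions in the most significant bits of a size_t. A sketch of how the
 * table could be regenerated with REDUCE1BIT (illustration only):
 */
#if 0
static void rem_4bit_regen_demo(u64 tab[16])
{
    int i, b;

    for (i = 0; i < 16; ++i) {
        u128 V;

        V.hi = 0;
        V.lo = (u64)i;          /* the four bits about to be shifted out */
        for (b = 0; b < 4; ++b)
            REDUCE1BIT(V);      /* fold each dropped bit back at the top */
        /*
         * tab[i] now reproduces the value the 64-bit path reads directly
         * as rem_4bit[i]; the 32-bit path stores the same 16-bit constant
         * PACK-ed into a 32-bit word and shifts it up by 32 at use.
         */
        tab[i] = V.hi;
    }
}
#endif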

static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
    u128 Z;
    int cnt = 15;
    size_t rem, nlo, nhi;
    DECLARE_IS_ENDIAN;

    nlo = ((const u8 *)Xi)[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
        rem = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;

        if (--cnt < 0)
            break;

        nlo = ((const u8 *)Xi)[cnt];
        nhi = nlo >> 4;
        nlo &= 0xf;

        rem = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;
    }

    if (IS_LITTLE_ENDIAN) {
# ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
# else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
# endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}

# endif

# if !defined(GHASH_ASM)
# if !defined(OPENSSL_SMALL_FOOTPRINT)
/*
 * Streamed variant of gcm_gmult_4bit, see CRYPTO_gcm128_[en|de]crypt for
 * details... Compiler-generated code doesn't seem to give any performance
 * improvement, at least not on x86[_64]. It's here mostly as a reference
 * and a placeholder for possible future non-trivial optimization[s]...
 */
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
                           const u8 *inp, size_t len)
{
    u128 Z;
    int cnt;
    size_t rem, nlo, nhi;
    DECLARE_IS_ENDIAN;

    do {
        cnt = 15;
        nlo = ((const u8 *)Xi)[15];
        nlo ^= inp[15];
        nhi = nlo >> 4;
        nlo &= 0xf;

        Z.hi = Htable[nlo].hi;
        Z.lo = Htable[nlo].lo;

        while (1) {
            rem = (size_t)Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4);
            if (sizeof(size_t) == 8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem] << 32;

            Z.hi ^= Htable[nhi].hi;
            Z.lo ^= Htable[nhi].lo;

            if (--cnt < 0)
                break;

            nlo = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi = nlo >> 4;
            nlo &= 0xf;

            rem = (size_t)Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4);
            if (sizeof(size_t) == 8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem] << 32;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;
        }

        if (IS_LITTLE_ENDIAN) {
# ifdef BSWAP8
            Xi[0] = BSWAP8(Z.hi);
            Xi[1] = BSWAP8(Z.lo);
# else
            u8 *p = (u8 *)Xi;
            u32 v;
            v = (u32)(Z.hi >> 32);
            PUTU32(p, v);
            v = (u32)(Z.hi);
            PUTU32(p + 4, v);
            v = (u32)(Z.lo >> 32);
            PUTU32(p + 8, v);
            v = (u32)(Z.lo);
            PUTU32(p + 12, v);
# endif
        } else {
            Xi[0] = Z.hi;
            Xi[1] = Z.lo;
        }

        inp += 16;
        /* Block size is 128 bits so len is a multiple of 16 */
        len -= 16;
    } while (len > 0);
}
# endif
# else
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# endif

# define GCM_MUL(ctx)      ctx->funcs.gmult(ctx->Xi.u,ctx->Htable)
# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
# define GHASH(ctx,in,len) ctx->funcs.ghash((ctx)->Xi.u,(ctx)->Htable,in,len)
/*
 * GHASH_CHUNK is a "stride parameter" meant to mitigate cache-thrashing
 * effects. In other words, the idea is to hash data while it's still in
 * L1 cache after the encryption pass...
 */
# define GHASH_CHUNK       (3*1024)
# endif
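
/*-
 * Shape of the bulk loops below: encrypt one GHASH_CHUNK (3KB, small
 * enough to stay L1-resident) worth of counter blocks, then immediately
 * hash the still-hot ciphertext. A sketch of the interleaving, with
 * ctr32_blocks() standing in for the real per-block loop (hypothetical
 * name, illustration only):
 */
#if 0
while (len >= GHASH_CHUNK) {
    ctr32_blocks(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c); /* hypothetical */
    GHASH(ctx, out, GHASH_CHUNK);   /* out was just written: L1 hit */
    in += GHASH_CHUNK;
    out += GHASH_CHUNK;
    len -= GHASH_CHUNK;
}
#endif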

#if (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
# if !defined(I386_ONLY) && \
     (defined(__i386) || defined(__i386__) || \
      defined(__x86_64) || defined(__x86_64__) || \
      defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
#  define GHASH_ASM_X86_OR_64

void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                     size_t len);

#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define gcm_init_avx   gcm_init_clmul
#   define gcm_gmult_avx  gcm_gmult_clmul
#   define gcm_ghash_avx  gcm_ghash_clmul
#  else
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                   size_t len);
#  endif

#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);
#  endif
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
#  include "arm_arch.h"
#  if __ARM_MAX_ARCH__>=7
#   define GHASH_ASM_ARM
#   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
#   if defined(__arm__) || defined(__arm)
#    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
#   endif
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
#  endif
# elif defined(__sparc__) || defined(__sparc)
#  include "crypto/sparc_arch.h"
#  define GHASH_ASM_SPARC
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
#  include "crypto/ppc_arch.h"
#  define GHASH_ASM_PPC
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
# elif defined(OPENSSL_CPUID_OBJ) && defined(__riscv) && __riscv_xlen == 64
#  include "crypto/riscv_arch.h"
#  define GHASH_ASM_RISCV
#  undef GHASH
void gcm_init_clmul_rv64i_zbb_zbc(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_clmul_rv64i_zbb_zbc(u64 Xi[2], const u128 Htable[16]);
# endif
#endif

static void gcm_get_funcs(struct gcm_funcs_st *ctx)
{
    /* set defaults -- overridden below as needed */
    ctx->ginit = gcm_init_4bit;
#if !defined(GHASH_ASM) || defined(INCLUDE_C_GMULT_4BIT)
    ctx->gmult = gcm_gmult_4bit;
#else
    ctx->gmult = NULL;
#endif
#if !defined(GHASH_ASM) && !defined(OPENSSL_SMALL_FOOTPRINT)
    ctx->ghash = gcm_ghash_4bit;
#else
    ctx->ghash = NULL;
#endif

#if defined(GHASH_ASM_X86_OR_64)
# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
    /* x86_64 */
    if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
            ctx->ginit = gcm_init_avx;
            ctx->gmult = gcm_gmult_avx;
            ctx->ghash = gcm_ghash_avx;
        } else {
            ctx->ginit = gcm_init_clmul;
            ctx->gmult = gcm_gmult_clmul;
            ctx->ghash = gcm_ghash_clmul;
        }
        return;
    }
# endif
# if defined(GHASH_ASM_X86)
    /* x86 only */
#  if defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
        ctx->gmult = gcm_gmult_4bit_mmx;
        ctx->ghash = gcm_ghash_4bit_mmx;
        return;
    }
#  else
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
        ctx->gmult = gcm_gmult_4bit_mmx;
        ctx->ghash = gcm_ghash_4bit_mmx;
        return;
    }
#  endif
    ctx->gmult = gcm_gmult_4bit_x86;
    ctx->ghash = gcm_ghash_4bit_x86;
    return;
# endif
#elif defined(GHASH_ASM_ARM)
    /* ARM defaults */
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
# ifdef PMULL_CAPABLE
    if (PMULL_CAPABLE) {
        ctx->ginit = (gcm_init_fn)gcm_init_v8;
        ctx->gmult = gcm_gmult_v8;
        ctx->ghash = gcm_ghash_v8;
    }
# elif defined(NEON_CAPABLE)
    if (NEON_CAPABLE) {
        ctx->ginit = gcm_init_neon;
        ctx->gmult = gcm_gmult_neon;
        ctx->ghash = gcm_ghash_neon;
    }
# endif
    return;
#elif defined(GHASH_ASM_SPARC)
    /* SPARC defaults */
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
        ctx->ginit = gcm_init_vis3;
        ctx->gmult = gcm_gmult_vis3;
        ctx->ghash = gcm_ghash_vis3;
    }
    return;
#elif defined(GHASH_ASM_PPC)
    /* PowerPC does not define GHASH_ASM; defaults set above */
    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
        ctx->ginit = gcm_init_p8;
        ctx->gmult = gcm_gmult_p8;
        ctx->ghash = gcm_ghash_p8;
    }
    return;
#elif defined(GHASH_ASM_RISCV) && __riscv_xlen == 64
    /* RISCV defaults; gmult already set above */
    ctx->ghash = NULL;
    if (RISCV_HAS_ZBB() && RISCV_HAS_ZBC()) {
        ctx->ginit = gcm_init_clmul_rv64i_zbb_zbc;
        ctx->gmult = gcm_gmult_clmul_rv64i_zbb_zbc;
    }
    return;
#elif defined(GHASH_ASM)
    /* all other architectures use the generic names */
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
    return;
#endif
}

void ossl_gcm_init_4bit(u128 Htable[16], const u64 H[2])
{
    struct gcm_funcs_st funcs;

    gcm_get_funcs(&funcs);
    funcs.ginit(Htable, H);
}

void ossl_gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
    struct gcm_funcs_st funcs;

    gcm_get_funcs(&funcs);
    funcs.gmult(Xi, Htable);
}

void ossl_gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len)
{
    struct gcm_funcs_st funcs;
    u64 tmp[2];
    size_t i;

    gcm_get_funcs(&funcs);
    if (funcs.ghash != NULL) {
        funcs.ghash(Xi, Htable, inp, len);
    } else {
        /* Emulate ghash if needed */
        for (i = 0; i < len; i += 16) {
            memcpy(tmp, &inp[i], sizeof(tmp));
            Xi[0] ^= tmp[0];
            Xi[1] ^= tmp[1];
            funcs.gmult(Xi, Htable);
        }
    }
}
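
/*-
 * The emulation loop above is the defining GHASH recurrence: for 16-byte
 * blocks B_1..B_n,
 *
 *      X_0 = 0,  X_i = (X_{i-1} ^ B_i) * H,
 *
 * so a dedicated ghash() implementation and repeated gmult() calls compute
 * the same function; ghash() merely amortizes the per-block overhead.
 */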

void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
{
    DECLARE_IS_ENDIAN;

    memset(ctx, 0, sizeof(*ctx));
    ctx->block = block;
    ctx->key = key;

    (*block) (ctx->H.c, ctx->H.c, key);

    if (IS_LITTLE_ENDIAN) {
        /* H is stored in host byte order */
#ifdef BSWAP8
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
        u8 *p = ctx->H.c;
        u64 hi, lo;
        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
        ctx->H.u[0] = hi;
        ctx->H.u[1] = lo;
#endif
    }

    gcm_get_funcs(&ctx->funcs);
    ctx->funcs.ginit(ctx->Htable, ctx->H.u);
}
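
/*-
 * Typical initialization pairs this with a raw block cipher, e.g. AES; the
 * hash key H is derived inside by encrypting the all-zero block. A minimal
 * sketch, assuming <openssl/aes.h> and a caller-provided 128-bit key
 * (illustration only; the EVP AES-GCM ciphers are the supported interface):
 */
#if 0
# include <openssl/aes.h>
static void gcm128_init_demo(GCM128_CONTEXT *gcm, AES_KEY *ks,
                             const unsigned char key[16])
{
    AES_set_encrypt_key(key, 128, ks);
    CRYPTO_gcm128_init(gcm, ks, (block128_f)AES_encrypt);
}
#endif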

void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
                         size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int ctr;

    ctx->len.u[0] = 0;          /* AAD length */
    ctx->len.u[1] = 0;          /* message length */
    ctx->ares = 0;
    ctx->mres = 0;

    if (len == 12) {
        memcpy(ctx->Yi.c, iv, 12);
        ctx->Yi.c[12] = 0;
        ctx->Yi.c[13] = 0;
        ctx->Yi.c[14] = 0;
        ctx->Yi.c[15] = 1;
        ctr = 1;
    } else {
        size_t i;
        u64 len0 = len;

        /* Borrow ctx->Xi to calculate initial Yi */
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;

        while (len >= 16) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
            iv += 16;
            len -= 16;
        }
        if (len) {
            for (i = 0; i < len; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
        }
        len0 <<= 3;
        if (IS_LITTLE_ENDIAN) {
#ifdef BSWAP8
            ctx->Xi.u[1] ^= BSWAP8(len0);
#else
            ctx->Xi.c[8] ^= (u8)(len0 >> 56);
            ctx->Xi.c[9] ^= (u8)(len0 >> 48);
            ctx->Xi.c[10] ^= (u8)(len0 >> 40);
            ctx->Xi.c[11] ^= (u8)(len0 >> 32);
            ctx->Xi.c[12] ^= (u8)(len0 >> 24);
            ctx->Xi.c[13] ^= (u8)(len0 >> 16);
            ctx->Xi.c[14] ^= (u8)(len0 >> 8);
            ctx->Xi.c[15] ^= (u8)(len0);
#endif
        } else {
            ctx->Xi.u[1] ^= len0;
        }

        GCM_MUL(ctx);

        if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
            ctr = BSWAP4(ctx->Xi.d[3]);
#else
            ctr = GETU32(ctx->Xi.c + 12);
#endif
        else
            ctr = ctx->Xi.d[3];

        /* Copy borrowed Xi to Yi */
        ctx->Yi.u[0] = ctx->Xi.u[0];
        ctx->Yi.u[1] = ctx->Xi.u[1];
    }

    ctx->Xi.u[0] = 0;
    ctx->Xi.u[1] = 0;

    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
    ++ctr;
    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c + 12, ctr);
#endif
    else
        ctx->Yi.d[3] = ctr;
}
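
/*-
 * The two IV paths above implement NIST SP 800-38D: for the common 96-bit
 * IV, Y_0 = IV || 0^31 || 1; for any other length,
 *
 *      Y_0 = GHASH(IV padded to a block boundary || 0^64 || [len(IV)]_64)
 *
 * with the IV length expressed in bits. EK0 = E(K, Y_0) is saved for the
 * final tag, and the counter continues from Y_1.
 */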

int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
                      size_t len)
{
    size_t i;
    unsigned int n;
    u64 alen = ctx->len.u[0];

    if (ctx->len.u[1])
        return -2;

    alen += len;
    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
        return -1;
    ctx->len.u[0] = alen;

    n = ctx->ares;
    if (n) {
        while (n && len) {
            ctx->Xi.c[n] ^= *(aad++);
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0)
            GCM_MUL(ctx);
        else {
            ctx->ares = n;
            return 0;
        }
    }
#ifdef GHASH
    if ((i = (len & (size_t)-16))) {
        GHASH(ctx, aad, i);
        aad += i;
        len -= i;
    }
#else
    while (len >= 16) {
        for (i = 0; i < 16; ++i)
            ctx->Xi.c[i] ^= aad[i];
        GCM_MUL(ctx);
        aad += 16;
        len -= 16;
    }
#endif
    if (len) {
        n = (unsigned int)len;
        for (i = 0; i < len; ++i)
            ctx->Xi.c[i] ^= aad[i];
    }

    ctx->ares = n;
    return 0;
}
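
/*-
 * Arithmetic behind the bounds: SP 800-38D caps the AAD at 2^64 - 1 bits,
 * and (U64(1) << 61) bytes is 2^64 bits, so the check above enforces that
 * limit, while `alen < len` (with a 64-bit size_t) catches wrap-around of
 * the running byte counter. The message-length checks in the functions
 * below use (2^36 - 32) bytes = 2^39 - 256 bits, the capacity of the
 * 32-bit counter space.
 */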

int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            if (n) {
# if defined(GHASH)
                while (n && len) {
                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
# if defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                while (j) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
# else
                        PUTU32(ctx->Yi.c + 12, ctr);
# endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
                len -= GHASH_CHUNK;
            }
# endif
            if ((i = (len & (size_t)-16))) {
                size_t j = i;

                while (len >= 16) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
# else
                        PUTU32(ctx->Yi.c + 12, ctr);
# endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
                GHASH(ctx, out - j, j);
            }
# else
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i)
                    ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    for (i = 0; i < len; ++i) {
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx, ctx->Xn, sizeof(ctx->Xn));
            mres = 0;
        }
#else
        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}
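
/*-
 * Putting the pieces together: a one-shot authenticated encryption with
 * this low-level API runs setiv -> aad -> encrypt -> tag. A minimal
 * sketch, assuming a context initialized as in the gcm128_init_demo()
 * sketch above (illustration only):
 */
#if 0
static void gcm128_seal_demo(GCM128_CONTEXT *gcm,
                             const unsigned char *iv, size_t ivlen,
                             const unsigned char *aad, size_t aadlen,
                             const unsigned char *pt, unsigned char *ct,
                             size_t len, unsigned char tag[16])
{
    CRYPTO_gcm128_setiv(gcm, iv, ivlen);        /* derives Y0 and EK0 */
    CRYPTO_gcm128_aad(gcm, aad, aadlen);        /* must precede the data */
    CRYPTO_gcm128_encrypt(gcm, pt, ct, len);    /* may be called repeatedly */
    CRYPTO_gcm128_tag(gcm, tag, 16);            /* finalizes and emits T */
}
#endif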

int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            if (n) {
# if defined(GHASH)
                while (n && len) {
                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    u8 c = *(in++);
                    *(out++) = c ^ ctx->EKi.c[n];
                    ctx->Xi.c[n] ^= c;
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
# if defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                GHASH(ctx, in, GHASH_CHUNK);
                while (j) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
# else
                        PUTU32(ctx->Yi.c + 12, ctr);
# endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                len -= GHASH_CHUNK;
            }
# endif
            if ((i = (len & (size_t)-16))) {
                GHASH(ctx, in, i);
                while (len >= 16) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
# else
                        PUTU32(ctx->Yi.c + 12, ctr);
# endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
            }
# else
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
                    size_t c = in_t[i];
                    out_t[i] = c ^ ctx->EKi.t[i];
                    ctx->Xi.t[i] ^= c;
                }
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    u8 c = in[n];
                    ctx->Xi.c[n] ^= c;
                    out[n] = c ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    for (i = 0; i < len; ++i) {
        u8 c;
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx, ctx->Xn, sizeof(ctx->Xn));
            mres = 0;
        }
#else
        c = in[i];
        out[i] = c ^ ctx->EKi.c[n];
        ctx->Xi.c[n] ^= c;
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}
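
/*-
 * Decryption differs from encryption only in what gets hashed: GHASH is
 * always computed over the ciphertext, so here the input is absorbed (or
 * queued in Xn) before the keystream is applied, whereas encryption hashes
 * its output. Verifying the tag still requires CRYPTO_gcm128_finish().
 */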

int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
#if defined(OPENSSL_SMALL_FOOTPRINT)
    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
#else
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
# if defined(GHASH)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
# else
        GCM_MUL(ctx);
# endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
# else
        ctr = GETU32(ctx->Yi.c + 12);
# endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
    if (n) {
# if defined(GHASH)
        while (n && len) {
            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        } else {
            ctx->mres = mres;
            return 0;
        }
# else
        while (n && len) {
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GCM_MUL(ctx);
            mres = 0;
        } else {
            ctx->mres = n;
            return 0;
        }
# endif
    }
# if defined(GHASH)
    if (len >= 16 && mres) {
        GHASH(ctx, ctx->Xn, mres);
        mres = 0;
    }
# if defined(GHASH_CHUNK)
    while (len >= GHASH_CHUNK) {
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        GHASH(ctx, out, GHASH_CHUNK);
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
# endif
# endif
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        in += i;
        len -= i;
# if defined(GHASH)
        GHASH(ctx, out, i);
        out += i;
# else
        while (j--) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= out[i];
            GCM_MUL(ctx);
            out += 16;
        }
# endif
    }
    if (len) {
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
# if defined(GHASH)
            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
# else
            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
# endif
            ++n;
        }
    }

    ctx->mres = mres;
    return 0;
#endif
}
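
/*-
 * The ctr128_f path lets hardware process many counter blocks per call.
 * A minimal sketch of wiring it up with AES-NI, assuming the build exposes
 * aesni_ctr32_encrypt_blocks() with this shape (illustration only; in-tree
 * callers reach this through the EVP/provider layer):
 */
#if 0
extern void aesni_ctr32_encrypt_blocks(const unsigned char *in,
                                       unsigned char *out, size_t blocks,
                                       const void *key,
                                       const unsigned char ivec[16]);

/* ... CRYPTO_gcm128_encrypt_ctr32(gcm, pt, ct, len,
 *                                 (ctr128_f)aesni_ctr32_encrypt_blocks); */
#endif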

int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
#if defined(OPENSSL_SMALL_FOOTPRINT)
    return CRYPTO_gcm128_decrypt(ctx, in, out, len);
#else
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
# if defined(GHASH)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
# else
        GCM_MUL(ctx);
# endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
# else
        ctr = GETU32(ctx->Yi.c + 12);
# endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
    if (n) {
# if defined(GHASH)
        while (n && len) {
            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        } else {
            ctx->mres = mres;
            return 0;
        }
# else
        while (n && len) {
            u8 c = *(in++);
            *(out++) = c ^ ctx->EKi.c[n];
            ctx->Xi.c[n] ^= c;
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GCM_MUL(ctx);
            mres = 0;
        } else {
            ctx->mres = n;
            return 0;
        }
# endif
    }
# if defined(GHASH)
    if (len >= 16 && mres) {
        GHASH(ctx, ctx->Xn, mres);
        mres = 0;
    }
# if defined(GHASH_CHUNK)
    while (len >= GHASH_CHUNK) {
        GHASH(ctx, in, GHASH_CHUNK);
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
# endif
# endif
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

# if defined(GHASH)
        GHASH(ctx, in, i);
# else
        while (j--) {
            size_t k;
            for (k = 0; k < 16; ++k)
                ctx->Xi.c[k] ^= in[k];
            GCM_MUL(ctx);
            in += 16;
        }
        j = i / 16;
        in -= i;
# endif
        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        out += i;
        in += i;
        len -= i;
    }
    if (len) {
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
# if defined(GHASH)
            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
# else
            u8 c = in[n];
            ctx->Xi.c[mres++] ^= c;
            out[n] = c ^ ctx->EKi.c[n];
# endif
            ++n;
        }
    }

    ctx->mres = mres;
    return 0;
#endif
}

int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
                         size_t len)
{
    DECLARE_IS_ENDIAN;
    u64 alen = ctx->len.u[0] << 3;
    u64 clen = ctx->len.u[1] << 3;

#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    u128 bitlen;
    unsigned int mres = ctx->mres;

    if (mres) {
        unsigned blocks = (mres + 15) & -16;

        memset(ctx->Xn + mres, 0, blocks - mres);
        mres = blocks;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        }
    } else if (ctx->ares) {
        GCM_MUL(ctx);
    }
#else
    if (ctx->mres || ctx->ares)
        GCM_MUL(ctx);
#endif

    if (IS_LITTLE_ENDIAN) {
#ifdef BSWAP8
        alen = BSWAP8(alen);
        clen = BSWAP8(clen);
#else
        u8 *p = ctx->len.c;

        ctx->len.u[0] = alen;
        ctx->len.u[1] = clen;

        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
#endif
    }

#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    bitlen.hi = alen;
    bitlen.lo = clen;
    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
    mres += sizeof(bitlen);
    GHASH(ctx, ctx->Xn, mres);
#else
    ctx->Xi.u[0] ^= alen;
    ctx->Xi.u[1] ^= clen;
    GCM_MUL(ctx);
#endif

    ctx->Xi.u[0] ^= ctx->EK0.u[0];
    ctx->Xi.u[1] ^= ctx->EK0.u[1];

    if (tag && len <= sizeof(ctx->Xi))
        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
    else
        return -1;
}
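
/*-
 * This is the closing step of SP 800-38D: the bit lengths of AAD and
 * ciphertext are hashed as a final block, and the tag is
 *
 *      T = GHASH(A, C, [len(A)]_64 || [len(C)]_64) ^ E(K, Y_0)
 *
 * with E(K, Y_0) cached in EK0 by CRYPTO_gcm128_setiv(). Note the return
 * convention: 0 means the supplied tag matched (CRYPTO_memcmp() semantics).
 */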

void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
{
    CRYPTO_gcm128_finish(ctx, NULL, 0);
    memcpy(tag, ctx->Xi.c,
           len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}

GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
{
    GCM128_CONTEXT *ret;

    if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
        CRYPTO_gcm128_init(ret, key, block);

    return ret;
}

void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
{
    OPENSSL_clear_free(ctx, sizeof(*ctx));
}
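
/*-
 * Heap-based lifecycle, with tag verification on the receiving side. A
 * minimal sketch, assuming an AES key schedule `ks` prepared as in the
 * earlier sketches (illustration only):
 */
#if 0
static int gcm128_open_demo(AES_KEY *ks,
                            const unsigned char *iv, size_t ivlen,
                            const unsigned char *ct, unsigned char *pt,
                            size_t len, const unsigned char tag[16])
{
    GCM128_CONTEXT *gcm = CRYPTO_gcm128_new(ks, (block128_f)AES_encrypt);
    int ok = -1;

    if (gcm != NULL) {
        CRYPTO_gcm128_setiv(gcm, iv, ivlen);
        CRYPTO_gcm128_decrypt(gcm, ct, pt, len);
        ok = CRYPTO_gcm128_finish(gcm, tag, 16);   /* 0 iff tag matches */
        CRYPTO_gcm128_release(gcm);
    }
    return ok;
}
#endif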