/*
 * Copyright 2010-2022 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

#include <string.h>
#include <openssl/crypto.h>
#include "internal/cryptlib.h"
#include "internal/endian.h"
#include "crypto/modes.h"

#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
typedef size_t size_t_aX __attribute((__aligned__(1)));
#else
typedef size_t size_t_aX;
#endif

#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/* redefine, because alignment is ensured */
# undef  GETU32
# define GETU32(p)       BSWAP4(*(const u32 *)(p))
# undef  PUTU32
# define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
#endif

/* RISC-V uses C implementation of gmult as a fallback. */
#if defined(__riscv)
# define INCLUDE_C_GMULT_4BIT
#endif

#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
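/*
 * REDUCE1BIT(V) multiplies V by x in GF(2^128) using GCM's bit-reflected
 * representation: a one-bit right shift of the 128-bit value, with the bit
 * shifted out folded back in via the reduction constant 0xE1 (the top byte
 * of the polynomial x^128 + x^7 + x^2 + x + 1) at the top of V.hi.
 */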
#define REDUCE1BIT(V)   do { \
        if (sizeof(size_t)==8) { \
                u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
                V.lo  = (V.hi<<63)|(V.lo>>1); \
                V.hi  = (V.hi>>1 )^T; \
        } \
        else { \
                u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
                V.lo  = (V.hi<<63)|(V.lo>>1); \
                V.hi  = (V.hi>>1 )^((u64)T<<32); \
        } \
} while(0)

/*-
 *
 * NOTE: TABLE_BITS and all non-4bit implementations have been removed in 3.1.
 *
 * Even though the permitted values for TABLE_BITS are 8, 4 and 1, it should
 * never be set to 8. 8 is effectively reserved for testing purposes.
 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
 * "Shoup's" in the GCM specification. In other words, OpenSSL does not cover
 * the whole spectrum of possible table-driven implementations. Why? In the
 * non-"Shoup's" case the memory access pattern is segmented in such a manner
 * that it's trivial to see that cache timing information can reveal a
 * fair portion of the intermediate hash value. Given that the ciphertext is
 * always available to an attacker, it's possible to attempt to
 * deduce the secret parameter H and, if successful, tamper with messages
 * [which is nothing but trivial in CTR mode]. In the "Shoup's" case it's
 * not as trivial, but there is no reason to believe that it's resistant
 * to cache-timing attacks. And the thing about the "8-bit" implementation is
 * that it consumes 16 (sixteen) times more memory, 4KB per individual
 * key + 1KB shared. On the pros side, it should be twice as fast as the
 * "4-bit" version. And for gcc-generated x86[_64] code, the "8-bit" version
 * was observed to run ~75% faster, closer to 100% for commercial
 * compilers... Yet the "4-bit" procedure is preferred, because it's
 * believed to provide a better security-performance balance and adequate
 * all-round performance. "All-round" refers to things like:
 *
 * - shorter setup time effectively improves overall timing for
 *   handling short messages;
 * - larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example on Windows a large enough free
 *   results in VM working set trimming, meaning that a consequent
 *   malloc would immediately incur working set expansion);
 * - a larger table has a larger cache footprint, which can affect the
 *   performance of other code paths (not necessarily even from the same
 *   thread in a Hyper-Threading world);
 *
 * A value of 1 is not appropriate for performance reasons.
 */

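/*
 * Builds the 4-bit Shoup table: Htable[8] = H, each successive halving of
 * the index applies REDUCE1BIT (one more multiplication by x), and every
 * other entry is the GF(2) sum (XOR) of the entries for the set bits of
 * its index, so a single nibble lookup yields that nibble's multiple of H.
 */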
static void gcm_init_4bit(u128 Htable[16], const u64 H[2])
{
    u128 V;
# if defined(OPENSSL_SMALL_FOOTPRINT)
    int i;
# endif

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

# if defined(OPENSSL_SMALL_FOOTPRINT)
    for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i = 2; i < 16; i <<= 1) {
        u128 *Hi = Htable + i;
        int j;
        for (V = *Hi, j = 1; j < i; ++j) {
            Hi[j].hi = V.hi ^ Htable[j].hi;
            Hi[j].lo = V.lo ^ Htable[j].lo;
        }
    }
# else
    Htable[8] = V;
    REDUCE1BIT(V);
    Htable[4] = V;
    REDUCE1BIT(V);
    Htable[2] = V;
    REDUCE1BIT(V);
    Htable[1] = V;
    Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
    V = Htable[4];
    Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
    Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
    Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
    V = Htable[8];
    Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
    Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
    Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
    Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
    Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
    Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
    Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
# endif
# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
    /*
     * ARM assembler expects specific dword order in Htable.
     */
    {
        int j;
        DECLARE_IS_ENDIAN;

        if (IS_LITTLE_ENDIAN)
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo;
                Htable[j].lo = V.hi;
            }
        else
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo << 32 | V.lo >> 32;
                Htable[j].lo = V.hi << 32 | V.hi >> 32;
            }
    }
# endif
}

# if !defined(GHASH_ASM) || defined(INCLUDE_C_GMULT_4BIT)
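/*
 * rem_4bit[r] is the reduction term for the four bits r shifted out of the
 * low end of Z: entry 8 is 0xE100 (the reduction constant followed by a
 * zero nibble), each halving of the index shifts it right by one bit, and
 * composite entries are XORs of those, packed into the top 16 bits of a
 * size_t by PACK().
 */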
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
};

static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
    u128 Z;
    int cnt = 15;
    size_t rem, nlo, nhi;
    DECLARE_IS_ENDIAN;

    nlo = ((const u8 *)Xi)[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
        rem = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;

        if (--cnt < 0)
            break;

        nlo = ((const u8 *)Xi)[cnt];
        nhi = nlo >> 4;
        nlo &= 0xf;

        rem = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;
    }

    if (IS_LITTLE_ENDIAN) {
#  ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
#  else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
#  endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}

# endif

# if !defined(GHASH_ASM)
#  if !defined(OPENSSL_SMALL_FOOTPRINT)
/*
 * Streamed gcm_gmult_4bit, see CRYPTO_gcm128_[en|de]crypt for
 * details... Compiler-generated code doesn't seem to give any
 * performance improvement, at least not on x86[_64]. It's here
 * mostly as a reference and a placeholder for possible future
 * non-trivial optimization[s]...
 */
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
                           const u8 *inp, size_t len)
{
    u128 Z;
    int cnt;
    size_t rem, nlo, nhi;
    DECLARE_IS_ENDIAN;

    do {
        cnt = 15;
        nlo = ((const u8 *)Xi)[15];
        nlo ^= inp[15];
        nhi = nlo >> 4;
        nlo &= 0xf;

        Z.hi = Htable[nlo].hi;
        Z.lo = Htable[nlo].lo;

        while (1) {
            rem = (size_t)Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4);
            if (sizeof(size_t) == 8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem] << 32;

            Z.hi ^= Htable[nhi].hi;
            Z.lo ^= Htable[nhi].lo;

            if (--cnt < 0)
                break;

            nlo = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi = nlo >> 4;
            nlo &= 0xf;

            rem = (size_t)Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4);
            if (sizeof(size_t) == 8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem] << 32;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;
        }

        if (IS_LITTLE_ENDIAN) {
#   ifdef BSWAP8
            Xi[0] = BSWAP8(Z.hi);
            Xi[1] = BSWAP8(Z.lo);
#   else
            u8 *p = (u8 *)Xi;
            u32 v;
            v = (u32)(Z.hi >> 32);
            PUTU32(p, v);
            v = (u32)(Z.hi);
            PUTU32(p + 4, v);
            v = (u32)(Z.lo >> 32);
            PUTU32(p + 8, v);
            v = (u32)(Z.lo);
            PUTU32(p + 12, v);
#   endif
        } else {
            Xi[0] = Z.hi;
            Xi[1] = Z.lo;
        }

        inp += 16;
        /* Block size is 128 bits so len is a multiple of 16 */
        len -= 16;
    } while (len > 0);
}
#  endif
# else
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# endif

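/* GCM_MUL sets Xi to Xi*H in GF(2^128), via the selected gmult routine */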
# define GCM_MUL(ctx)      ctx->funcs.gmult(ctx->Xi.u,ctx->Htable)
# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
#  define GHASH(ctx,in,len) ctx->funcs.ghash((ctx)->Xi.u,(ctx)->Htable,in,len)
/*
 * GHASH_CHUNK is a "stride parameter" meant to mitigate cache thrashing:
 * the idea is to hash data while it's still in the L1 cache after the
 * encryption pass...
 */
#  define GHASH_CHUNK       (3*1024)
# endif

#if     (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
# if    !defined(I386_ONLY) && \
        (defined(__i386)        || defined(__i386__)    || \
         defined(__x86_64)      || defined(__x86_64__)  || \
         defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
#  define GHASH_ASM_X86_OR_64

void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                     size_t len);

#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define gcm_init_avx   gcm_init_clmul
#   define gcm_gmult_avx  gcm_gmult_clmul
#   define gcm_ghash_avx  gcm_ghash_clmul
#  else
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                   size_t len);
#  endif

#  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);
#  endif
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
#  include "arm_arch.h"
#  if __ARM_MAX_ARCH__>=7
#   define GHASH_ASM_ARM
#   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
#   if defined(__arm__) || defined(__arm)
#    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
#   endif
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
#  endif
# elif defined(__sparc__) || defined(__sparc)
#  include "crypto/sparc_arch.h"
#  define GHASH_ASM_SPARC
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
#  include "crypto/ppc_arch.h"
#  define GHASH_ASM_PPC
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
# elif defined(OPENSSL_CPUID_OBJ) && defined(__riscv) && __riscv_xlen == 64
#  include "crypto/riscv_arch.h"
#  define GHASH_ASM_RISCV
#  undef  GHASH
void gcm_init_clmul_rv64i_zbb_zbc(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_clmul_rv64i_zbb_zbc(u64 Xi[2], const u128 Htable[16]);
# endif
#endif

static void gcm_get_funcs(struct gcm_funcs_st *ctx)
{
    /* set defaults -- overridden below as needed */
    ctx->ginit = gcm_init_4bit;
#if !defined(GHASH_ASM) || defined(INCLUDE_C_GMULT_4BIT)
    ctx->gmult = gcm_gmult_4bit;
#else
    ctx->gmult = NULL;
#endif
#if !defined(GHASH_ASM) && !defined(OPENSSL_SMALL_FOOTPRINT)
    ctx->ghash = gcm_ghash_4bit;
#else
    ctx->ghash = NULL;
#endif

#if defined(GHASH_ASM_X86_OR_64)
# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
    /* x86_64 */
    if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
            ctx->ginit = gcm_init_avx;
            ctx->gmult = gcm_gmult_avx;
            ctx->ghash = gcm_ghash_avx;
        } else {
            ctx->ginit = gcm_init_clmul;
            ctx->gmult = gcm_gmult_clmul;
            ctx->ghash = gcm_ghash_clmul;
        }
        return;
    }
# endif
# if defined(GHASH_ASM_X86)
    /* x86 only */
#  if defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
        ctx->gmult = gcm_gmult_4bit_mmx;
        ctx->ghash = gcm_ghash_4bit_mmx;
        return;
    }
#  else
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
        ctx->gmult = gcm_gmult_4bit_mmx;
        ctx->ghash = gcm_ghash_4bit_mmx;
        return;
    }
#  endif
    ctx->gmult = gcm_gmult_4bit_x86;
    ctx->ghash = gcm_ghash_4bit_x86;
    return;
# endif
#elif defined(GHASH_ASM_ARM)
    /* ARM defaults */
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
# ifdef PMULL_CAPABLE
    if (PMULL_CAPABLE) {
        ctx->ginit = (gcm_init_fn)gcm_init_v8;
        ctx->gmult = gcm_gmult_v8;
        ctx->ghash = gcm_ghash_v8;
    }
# elif defined(NEON_CAPABLE)
    if (NEON_CAPABLE) {
        ctx->ginit = gcm_init_neon;
        ctx->gmult = gcm_gmult_neon;
        ctx->ghash = gcm_ghash_neon;
    }
# endif
    return;
#elif defined(GHASH_ASM_SPARC)
    /* SPARC defaults */
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
        ctx->ginit = gcm_init_vis3;
        ctx->gmult = gcm_gmult_vis3;
        ctx->ghash = gcm_ghash_vis3;
    }
    return;
#elif defined(GHASH_ASM_PPC)
    /* PowerPC does not define GHASH_ASM; defaults set above */
    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
        ctx->ginit = gcm_init_p8;
        ctx->gmult = gcm_gmult_p8;
        ctx->ghash = gcm_ghash_p8;
    }
    return;
#elif defined(GHASH_ASM_RISCV) && __riscv_xlen == 64
    /* RISCV defaults; gmult already set above */
    ctx->ghash = NULL;
    if (RISCV_HAS_ZBB() && RISCV_HAS_ZBC()) {
        ctx->ginit = gcm_init_clmul_rv64i_zbb_zbc;
        ctx->gmult = gcm_gmult_clmul_rv64i_zbb_zbc;
    }
    return;
#elif defined(GHASH_ASM)
    /* all other architectures use the generic names */
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
    return;
#endif
}

void ossl_gcm_init_4bit(u128 Htable[16], const u64 H[2])
{
    struct gcm_funcs_st funcs;

    gcm_get_funcs(&funcs);
    funcs.ginit(Htable, H);
}

void ossl_gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
    struct gcm_funcs_st funcs;

    gcm_get_funcs(&funcs);
    funcs.gmult(Xi, Htable);
}

void ossl_gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len)
{
    struct gcm_funcs_st funcs;
    u64 tmp[2];
    size_t i;

    gcm_get_funcs(&funcs);
    if (funcs.ghash != NULL) {
        funcs.ghash(Xi, Htable, inp, len);
    } else {
        /* Emulate ghash if needed */
        for (i = 0; i < len; i += 16) {
            memcpy(tmp, &inp[i], sizeof(tmp));
            Xi[0] ^= tmp[0];
            Xi[1] ^= tmp[1];
            funcs.gmult(Xi, Htable);
        }
    }
}

void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
{
    DECLARE_IS_ENDIAN;

    memset(ctx, 0, sizeof(*ctx));
    ctx->block = block;
    ctx->key = key;

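    /* H = E_K(0^128): ctx->H was zeroed by the memset above */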
    (*block) (ctx->H.c, ctx->H.c, key);

    if (IS_LITTLE_ENDIAN) {
        /* H is stored in host byte order */
#ifdef BSWAP8
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
        u8 *p = ctx->H.c;
        u64 hi, lo;
        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
        ctx->H.u[0] = hi;
        ctx->H.u[1] = lo;
#endif
    }

    gcm_get_funcs(&ctx->funcs);
    ctx->funcs.ginit(ctx->Htable, ctx->H.u);
}

void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
                         size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int ctr;

    ctx->len.u[0] = 0;          /* AAD length */
    ctx->len.u[1] = 0;          /* message length */
    ctx->ares = 0;
    ctx->mres = 0;

    if (len == 12) {
        memcpy(ctx->Yi.c, iv, 12);
        ctx->Yi.c[12] = 0;
        ctx->Yi.c[13] = 0;
        ctx->Yi.c[14] = 0;
        ctx->Yi.c[15] = 1;
        ctr = 1;
    } else {
        size_t i;
        u64 len0 = len;

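        /*
         * For IVs other than 96 bits, the GCM spec requires
         * Y0 = GHASH(IV || 0^(s+64) || [len(IV)]_64), computed below.
         */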
        /* Borrow ctx->Xi to calculate initial Yi */
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;

        while (len >= 16) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
            iv += 16;
            len -= 16;
        }
        if (len) {
            for (i = 0; i < len; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
        }
        len0 <<= 3;
        if (IS_LITTLE_ENDIAN) {
#ifdef BSWAP8
            ctx->Xi.u[1] ^= BSWAP8(len0);
#else
            ctx->Xi.c[8] ^= (u8)(len0 >> 56);
            ctx->Xi.c[9] ^= (u8)(len0 >> 48);
            ctx->Xi.c[10] ^= (u8)(len0 >> 40);
            ctx->Xi.c[11] ^= (u8)(len0 >> 32);
            ctx->Xi.c[12] ^= (u8)(len0 >> 24);
            ctx->Xi.c[13] ^= (u8)(len0 >> 16);
            ctx->Xi.c[14] ^= (u8)(len0 >> 8);
            ctx->Xi.c[15] ^= (u8)(len0);
#endif
        } else {
            ctx->Xi.u[1] ^= len0;
        }

        GCM_MUL(ctx);

        if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
            ctr = BSWAP4(ctx->Xi.d[3]);
#else
            ctr = GETU32(ctx->Xi.c + 12);
#endif
        else
            ctr = ctx->Xi.d[3];

        /* Copy borrowed Xi to Yi */
        ctx->Yi.u[0] = ctx->Xi.u[0];
        ctx->Yi.u[1] = ctx->Xi.u[1];
    }

    ctx->Xi.u[0] = 0;
    ctx->Xi.u[1] = 0;

    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
    ++ctr;
    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c + 12, ctr);
#endif
    else
        ctx->Yi.d[3] = ctr;
}

int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
                      size_t len)
{
    size_t i;
    unsigned int n;
    u64 alen = ctx->len.u[0];

    if (ctx->len.u[1])
        return -2;

    alen += len;
    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
        return -1;
    ctx->len.u[0] = alen;

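    /* Finish any partial AAD block carried over from a previous call */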
    n = ctx->ares;
    if (n) {
        while (n && len) {
            ctx->Xi.c[n] ^= *(aad++);
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0)
            GCM_MUL(ctx);
        else {
            ctx->ares = n;
            return 0;
        }
    }
#ifdef GHASH
    if ((i = (len & (size_t)-16))) {
        GHASH(ctx, aad, i);
        aad += i;
        len -= i;
    }
#else
    while (len >= 16) {
        for (i = 0; i < 16; ++i)
            ctx->Xi.c[i] ^= aad[i];
        GCM_MUL(ctx);
        aad += 16;
        len -= 16;
    }
#endif
    if (len) {
        n = (unsigned int)len;
        for (i = 0; i < len; ++i)
            ctx->Xi.c[i] ^= aad[i];
    }

    ctx->ares = n;
    return 0;
}

int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
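        /*
         * Defer the final AAD multiplication: fold Xi into the Xn batch
         * buffer so it is hashed together with the first ciphertext blocks.
         */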
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            if (n) {
# if defined(GHASH)
                while (n && len) {
                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
#  if defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                while (j) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#   else
                        PUTU32(ctx->Yi.c + 12, ctr);
#   endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
                len -= GHASH_CHUNK;
            }
#  endif
            if ((i = (len & (size_t)-16))) {
                size_t j = i;

                while (len >= 16) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
                GHASH(ctx, out - j, j);
            }
# else
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i)
                    ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    for (i = 0; i < len; ++i) {
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx, ctx->Xn, sizeof(ctx->Xn));
            mres = 0;
        }
#else
        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}

int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            if (n) {
# if defined(GHASH)
                while (n && len) {
                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    u8 c = *(in++);
                    *(out++) = c ^ ctx->EKi.c[n];
                    ctx->Xi.c[n] ^= c;
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
#  if defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                GHASH(ctx, in, GHASH_CHUNK);
                while (j) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#   else
                        PUTU32(ctx->Yi.c + 12, ctr);
#   endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                len -= GHASH_CHUNK;
            }
#  endif
            if ((i = (len & (size_t)-16))) {
                GHASH(ctx, in, i);
                while (len >= 16) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
            }
# else
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
                    size_t c = in_t[i];
                    out_t[i] = c ^ ctx->EKi.t[i];
                    ctx->Xi.t[i] ^= c;
                }
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    u8 c = in[n];
                    ctx->Xi.c[n] ^= c;
                    out[n] = c ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    for (i = 0; i < len; ++i) {
        u8 c;
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx, ctx->Xn, sizeof(ctx->Xn));
            mres = 0;
        }
#else
        c = in[i];
        out[i] = c ^ ctx->EKi.c[n];
        ctx->Xi.c[n] ^= c;
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}

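/*
 * The *_ctr32 variants take a ctr128_f stream callback that encrypts a run
 * of blocks at once, incrementing only the low 32 bits of the counter in
 * Yi (big-endian), as GCM requires.
 */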
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
#if defined(OPENSSL_SMALL_FOOTPRINT)
    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
#else
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
# if defined(GHASH)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
# else
        GCM_MUL(ctx);
# endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
# else
        ctr = GETU32(ctx->Yi.c + 12);
# endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
    if (n) {
# if defined(GHASH)
        while (n && len) {
            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        } else {
            ctx->mres = mres;
            return 0;
        }
# else
        while (n && len) {
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GCM_MUL(ctx);
            mres = 0;
        } else {
            ctx->mres = n;
            return 0;
        }
# endif
    }
# if defined(GHASH)
    if (len >= 16 && mres) {
        GHASH(ctx, ctx->Xn, mres);
        mres = 0;
    }
#  if defined(GHASH_CHUNK)
    while (len >= GHASH_CHUNK) {
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#   else
            PUTU32(ctx->Yi.c + 12, ctr);
#   endif
        else
            ctx->Yi.d[3] = ctr;
        GHASH(ctx, out, GHASH_CHUNK);
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#  endif
# endif
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        in += i;
        len -= i;
# if defined(GHASH)
        GHASH(ctx, out, i);
        out += i;
# else
        while (j--) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= out[i];
            GCM_MUL(ctx);
            out += 16;
        }
# endif
    }
    if (len) {
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
# if defined(GHASH)
            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
# else
            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
# endif
            ++n;
        }
    }

    ctx->mres = mres;
    return 0;
#endif
}

int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
#if defined(OPENSSL_SMALL_FOOTPRINT)
    return CRYPTO_gcm128_decrypt(ctx, in, out, len);
#else
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
# if defined(GHASH)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
# else
        GCM_MUL(ctx);
# endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
# else
        ctr = GETU32(ctx->Yi.c + 12);
# endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
    if (n) {
# if defined(GHASH)
        while (n && len) {
            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        } else {
            ctx->mres = mres;
            return 0;
        }
# else
        while (n && len) {
            u8 c = *(in++);
            *(out++) = c ^ ctx->EKi.c[n];
            ctx->Xi.c[n] ^= c;
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GCM_MUL(ctx);
            mres = 0;
        } else {
            ctx->mres = n;
            return 0;
        }
# endif
    }
# if defined(GHASH)
    if (len >= 16 && mres) {
        GHASH(ctx, ctx->Xn, mres);
        mres = 0;
    }
#  if defined(GHASH_CHUNK)
    while (len >= GHASH_CHUNK) {
        GHASH(ctx, in, GHASH_CHUNK);
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#   else
            PUTU32(ctx->Yi.c + 12, ctr);
#   endif
        else
            ctx->Yi.d[3] = ctr;
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#  endif
# endif
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

# if defined(GHASH)
        GHASH(ctx, in, i);
# else
        while (j--) {
            size_t k;
            for (k = 0; k < 16; ++k)
                ctx->Xi.c[k] ^= in[k];
            GCM_MUL(ctx);
            in += 16;
        }
        j = i / 16;
        in -= i;
# endif
        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        out += i;
        in += i;
        len -= i;
    }
    if (len) {
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
# if defined(GHASH)
            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
# else
            u8 c = in[n];
            ctx->Xi.c[mres++] ^= c;
            out[n] = c ^ ctx->EKi.c[n];
# endif
            ++n;
        }
    }

    ctx->mres = mres;
    return 0;
#endif
}

int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
                         size_t len)
{
    DECLARE_IS_ENDIAN;
    u64 alen = ctx->len.u[0] << 3;
    u64 clen = ctx->len.u[1] << 3;

#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    u128 bitlen;
    unsigned int mres = ctx->mres;

    if (mres) {
        unsigned blocks = (mres + 15) & -16;

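        /* Zero-pad the buffered residue to a whole number of blocks */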
        memset(ctx->Xn + mres, 0, blocks - mres);
        mres = blocks;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        }
    } else if (ctx->ares) {
        GCM_MUL(ctx);
    }
#else
    if (ctx->mres || ctx->ares)
        GCM_MUL(ctx);
#endif

    if (IS_LITTLE_ENDIAN) {
#ifdef BSWAP8
        alen = BSWAP8(alen);
        clen = BSWAP8(clen);
#else
        u8 *p = ctx->len.c;

        ctx->len.u[0] = alen;
        ctx->len.u[1] = clen;

        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
#endif
    }

#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    bitlen.hi = alen;
    bitlen.lo = clen;
    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
    mres += sizeof(bitlen);
    GHASH(ctx, ctx->Xn, mres);
#else
    ctx->Xi.u[0] ^= alen;
    ctx->Xi.u[1] ^= clen;
    GCM_MUL(ctx);
#endif

    ctx->Xi.u[0] ^= ctx->EK0.u[0];
    ctx->Xi.u[1] ^= ctx->EK0.u[1];

    if (tag && len <= sizeof(ctx->Xi))
        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
    else
        return -1;
}

void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
{
    CRYPTO_gcm128_finish(ctx, NULL, 0);
    memcpy(tag, ctx->Xi.c,
           len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}

GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
{
    GCM128_CONTEXT *ret;

    if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
        CRYPTO_gcm128_init(ret, key, block);

    return ret;
}

void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
{
    OPENSSL_clear_free(ctx, sizeof(*ctx));
}
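
#if 0
/*-
 * Usage sketch (not compiled): one-shot AES-128-GCM sealing with the
 * low-level CRYPTO_gcm128_* API above. This is an illustrative assumption
 * of how a caller wires things up, using the legacy AES_set_encrypt_key /
 * AES_encrypt block cipher from <openssl/aes.h> as the block128_f; real
 * applications should use the EVP AEAD interface instead.
 */
# include <openssl/aes.h>

static int gcm128_seal_example(const unsigned char key[16],
                               const unsigned char iv[12],
                               const unsigned char *aad, size_t aadlen,
                               const unsigned char *pt, unsigned char *ct,
                               size_t len, unsigned char tag[16])
{
    AES_KEY aes;
    GCM128_CONTEXT gcm;

    if (AES_set_encrypt_key(key, 128, &aes) != 0)
        return 0;
    /* H = E_K(0) is computed here; Htable and function pointers are set */
    CRYPTO_gcm128_init(&gcm, &aes, (block128_f)AES_encrypt);
    /* 96-bit IV: Y0 = IV || 0x00000001, no GHASH pass needed */
    CRYPTO_gcm128_setiv(&gcm, iv, 12);
    /* AAD must be supplied before any plaintext */
    if (CRYPTO_gcm128_aad(&gcm, aad, aadlen) != 0)
        return 0;
    if (CRYPTO_gcm128_encrypt(&gcm, pt, ct, len) != 0)
        return 0;
    /* Final tag = GHASH(AAD, C, lengths) XOR E_K(Y0) */
    CRYPTO_gcm128_tag(&gcm, tag, 16);
    return 1;
}
#endif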
1578