xref: /openssl/crypto/ec/ecp_nistp256.c (revision 7ed6de99)
1 /*
2  * Copyright 2011-2024 The OpenSSL Project Authors. All Rights Reserved.
3  *
4  * Licensed under the Apache License 2.0 (the "License").  You may not use
5  * this file except in compliance with the License.  You can obtain a copy
6  * in the file LICENSE in the source distribution or at
7  * https://www.openssl.org/source/license.html
8  */
9 
10 /* Copyright 2011 Google Inc.
11  *
12  * Licensed under the Apache License, Version 2.0 (the "License");
13  *
14  * you may not use this file except in compliance with the License.
15  * You may obtain a copy of the License at
16  *
17  *     http://www.apache.org/licenses/LICENSE-2.0
18  *
19  *  Unless required by applicable law or agreed to in writing, software
20  *  distributed under the License is distributed on an "AS IS" BASIS,
21  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22  *  See the License for the specific language governing permissions and
23  *  limitations under the License.
24  */
25 
26 /*
27  * ECDSA low level APIs are deprecated for public use, but still ok for
28  * internal use.
29  */
30 #include "internal/deprecated.h"
31 
32 /*
33  * A 64-bit implementation of the NIST P-256 elliptic curve point multiplication
34  *
35  * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
36  * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
37  * work which got its smarts from Daniel J. Bernstein's work on the same.
38  */
39 
40 #include <openssl/opensslconf.h>
41 
42 #include <stdint.h>
43 #include <string.h>
44 #include <openssl/err.h>
45 #include "ec_local.h"
46 
47 #include "internal/numbers.h"
48 
49 #ifndef INT128_MAX
50 # error "Your compiler doesn't appear to support 128-bit integer types"
51 #endif
52 
53 typedef uint8_t u8;
54 typedef uint32_t u32;
55 typedef uint64_t u64;
56 
57 /*
58  * The underlying field. P256 operates over GF(2^256-2^224+2^192+2^96-1). We
59  * can serialize an element of this field into 32 bytes. We call this an
60  * felem_bytearray.
61  */
62 
63 typedef u8 felem_bytearray[32];
64 
65 /*
66  * These are the parameters of P256, taken from FIPS 186-3, page 86. These
67  * values are big-endian.
68  */
69 static const felem_bytearray nistp256_curve_params[5] = {
70     {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */
71      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
72      0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
73      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
74     {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */
75      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
76      0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
77      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc},
78     {0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7, /* b */
79      0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
80      0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
81      0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},
82     {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */
83      0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
84      0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
85      0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},
86     {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */
87      0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
88      0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
89      0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}
90 };
91 
92 /*-
93  * The representation of field elements.
94  * ------------------------------------
95  *
96  * We represent field elements with either four 128-bit values, eight 128-bit
97  * values, or four 64-bit values. The field element represented is:
98  *   v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + v[3]*2^192  (mod p)
99  * or:
100  *   v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + ... + v[7]*2^448  (mod p)
101  *
102  * 128-bit values are called 'limbs'. Since the limbs are spaced only 64 bits
103  * apart, but are 128-bits wide, the most significant bits of each limb overlap
104  * with the least significant bits of the next.
105  *
106  * A field element with four limbs is an 'felem'. One with eight limbs is a
107  * 'longfelem'
108  *
109  * A field element with four, 64-bit values is called a 'smallfelem'. Small
110  * values are used as intermediate values before multiplication.
111  */
112 
113 #define NLIMBS 4
114 
115 typedef uint128_t limb;
116 typedef limb felem[NLIMBS];
117 typedef limb longfelem[NLIMBS * 2];
118 typedef u64 smallfelem[NLIMBS];
119 
120 /* This is the value of the prime as four 64-bit words, little-endian. */
121 static const u64 kPrime[4] = {
122     0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul
123 };
124 static const u64 bottom63bits = 0x7ffffffffffffffful;
125 
126 /*
127  * bin32_to_felem takes a little-endian byte array and converts it into felem
128  * form. This assumes that the CPU is little-endian.
129  */
bin32_to_felem(felem out,const u8 in[32])130 static void bin32_to_felem(felem out, const u8 in[32])
131 {
132     out[0] = *((u64 *)&in[0]);
133     out[1] = *((u64 *)&in[8]);
134     out[2] = *((u64 *)&in[16]);
135     out[3] = *((u64 *)&in[24]);
136 }
137 
138 /*
139  * smallfelem_to_bin32 takes a smallfelem and serializes into a little
140  * endian, 32 byte array. This assumes that the CPU is little-endian.
141  */
smallfelem_to_bin32(u8 out[32],const smallfelem in)142 static void smallfelem_to_bin32(u8 out[32], const smallfelem in)
143 {
144     *((u64 *)&out[0]) = in[0];
145     *((u64 *)&out[8]) = in[1];
146     *((u64 *)&out[16]) = in[2];
147     *((u64 *)&out[24]) = in[3];
148 }
149 
150 /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
BN_to_felem(felem out,const BIGNUM * bn)151 static int BN_to_felem(felem out, const BIGNUM *bn)
152 {
153     felem_bytearray b_out;
154     int num_bytes;
155 
156     if (BN_is_negative(bn)) {
157         ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
158         return 0;
159     }
160     num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
161     if (num_bytes < 0) {
162         ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
163         return 0;
164     }
165     bin32_to_felem(out, b_out);
166     return 1;
167 }
168 
169 /* felem_to_BN converts an felem into an OpenSSL BIGNUM */
smallfelem_to_BN(BIGNUM * out,const smallfelem in)170 static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in)
171 {
172     felem_bytearray b_out;
173     smallfelem_to_bin32(b_out, in);
174     return BN_lebin2bn(b_out, sizeof(b_out), out);
175 }
176 
177 /*-
178  * Field operations
179  * ----------------
180  */
181 
smallfelem_one(smallfelem out)182 static void smallfelem_one(smallfelem out)
183 {
184     out[0] = 1;
185     out[1] = 0;
186     out[2] = 0;
187     out[3] = 0;
188 }
189 
smallfelem_assign(smallfelem out,const smallfelem in)190 static void smallfelem_assign(smallfelem out, const smallfelem in)
191 {
192     out[0] = in[0];
193     out[1] = in[1];
194     out[2] = in[2];
195     out[3] = in[3];
196 }
197 
felem_assign(felem out,const felem in)198 static void felem_assign(felem out, const felem in)
199 {
200     out[0] = in[0];
201     out[1] = in[1];
202     out[2] = in[2];
203     out[3] = in[3];
204 }
205 
206 /* felem_sum sets out = out + in. */
felem_sum(felem out,const felem in)207 static void felem_sum(felem out, const felem in)
208 {
209     out[0] += in[0];
210     out[1] += in[1];
211     out[2] += in[2];
212     out[3] += in[3];
213 }
214 
215 /* felem_small_sum sets out = out + in. */
felem_small_sum(felem out,const smallfelem in)216 static void felem_small_sum(felem out, const smallfelem in)
217 {
218     out[0] += in[0];
219     out[1] += in[1];
220     out[2] += in[2];
221     out[3] += in[3];
222 }
223 
224 /* felem_scalar sets out = out * scalar */
felem_scalar(felem out,const u64 scalar)225 static void felem_scalar(felem out, const u64 scalar)
226 {
227     out[0] *= scalar;
228     out[1] *= scalar;
229     out[2] *= scalar;
230     out[3] *= scalar;
231 }
232 
233 /* longfelem_scalar sets out = out * scalar */
longfelem_scalar(longfelem out,const u64 scalar)234 static void longfelem_scalar(longfelem out, const u64 scalar)
235 {
236     out[0] *= scalar;
237     out[1] *= scalar;
238     out[2] *= scalar;
239     out[3] *= scalar;
240     out[4] *= scalar;
241     out[5] *= scalar;
242     out[6] *= scalar;
243     out[7] *= scalar;
244 }
245 
246 #define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
247 #define two105 (((limb)1) << 105)
248 #define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)
249 
250 /* zero105 is 0 mod p */
251 static const felem zero105 =
252     { two105m41m9, two105, two105m41p9, two105m41p9 };
253 
254 /*-
255  * smallfelem_neg sets |out| to |-small|
256  * On exit:
257  *   out[i] < out[i] + 2^105
258  */
smallfelem_neg(felem out,const smallfelem small)259 static void smallfelem_neg(felem out, const smallfelem small)
260 {
261     /* In order to prevent underflow, we subtract from 0 mod p. */
262     out[0] = zero105[0] - small[0];
263     out[1] = zero105[1] - small[1];
264     out[2] = zero105[2] - small[2];
265     out[3] = zero105[3] - small[3];
266 }
267 
268 /*-
269  * felem_diff subtracts |in| from |out|
270  * On entry:
271  *   in[i] < 2^104
272  * On exit:
273  *   out[i] < out[i] + 2^105
274  */
felem_diff(felem out,const felem in)275 static void felem_diff(felem out, const felem in)
276 {
277     /*
278      * In order to prevent underflow, we add 0 mod p before subtracting.
279      */
280     out[0] += zero105[0];
281     out[1] += zero105[1];
282     out[2] += zero105[2];
283     out[3] += zero105[3];
284 
285     out[0] -= in[0];
286     out[1] -= in[1];
287     out[2] -= in[2];
288     out[3] -= in[3];
289 }
290 
291 #define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
292 #define two107 (((limb)1) << 107)
293 #define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)
294 
295 /* zero107 is 0 mod p */
296 static const felem zero107 = {
297     two107m43m11, two107, two107m43p11, two107m43p11
298 };
299 
300 /*-
301  * An alternative felem_diff for larger inputs |in|
302  * felem_diff_zero107 subtracts |in| from |out|
303  * On entry:
304  *   in[i] < 2^106
305  * On exit:
306  *   out[i] < out[i] + 2^107
307  */
felem_diff_zero107(felem out,const felem in)308 static void felem_diff_zero107(felem out, const felem in)
309 {
310     /*
311      * In order to prevent underflow, we add 0 mod p before subtracting.
312      */
313     out[0] += zero107[0];
314     out[1] += zero107[1];
315     out[2] += zero107[2];
316     out[3] += zero107[3];
317 
318     out[0] -= in[0];
319     out[1] -= in[1];
320     out[2] -= in[2];
321     out[3] -= in[3];
322 }
323 
324 /*-
325  * longfelem_diff subtracts |in| from |out|
326  * On entry:
327  *   in[i] < 7*2^67
328  * On exit:
329  *   out[i] < out[i] + 2^70 + 2^40
330  */
longfelem_diff(longfelem out,const longfelem in)331 static void longfelem_diff(longfelem out, const longfelem in)
332 {
333     static const limb two70m8p6 =
334         (((limb) 1) << 70) - (((limb) 1) << 8) + (((limb) 1) << 6);
335     static const limb two70p40 = (((limb) 1) << 70) + (((limb) 1) << 40);
336     static const limb two70 = (((limb) 1) << 70);
337     static const limb two70m40m38p6 =
338         (((limb) 1) << 70) - (((limb) 1) << 40) - (((limb) 1) << 38) +
339         (((limb) 1) << 6);
340     static const limb two70m6 = (((limb) 1) << 70) - (((limb) 1) << 6);
341 
342     /* add 0 mod p to avoid underflow */
343     out[0] += two70m8p6;
344     out[1] += two70p40;
345     out[2] += two70;
346     out[3] += two70m40m38p6;
347     out[4] += two70m6;
348     out[5] += two70m6;
349     out[6] += two70m6;
350     out[7] += two70m6;
351 
352     /* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */
353     out[0] -= in[0];
354     out[1] -= in[1];
355     out[2] -= in[2];
356     out[3] -= in[3];
357     out[4] -= in[4];
358     out[5] -= in[5];
359     out[6] -= in[6];
360     out[7] -= in[7];
361 }
362 
363 #define two64m0 (((limb)1) << 64) - 1
364 #define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
365 #define two64m46 (((limb)1) << 64) - (((limb)1) << 46)
366 #define two64m32 (((limb)1) << 64) - (((limb)1) << 32)
367 
368 /* zero110 is 0 mod p */
369 static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };
370 
371 /*-
372  * felem_shrink converts an felem into a smallfelem. The result isn't quite
373  * minimal as the value may be greater than p.
374  *
375  * On entry:
376  *   in[i] < 2^109
377  * On exit:
378  *   out[i] < 2^64
379  */
felem_shrink(smallfelem out,const felem in)380 static void felem_shrink(smallfelem out, const felem in)
381 {
382     felem tmp;
383     u64 a, b, mask;
384     u64 high, low;
385     static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */
386 
387     /* Carry 2->3 */
388     tmp[3] = zero110[3] + in[3] + ((u64)(in[2] >> 64));
389     /* tmp[3] < 2^110 */
390 
391     tmp[2] = zero110[2] + (u64)in[2];
392     tmp[0] = zero110[0] + in[0];
393     tmp[1] = zero110[1] + in[1];
394     /* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */
395 
396     /*
397      * We perform two partial reductions where we eliminate the high-word of
398      * tmp[3]. We don't update the other words till the end.
399      */
400     a = tmp[3] >> 64;           /* a < 2^46 */
401     tmp[3] = (u64)tmp[3];
402     tmp[3] -= a;
403     tmp[3] += ((limb) a) << 32;
404     /* tmp[3] < 2^79 */
405 
406     b = a;
407     a = tmp[3] >> 64;           /* a < 2^15 */
408     b += a;                     /* b < 2^46 + 2^15 < 2^47 */
409     tmp[3] = (u64)tmp[3];
410     tmp[3] -= a;
411     tmp[3] += ((limb) a) << 32;
412     /* tmp[3] < 2^64 + 2^47 */
413 
414     /*
415      * This adjusts the other two words to complete the two partial
416      * reductions.
417      */
418     tmp[0] += b;
419     tmp[1] -= (((limb) b) << 32);
420 
421     /*
422      * In order to make space in tmp[3] for the carry from 2 -> 3, we
423      * conditionally subtract kPrime if tmp[3] is large enough.
424      */
425     high = (u64)(tmp[3] >> 64);
426     /* As tmp[3] < 2^65, high is either 1 or 0 */
427     high = 0 - high;
428     /*-
429      * high is:
430      *   all ones   if the high word of tmp[3] is 1
431      *   all zeros  if the high word of tmp[3] if 0
432      */
433     low = (u64)tmp[3];
434     mask = 0 - (low >> 63);
435     /*-
436      * mask is:
437      *   all ones   if the MSB of low is 1
438      *   all zeros  if the MSB of low if 0
439      */
440     low &= bottom63bits;
441     low -= kPrime3Test;
442     /* if low was greater than kPrime3Test then the MSB is zero */
443     low = ~low;
444     low = 0 - (low >> 63);
445     /*-
446      * low is:
447      *   all ones   if low was > kPrime3Test
448      *   all zeros  if low was <= kPrime3Test
449      */
450     mask = (mask & low) | high;
451     tmp[0] -= mask & kPrime[0];
452     tmp[1] -= mask & kPrime[1];
453     /* kPrime[2] is zero, so omitted */
454     tmp[3] -= mask & kPrime[3];
455     /* tmp[3] < 2**64 - 2**32 + 1 */
456 
457     tmp[1] += ((u64)(tmp[0] >> 64));
458     tmp[0] = (u64)tmp[0];
459     tmp[2] += ((u64)(tmp[1] >> 64));
460     tmp[1] = (u64)tmp[1];
461     tmp[3] += ((u64)(tmp[2] >> 64));
462     tmp[2] = (u64)tmp[2];
463     /* tmp[i] < 2^64 */
464 
465     out[0] = tmp[0];
466     out[1] = tmp[1];
467     out[2] = tmp[2];
468     out[3] = tmp[3];
469 }
470 
471 /* smallfelem_expand converts a smallfelem to an felem */
smallfelem_expand(felem out,const smallfelem in)472 static void smallfelem_expand(felem out, const smallfelem in)
473 {
474     out[0] = in[0];
475     out[1] = in[1];
476     out[2] = in[2];
477     out[3] = in[3];
478 }
479 
480 /*-
481  * smallfelem_square sets |out| = |small|^2
482  * On entry:
483  *   small[i] < 2^64
484  * On exit:
485  *   out[i] < 7 * 2^64 < 2^67
486  */
smallfelem_square(longfelem out,const smallfelem small)487 static void smallfelem_square(longfelem out, const smallfelem small)
488 {
489     limb a;
490     u64 high, low;
491 
492     a = ((uint128_t) small[0]) * small[0];
493     low = a;
494     high = a >> 64;
495     out[0] = low;
496     out[1] = high;
497 
498     a = ((uint128_t) small[0]) * small[1];
499     low = a;
500     high = a >> 64;
501     out[1] += low;
502     out[1] += low;
503     out[2] = high;
504 
505     a = ((uint128_t) small[0]) * small[2];
506     low = a;
507     high = a >> 64;
508     out[2] += low;
509     out[2] *= 2;
510     out[3] = high;
511 
512     a = ((uint128_t) small[0]) * small[3];
513     low = a;
514     high = a >> 64;
515     out[3] += low;
516     out[4] = high;
517 
518     a = ((uint128_t) small[1]) * small[2];
519     low = a;
520     high = a >> 64;
521     out[3] += low;
522     out[3] *= 2;
523     out[4] += high;
524 
525     a = ((uint128_t) small[1]) * small[1];
526     low = a;
527     high = a >> 64;
528     out[2] += low;
529     out[3] += high;
530 
531     a = ((uint128_t) small[1]) * small[3];
532     low = a;
533     high = a >> 64;
534     out[4] += low;
535     out[4] *= 2;
536     out[5] = high;
537 
538     a = ((uint128_t) small[2]) * small[3];
539     low = a;
540     high = a >> 64;
541     out[5] += low;
542     out[5] *= 2;
543     out[6] = high;
544     out[6] += high;
545 
546     a = ((uint128_t) small[2]) * small[2];
547     low = a;
548     high = a >> 64;
549     out[4] += low;
550     out[5] += high;
551 
552     a = ((uint128_t) small[3]) * small[3];
553     low = a;
554     high = a >> 64;
555     out[6] += low;
556     out[7] = high;
557 }
558 
559 /*-
560  * felem_square sets |out| = |in|^2
561  * On entry:
562  *   in[i] < 2^109
563  * On exit:
564  *   out[i] < 7 * 2^64 < 2^67
565  */
felem_square(longfelem out,const felem in)566 static void felem_square(longfelem out, const felem in)
567 {
568     u64 small[4];
569     felem_shrink(small, in);
570     smallfelem_square(out, small);
571 }
572 
573 /*-
574  * smallfelem_mul sets |out| = |small1| * |small2|
575  * On entry:
576  *   small1[i] < 2^64
577  *   small2[i] < 2^64
578  * On exit:
579  *   out[i] < 7 * 2^64 < 2^67
580  */
smallfelem_mul(longfelem out,const smallfelem small1,const smallfelem small2)581 static void smallfelem_mul(longfelem out, const smallfelem small1,
582                            const smallfelem small2)
583 {
584     limb a;
585     u64 high, low;
586 
587     a = ((uint128_t) small1[0]) * small2[0];
588     low = a;
589     high = a >> 64;
590     out[0] = low;
591     out[1] = high;
592 
593     a = ((uint128_t) small1[0]) * small2[1];
594     low = a;
595     high = a >> 64;
596     out[1] += low;
597     out[2] = high;
598 
599     a = ((uint128_t) small1[1]) * small2[0];
600     low = a;
601     high = a >> 64;
602     out[1] += low;
603     out[2] += high;
604 
605     a = ((uint128_t) small1[0]) * small2[2];
606     low = a;
607     high = a >> 64;
608     out[2] += low;
609     out[3] = high;
610 
611     a = ((uint128_t) small1[1]) * small2[1];
612     low = a;
613     high = a >> 64;
614     out[2] += low;
615     out[3] += high;
616 
617     a = ((uint128_t) small1[2]) * small2[0];
618     low = a;
619     high = a >> 64;
620     out[2] += low;
621     out[3] += high;
622 
623     a = ((uint128_t) small1[0]) * small2[3];
624     low = a;
625     high = a >> 64;
626     out[3] += low;
627     out[4] = high;
628 
629     a = ((uint128_t) small1[1]) * small2[2];
630     low = a;
631     high = a >> 64;
632     out[3] += low;
633     out[4] += high;
634 
635     a = ((uint128_t) small1[2]) * small2[1];
636     low = a;
637     high = a >> 64;
638     out[3] += low;
639     out[4] += high;
640 
641     a = ((uint128_t) small1[3]) * small2[0];
642     low = a;
643     high = a >> 64;
644     out[3] += low;
645     out[4] += high;
646 
647     a = ((uint128_t) small1[1]) * small2[3];
648     low = a;
649     high = a >> 64;
650     out[4] += low;
651     out[5] = high;
652 
653     a = ((uint128_t) small1[2]) * small2[2];
654     low = a;
655     high = a >> 64;
656     out[4] += low;
657     out[5] += high;
658 
659     a = ((uint128_t) small1[3]) * small2[1];
660     low = a;
661     high = a >> 64;
662     out[4] += low;
663     out[5] += high;
664 
665     a = ((uint128_t) small1[2]) * small2[3];
666     low = a;
667     high = a >> 64;
668     out[5] += low;
669     out[6] = high;
670 
671     a = ((uint128_t) small1[3]) * small2[2];
672     low = a;
673     high = a >> 64;
674     out[5] += low;
675     out[6] += high;
676 
677     a = ((uint128_t) small1[3]) * small2[3];
678     low = a;
679     high = a >> 64;
680     out[6] += low;
681     out[7] = high;
682 }
683 
684 /*-
685  * felem_mul sets |out| = |in1| * |in2|
686  * On entry:
687  *   in1[i] < 2^109
688  *   in2[i] < 2^109
689  * On exit:
690  *   out[i] < 7 * 2^64 < 2^67
691  */
felem_mul(longfelem out,const felem in1,const felem in2)692 static void felem_mul(longfelem out, const felem in1, const felem in2)
693 {
694     smallfelem small1, small2;
695     felem_shrink(small1, in1);
696     felem_shrink(small2, in2);
697     smallfelem_mul(out, small1, small2);
698 }
699 
700 /*-
701  * felem_small_mul sets |out| = |small1| * |in2|
702  * On entry:
703  *   small1[i] < 2^64
704  *   in2[i] < 2^109
705  * On exit:
706  *   out[i] < 7 * 2^64 < 2^67
707  */
felem_small_mul(longfelem out,const smallfelem small1,const felem in2)708 static void felem_small_mul(longfelem out, const smallfelem small1,
709                             const felem in2)
710 {
711     smallfelem small2;
712     felem_shrink(small2, in2);
713     smallfelem_mul(out, small1, small2);
714 }
715 
716 #define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4)
717 #define two100 (((limb)1) << 100)
718 #define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4)
719 /* zero100 is 0 mod p */
720 static const felem zero100 =
721     { two100m36m4, two100, two100m36p4, two100m36p4 };
722 
723 /*-
724  * Internal function for the different flavours of felem_reduce.
725  * felem_reduce_ reduces the higher coefficients in[4]-in[7].
726  * On entry:
727  *   out[0] >= in[6] + 2^32*in[6] + in[7] + 2^32*in[7]
728  *   out[1] >= in[7] + 2^32*in[4]
729  *   out[2] >= in[5] + 2^32*in[5]
730  *   out[3] >= in[4] + 2^32*in[5] + 2^32*in[6]
731  * On exit:
732  *   out[0] <= out[0] + in[4] + 2^32*in[5]
733  *   out[1] <= out[1] + in[5] + 2^33*in[6]
734  *   out[2] <= out[2] + in[7] + 2*in[6] + 2^33*in[7]
735  *   out[3] <= out[3] + 2^32*in[4] + 3*in[7]
736  */
felem_reduce_(felem out,const longfelem in)737 static void felem_reduce_(felem out, const longfelem in)
738 {
739     int128_t c;
740     /* combine common terms from below */
741     c = in[4] + (in[5] << 32);
742     out[0] += c;
743     out[3] -= c;
744 
745     c = in[5] - in[7];
746     out[1] += c;
747     out[2] -= c;
748 
749     /* the remaining terms */
750     /* 256: [(0,1),(96,-1),(192,-1),(224,1)] */
751     out[1] -= (in[4] << 32);
752     out[3] += (in[4] << 32);
753 
754     /* 320: [(32,1),(64,1),(128,-1),(160,-1),(224,-1)] */
755     out[2] -= (in[5] << 32);
756 
757     /* 384: [(0,-1),(32,-1),(96,2),(128,2),(224,-1)] */
758     out[0] -= in[6];
759     out[0] -= (in[6] << 32);
760     out[1] += (in[6] << 33);
761     out[2] += (in[6] * 2);
762     out[3] -= (in[6] << 32);
763 
764     /* 448: [(0,-1),(32,-1),(64,-1),(128,1),(160,2),(192,3)] */
765     out[0] -= in[7];
766     out[0] -= (in[7] << 32);
767     out[2] += (in[7] << 33);
768     out[3] += (in[7] * 3);
769 }
770 
771 /*-
772  * felem_reduce converts a longfelem into an felem.
773  * To be called directly after felem_square or felem_mul.
774  * On entry:
775  *   in[0] < 2^64, in[1] < 3*2^64, in[2] < 5*2^64, in[3] < 7*2^64
776  *   in[4] < 7*2^64, in[5] < 5*2^64, in[6] < 3*2^64, in[7] < 2*64
777  * On exit:
778  *   out[i] < 2^101
779  */
felem_reduce(felem out,const longfelem in)780 static void felem_reduce(felem out, const longfelem in)
781 {
782     out[0] = zero100[0] + in[0];
783     out[1] = zero100[1] + in[1];
784     out[2] = zero100[2] + in[2];
785     out[3] = zero100[3] + in[3];
786 
787     felem_reduce_(out, in);
788 
789     /*-
790      * out[0] > 2^100 - 2^36 - 2^4 - 3*2^64 - 3*2^96 - 2^64 - 2^96 > 0
791      * out[1] > 2^100 - 2^64 - 7*2^96 > 0
792      * out[2] > 2^100 - 2^36 + 2^4 - 5*2^64 - 5*2^96 > 0
793      * out[3] > 2^100 - 2^36 + 2^4 - 7*2^64 - 5*2^96 - 3*2^96 > 0
794      *
795      * out[0] < 2^100 + 2^64 + 7*2^64 + 5*2^96 < 2^101
796      * out[1] < 2^100 + 3*2^64 + 5*2^64 + 3*2^97 < 2^101
797      * out[2] < 2^100 + 5*2^64 + 2^64 + 3*2^65 + 2^97 < 2^101
798      * out[3] < 2^100 + 7*2^64 + 7*2^96 + 3*2^64 < 2^101
799      */
800 }
801 
802 /*-
803  * felem_reduce_zero105 converts a larger longfelem into an felem.
804  * On entry:
805  *   in[0] < 2^71
806  * On exit:
807  *   out[i] < 2^106
808  */
felem_reduce_zero105(felem out,const longfelem in)809 static void felem_reduce_zero105(felem out, const longfelem in)
810 {
811     out[0] = zero105[0] + in[0];
812     out[1] = zero105[1] + in[1];
813     out[2] = zero105[2] + in[2];
814     out[3] = zero105[3] + in[3];
815 
816     felem_reduce_(out, in);
817 
818     /*-
819      * out[0] > 2^105 - 2^41 - 2^9 - 2^71 - 2^103 - 2^71 - 2^103 > 0
820      * out[1] > 2^105 - 2^71 - 2^103 > 0
821      * out[2] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 > 0
822      * out[3] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 - 2^103 > 0
823      *
824      * out[0] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
825      * out[1] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
826      * out[2] < 2^105 + 2^71 + 2^71 + 2^71 + 2^103 < 2^106
827      * out[3] < 2^105 + 2^71 + 2^103 + 2^71 < 2^106
828      */
829 }
830 
831 /*
832  * subtract_u64 sets *result = *result - v and *carry to one if the
833  * subtraction underflowed.
834  */
subtract_u64(u64 * result,u64 * carry,u64 v)835 static void subtract_u64(u64 *result, u64 *carry, u64 v)
836 {
837     uint128_t r = *result;
838     r -= v;
839     *carry = (r >> 64) & 1;
840     *result = (u64)r;
841 }
842 
843 /*
844  * felem_contract converts |in| to its unique, minimal representation. On
845  * entry: in[i] < 2^109
846  */
felem_contract(smallfelem out,const felem in)847 static void felem_contract(smallfelem out, const felem in)
848 {
849     unsigned i;
850     u64 all_equal_so_far = 0, result = 0, carry;
851 
852     felem_shrink(out, in);
853     /* small is minimal except that the value might be > p */
854 
855     all_equal_so_far--;
856     /*
857      * We are doing a constant time test if out >= kPrime. We need to compare
858      * each u64, from most-significant to least significant. For each one, if
859      * all words so far have been equal (m is all ones) then a non-equal
860      * result is the answer. Otherwise we continue.
861      */
862     for (i = 3; i < 4; i--) {
863         u64 equal;
864         uint128_t a = ((uint128_t) kPrime[i]) - out[i];
865         /*
866          * if out[i] > kPrime[i] then a will underflow and the high 64-bits
867          * will all be set.
868          */
869         result |= all_equal_so_far & ((u64)(a >> 64));
870 
871         /*
872          * if kPrime[i] == out[i] then |equal| will be all zeros and the
873          * decrement will make it all ones.
874          */
875         equal = kPrime[i] ^ out[i];
876         equal--;
877         equal &= equal << 32;
878         equal &= equal << 16;
879         equal &= equal << 8;
880         equal &= equal << 4;
881         equal &= equal << 2;
882         equal &= equal << 1;
883         equal = 0 - (equal >> 63);
884 
885         all_equal_so_far &= equal;
886     }
887 
888     /*
889      * if all_equal_so_far is still all ones then the two values are equal
890      * and so out >= kPrime is true.
891      */
892     result |= all_equal_so_far;
893 
894     /* if out >= kPrime then we subtract kPrime. */
895     subtract_u64(&out[0], &carry, result & kPrime[0]);
896     subtract_u64(&out[1], &carry, carry);
897     subtract_u64(&out[2], &carry, carry);
898     subtract_u64(&out[3], &carry, carry);
899 
900     subtract_u64(&out[1], &carry, result & kPrime[1]);
901     subtract_u64(&out[2], &carry, carry);
902     subtract_u64(&out[3], &carry, carry);
903 
904     subtract_u64(&out[2], &carry, result & kPrime[2]);
905     subtract_u64(&out[3], &carry, carry);
906 
907     subtract_u64(&out[3], &carry, result & kPrime[3]);
908 }
909 
smallfelem_square_contract(smallfelem out,const smallfelem in)910 static void smallfelem_square_contract(smallfelem out, const smallfelem in)
911 {
912     longfelem longtmp;
913     felem tmp;
914 
915     smallfelem_square(longtmp, in);
916     felem_reduce(tmp, longtmp);
917     felem_contract(out, tmp);
918 }
919 
smallfelem_mul_contract(smallfelem out,const smallfelem in1,const smallfelem in2)920 static void smallfelem_mul_contract(smallfelem out, const smallfelem in1,
921                                     const smallfelem in2)
922 {
923     longfelem longtmp;
924     felem tmp;
925 
926     smallfelem_mul(longtmp, in1, in2);
927     felem_reduce(tmp, longtmp);
928     felem_contract(out, tmp);
929 }
930 
931 /*-
932  * felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
933  * otherwise.
934  * On entry:
935  *   small[i] < 2^64
936  */
smallfelem_is_zero(const smallfelem small)937 static limb smallfelem_is_zero(const smallfelem small)
938 {
939     limb result;
940     u64 is_p;
941 
942     u64 is_zero = small[0] | small[1] | small[2] | small[3];
943     is_zero--;
944     is_zero &= is_zero << 32;
945     is_zero &= is_zero << 16;
946     is_zero &= is_zero << 8;
947     is_zero &= is_zero << 4;
948     is_zero &= is_zero << 2;
949     is_zero &= is_zero << 1;
950     is_zero = 0 - (is_zero >> 63);
951 
952     is_p = (small[0] ^ kPrime[0]) |
953         (small[1] ^ kPrime[1]) |
954         (small[2] ^ kPrime[2]) | (small[3] ^ kPrime[3]);
955     is_p--;
956     is_p &= is_p << 32;
957     is_p &= is_p << 16;
958     is_p &= is_p << 8;
959     is_p &= is_p << 4;
960     is_p &= is_p << 2;
961     is_p &= is_p << 1;
962     is_p = 0 - (is_p >> 63);
963 
964     is_zero |= is_p;
965 
966     result = is_zero;
967     result |= ((limb) is_zero) << 64;
968     return result;
969 }
970 
smallfelem_is_zero_int(const void * small)971 static int smallfelem_is_zero_int(const void *small)
972 {
973     return (int)(smallfelem_is_zero(small) & ((limb) 1));
974 }
975 
976 /*-
977  * felem_inv calculates |out| = |in|^{-1}
978  *
979  * Based on Fermat's Little Theorem:
980  *   a^p = a (mod p)
981  *   a^{p-1} = 1 (mod p)
982  *   a^{p-2} = a^{-1} (mod p)
983  */
felem_inv(felem out,const felem in)984 static void felem_inv(felem out, const felem in)
985 {
986     felem ftmp, ftmp2;
987     /* each e_I will hold |in|^{2^I - 1} */
988     felem e2, e4, e8, e16, e32, e64;
989     longfelem tmp;
990     unsigned i;
991 
992     felem_square(tmp, in);
993     felem_reduce(ftmp, tmp);    /* 2^1 */
994     felem_mul(tmp, in, ftmp);
995     felem_reduce(ftmp, tmp);    /* 2^2 - 2^0 */
996     felem_assign(e2, ftmp);
997     felem_square(tmp, ftmp);
998     felem_reduce(ftmp, tmp);    /* 2^3 - 2^1 */
999     felem_square(tmp, ftmp);
1000     felem_reduce(ftmp, tmp);    /* 2^4 - 2^2 */
1001     felem_mul(tmp, ftmp, e2);
1002     felem_reduce(ftmp, tmp);    /* 2^4 - 2^0 */
1003     felem_assign(e4, ftmp);
1004     felem_square(tmp, ftmp);
1005     felem_reduce(ftmp, tmp);    /* 2^5 - 2^1 */
1006     felem_square(tmp, ftmp);
1007     felem_reduce(ftmp, tmp);    /* 2^6 - 2^2 */
1008     felem_square(tmp, ftmp);
1009     felem_reduce(ftmp, tmp);    /* 2^7 - 2^3 */
1010     felem_square(tmp, ftmp);
1011     felem_reduce(ftmp, tmp);    /* 2^8 - 2^4 */
1012     felem_mul(tmp, ftmp, e4);
1013     felem_reduce(ftmp, tmp);    /* 2^8 - 2^0 */
1014     felem_assign(e8, ftmp);
1015     for (i = 0; i < 8; i++) {
1016         felem_square(tmp, ftmp);
1017         felem_reduce(ftmp, tmp);
1018     }                           /* 2^16 - 2^8 */
1019     felem_mul(tmp, ftmp, e8);
1020     felem_reduce(ftmp, tmp);    /* 2^16 - 2^0 */
1021     felem_assign(e16, ftmp);
1022     for (i = 0; i < 16; i++) {
1023         felem_square(tmp, ftmp);
1024         felem_reduce(ftmp, tmp);
1025     }                           /* 2^32 - 2^16 */
1026     felem_mul(tmp, ftmp, e16);
1027     felem_reduce(ftmp, tmp);    /* 2^32 - 2^0 */
1028     felem_assign(e32, ftmp);
1029     for (i = 0; i < 32; i++) {
1030         felem_square(tmp, ftmp);
1031         felem_reduce(ftmp, tmp);
1032     }                           /* 2^64 - 2^32 */
1033     felem_assign(e64, ftmp);
1034     felem_mul(tmp, ftmp, in);
1035     felem_reduce(ftmp, tmp);    /* 2^64 - 2^32 + 2^0 */
1036     for (i = 0; i < 192; i++) {
1037         felem_square(tmp, ftmp);
1038         felem_reduce(ftmp, tmp);
1039     }                           /* 2^256 - 2^224 + 2^192 */
1040 
1041     felem_mul(tmp, e64, e32);
1042     felem_reduce(ftmp2, tmp);   /* 2^64 - 2^0 */
1043     for (i = 0; i < 16; i++) {
1044         felem_square(tmp, ftmp2);
1045         felem_reduce(ftmp2, tmp);
1046     }                           /* 2^80 - 2^16 */
1047     felem_mul(tmp, ftmp2, e16);
1048     felem_reduce(ftmp2, tmp);   /* 2^80 - 2^0 */
1049     for (i = 0; i < 8; i++) {
1050         felem_square(tmp, ftmp2);
1051         felem_reduce(ftmp2, tmp);
1052     }                           /* 2^88 - 2^8 */
1053     felem_mul(tmp, ftmp2, e8);
1054     felem_reduce(ftmp2, tmp);   /* 2^88 - 2^0 */
1055     for (i = 0; i < 4; i++) {
1056         felem_square(tmp, ftmp2);
1057         felem_reduce(ftmp2, tmp);
1058     }                           /* 2^92 - 2^4 */
1059     felem_mul(tmp, ftmp2, e4);
1060     felem_reduce(ftmp2, tmp);   /* 2^92 - 2^0 */
1061     felem_square(tmp, ftmp2);
1062     felem_reduce(ftmp2, tmp);   /* 2^93 - 2^1 */
1063     felem_square(tmp, ftmp2);
1064     felem_reduce(ftmp2, tmp);   /* 2^94 - 2^2 */
1065     felem_mul(tmp, ftmp2, e2);
1066     felem_reduce(ftmp2, tmp);   /* 2^94 - 2^0 */
1067     felem_square(tmp, ftmp2);
1068     felem_reduce(ftmp2, tmp);   /* 2^95 - 2^1 */
1069     felem_square(tmp, ftmp2);
1070     felem_reduce(ftmp2, tmp);   /* 2^96 - 2^2 */
1071     felem_mul(tmp, ftmp2, in);
1072     felem_reduce(ftmp2, tmp);   /* 2^96 - 3 */
1073 
1074     felem_mul(tmp, ftmp2, ftmp);
1075     felem_reduce(out, tmp);     /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
1076 }
1077 
smallfelem_inv_contract(smallfelem out,const smallfelem in)1078 static void smallfelem_inv_contract(smallfelem out, const smallfelem in)
1079 {
1080     felem tmp;
1081 
1082     smallfelem_expand(tmp, in);
1083     felem_inv(tmp, tmp);
1084     felem_contract(out, tmp);
1085 }
1086 
1087 /*-
1088  * Group operations
1089  * ----------------
1090  *
1091  * Building on top of the field operations we have the operations on the
1092  * elliptic curve group itself. Points on the curve are represented in Jacobian
1093  * coordinates
1094  */
1095 
1096 /*-
1097  * point_double calculates 2*(x_in, y_in, z_in)
1098  *
1099  * The method is taken from:
1100  *   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1101  *
1102  * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
1103  * while x_out == y_in is not (maybe this works, but it's not tested).
1104  */
1105 static void
point_double(felem x_out,felem y_out,felem z_out,const felem x_in,const felem y_in,const felem z_in)1106 point_double(felem x_out, felem y_out, felem z_out,
1107              const felem x_in, const felem y_in, const felem z_in)
1108 {
1109     longfelem tmp, tmp2;
1110     felem delta, gamma, beta, alpha, ftmp, ftmp2;
1111     smallfelem small1, small2;
1112 
1113     felem_assign(ftmp, x_in);
1114     /* ftmp[i] < 2^106 */
1115     felem_assign(ftmp2, x_in);
1116     /* ftmp2[i] < 2^106 */
1117 
1118     /* delta = z^2 */
1119     felem_square(tmp, z_in);
1120     felem_reduce(delta, tmp);
1121     /* delta[i] < 2^101 */
1122 
1123     /* gamma = y^2 */
1124     felem_square(tmp, y_in);
1125     felem_reduce(gamma, tmp);
1126     /* gamma[i] < 2^101 */
1127     felem_shrink(small1, gamma);
1128 
1129     /* beta = x*gamma */
1130     felem_small_mul(tmp, small1, x_in);
1131     felem_reduce(beta, tmp);
1132     /* beta[i] < 2^101 */
1133 
1134     /* alpha = 3*(x-delta)*(x+delta) */
1135     felem_diff(ftmp, delta);
1136     /* ftmp[i] < 2^105 + 2^106 < 2^107 */
1137     felem_sum(ftmp2, delta);
1138     /* ftmp2[i] < 2^105 + 2^106 < 2^107 */
1139     felem_scalar(ftmp2, 3);
1140     /* ftmp2[i] < 3 * 2^107 < 2^109 */
1141     felem_mul(tmp, ftmp, ftmp2);
1142     felem_reduce(alpha, tmp);
1143     /* alpha[i] < 2^101 */
1144     felem_shrink(small2, alpha);
1145 
1146     /* x' = alpha^2 - 8*beta */
1147     smallfelem_square(tmp, small2);
1148     felem_reduce(x_out, tmp);
1149     felem_assign(ftmp, beta);
1150     felem_scalar(ftmp, 8);
1151     /* ftmp[i] < 8 * 2^101 = 2^104 */
1152     felem_diff(x_out, ftmp);
1153     /* x_out[i] < 2^105 + 2^101 < 2^106 */
1154 
1155     /* z' = (y + z)^2 - gamma - delta */
1156     felem_sum(delta, gamma);
1157     /* delta[i] < 2^101 + 2^101 = 2^102 */
1158     felem_assign(ftmp, y_in);
1159     felem_sum(ftmp, z_in);
1160     /* ftmp[i] < 2^106 + 2^106 = 2^107 */
1161     felem_square(tmp, ftmp);
1162     felem_reduce(z_out, tmp);
1163     felem_diff(z_out, delta);
1164     /* z_out[i] < 2^105 + 2^101 < 2^106 */
1165 
1166     /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1167     felem_scalar(beta, 4);
1168     /* beta[i] < 4 * 2^101 = 2^103 */
1169     felem_diff_zero107(beta, x_out);
1170     /* beta[i] < 2^107 + 2^103 < 2^108 */
1171     felem_small_mul(tmp, small2, beta);
1172     /* tmp[i] < 7 * 2^64 < 2^67 */
1173     smallfelem_square(tmp2, small1);
1174     /* tmp2[i] < 7 * 2^64 */
1175     longfelem_scalar(tmp2, 8);
1176     /* tmp2[i] < 8 * 7 * 2^64 = 7 * 2^67 */
1177     longfelem_diff(tmp, tmp2);
1178     /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1179     felem_reduce_zero105(y_out, tmp);
1180     /* y_out[i] < 2^106 */
1181 }
1182 
1183 /*
1184  * point_double_small is the same as point_double, except that it operates on
1185  * smallfelems
1186  */
1187 static void
point_double_small(smallfelem x_out,smallfelem y_out,smallfelem z_out,const smallfelem x_in,const smallfelem y_in,const smallfelem z_in)1188 point_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out,
1189                    const smallfelem x_in, const smallfelem y_in,
1190                    const smallfelem z_in)
1191 {
1192     felem felem_x_out, felem_y_out, felem_z_out;
1193     felem felem_x_in, felem_y_in, felem_z_in;
1194 
1195     smallfelem_expand(felem_x_in, x_in);
1196     smallfelem_expand(felem_y_in, y_in);
1197     smallfelem_expand(felem_z_in, z_in);
1198     point_double(felem_x_out, felem_y_out, felem_z_out,
1199                  felem_x_in, felem_y_in, felem_z_in);
1200     felem_shrink(x_out, felem_x_out);
1201     felem_shrink(y_out, felem_y_out);
1202     felem_shrink(z_out, felem_z_out);
1203 }
1204 
1205 /* copy_conditional copies in to out iff mask is all ones. */
copy_conditional(felem out,const felem in,limb mask)1206 static void copy_conditional(felem out, const felem in, limb mask)
1207 {
1208     unsigned i;
1209     for (i = 0; i < NLIMBS; ++i) {
1210         const limb tmp = mask & (in[i] ^ out[i]);
1211         out[i] ^= tmp;
1212     }
1213 }
1214 
1215 /* copy_small_conditional copies in to out iff mask is all ones. */
copy_small_conditional(felem out,const smallfelem in,limb mask)1216 static void copy_small_conditional(felem out, const smallfelem in, limb mask)
1217 {
1218     unsigned i;
1219     const u64 mask64 = mask;
1220     for (i = 0; i < NLIMBS; ++i) {
1221         out[i] = ((limb) (in[i] & mask64)) | (out[i] & ~mask);
1222     }
1223 }
1224 
1225 /*-
1226  * point_add calculates (x1, y1, z1) + (x2, y2, z2)
1227  *
1228  * The method is taken from:
1229  *   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1230  * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1231  *
1232  * This function includes a branch for checking whether the two input points
1233  * are equal, (while not equal to the point at infinity). This case never
1234  * happens during single point multiplication, so there is no timing leak for
1235  * ECDH or ECDSA signing.
1236  */
point_add(felem x3,felem y3,felem z3,const felem x1,const felem y1,const felem z1,const int mixed,const smallfelem x2,const smallfelem y2,const smallfelem z2)1237 static void point_add(felem x3, felem y3, felem z3,
1238                       const felem x1, const felem y1, const felem z1,
1239                       const int mixed, const smallfelem x2,
1240                       const smallfelem y2, const smallfelem z2)
1241 {
1242     felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1243     longfelem tmp, tmp2;
1244     smallfelem small1, small2, small3, small4, small5;
1245     limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1246     limb points_equal;
1247 
1248     felem_shrink(small3, z1);
1249 
1250     z1_is_zero = smallfelem_is_zero(small3);
1251     z2_is_zero = smallfelem_is_zero(z2);
1252 
1253     /* ftmp = z1z1 = z1**2 */
1254     smallfelem_square(tmp, small3);
1255     felem_reduce(ftmp, tmp);
1256     /* ftmp[i] < 2^101 */
1257     felem_shrink(small1, ftmp);
1258 
1259     if (!mixed) {
1260         /* ftmp2 = z2z2 = z2**2 */
1261         smallfelem_square(tmp, z2);
1262         felem_reduce(ftmp2, tmp);
1263         /* ftmp2[i] < 2^101 */
1264         felem_shrink(small2, ftmp2);
1265 
1266         felem_shrink(small5, x1);
1267 
1268         /* u1 = ftmp3 = x1*z2z2 */
1269         smallfelem_mul(tmp, small5, small2);
1270         felem_reduce(ftmp3, tmp);
1271         /* ftmp3[i] < 2^101 */
1272 
1273         /* ftmp5 = z1 + z2 */
1274         felem_assign(ftmp5, z1);
1275         felem_small_sum(ftmp5, z2);
1276         /* ftmp5[i] < 2^107 */
1277 
1278         /* ftmp5 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2 */
1279         felem_square(tmp, ftmp5);
1280         felem_reduce(ftmp5, tmp);
1281         /* ftmp2 = z2z2 + z1z1 */
1282         felem_sum(ftmp2, ftmp);
1283         /* ftmp2[i] < 2^101 + 2^101 = 2^102 */
1284         felem_diff(ftmp5, ftmp2);
1285         /* ftmp5[i] < 2^105 + 2^101 < 2^106 */
1286 
1287         /* ftmp2 = z2 * z2z2 */
1288         smallfelem_mul(tmp, small2, z2);
1289         felem_reduce(ftmp2, tmp);
1290 
1291         /* s1 = ftmp2 = y1 * z2**3 */
1292         felem_mul(tmp, y1, ftmp2);
1293         felem_reduce(ftmp6, tmp);
1294         /* ftmp6[i] < 2^101 */
1295     } else {
1296         /*
1297          * We'll assume z2 = 1 (special case z2 = 0 is handled later)
1298          */
1299 
1300         /* u1 = ftmp3 = x1*z2z2 */
1301         felem_assign(ftmp3, x1);
1302         /* ftmp3[i] < 2^106 */
1303 
1304         /* ftmp5 = 2z1z2 */
1305         felem_assign(ftmp5, z1);
1306         felem_scalar(ftmp5, 2);
1307         /* ftmp5[i] < 2*2^106 = 2^107 */
1308 
1309         /* s1 = ftmp2 = y1 * z2**3 */
1310         felem_assign(ftmp6, y1);
1311         /* ftmp6[i] < 2^106 */
1312     }
1313 
1314     /* u2 = x2*z1z1 */
1315     smallfelem_mul(tmp, x2, small1);
1316     felem_reduce(ftmp4, tmp);
1317 
1318     /* h = ftmp4 = u2 - u1 */
1319     felem_diff_zero107(ftmp4, ftmp3);
1320     /* ftmp4[i] < 2^107 + 2^101 < 2^108 */
1321     felem_shrink(small4, ftmp4);
1322 
1323     x_equal = smallfelem_is_zero(small4);
1324 
1325     /* z_out = ftmp5 * h */
1326     felem_small_mul(tmp, small4, ftmp5);
1327     felem_reduce(z_out, tmp);
1328     /* z_out[i] < 2^101 */
1329 
1330     /* ftmp = z1 * z1z1 */
1331     smallfelem_mul(tmp, small1, small3);
1332     felem_reduce(ftmp, tmp);
1333 
1334     /* s2 = tmp = y2 * z1**3 */
1335     felem_small_mul(tmp, y2, ftmp);
1336     felem_reduce(ftmp5, tmp);
1337 
1338     /* r = ftmp5 = (s2 - s1)*2 */
1339     felem_diff_zero107(ftmp5, ftmp6);
1340     /* ftmp5[i] < 2^107 + 2^107 = 2^108 */
1341     felem_scalar(ftmp5, 2);
1342     /* ftmp5[i] < 2^109 */
1343     felem_shrink(small1, ftmp5);
1344     y_equal = smallfelem_is_zero(small1);
1345 
1346     /*
1347      * The formulae are incorrect if the points are equal, in affine coordinates
1348      * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
1349      * happens.
1350      *
1351      * We use bitwise operations to avoid potential side-channels introduced by
1352      * the short-circuiting behaviour of boolean operators.
1353      *
1354      * The special case of either point being the point at infinity (z1 and/or
1355      * z2 are zero), is handled separately later on in this function, so we
1356      * avoid jumping to point_double here in those special cases.
1357      */
1358     points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero));
1359 
1360     if (points_equal) {
1361         /*
1362          * This is obviously not constant-time but, as mentioned before, this
1363          * case never happens during single point multiplication, so there is no
1364          * timing leak for ECDH or ECDSA signing.
1365          */
1366         point_double(x3, y3, z3, x1, y1, z1);
1367         return;
1368     }
1369 
1370     /* I = ftmp = (2h)**2 */
1371     felem_assign(ftmp, ftmp4);
1372     felem_scalar(ftmp, 2);
1373     /* ftmp[i] < 2*2^108 = 2^109 */
1374     felem_square(tmp, ftmp);
1375     felem_reduce(ftmp, tmp);
1376 
1377     /* J = ftmp2 = h * I */
1378     felem_mul(tmp, ftmp4, ftmp);
1379     felem_reduce(ftmp2, tmp);
1380 
1381     /* V = ftmp4 = U1 * I */
1382     felem_mul(tmp, ftmp3, ftmp);
1383     felem_reduce(ftmp4, tmp);
1384 
1385     /* x_out = r**2 - J - 2V */
1386     smallfelem_square(tmp, small1);
1387     felem_reduce(x_out, tmp);
1388     felem_assign(ftmp3, ftmp4);
1389     felem_scalar(ftmp4, 2);
1390     felem_sum(ftmp4, ftmp2);
1391     /* ftmp4[i] < 2*2^101 + 2^101 < 2^103 */
1392     felem_diff(x_out, ftmp4);
1393     /* x_out[i] < 2^105 + 2^101 */
1394 
1395     /* y_out = r(V-x_out) - 2 * s1 * J */
1396     felem_diff_zero107(ftmp3, x_out);
1397     /* ftmp3[i] < 2^107 + 2^101 < 2^108 */
1398     felem_small_mul(tmp, small1, ftmp3);
1399     felem_mul(tmp2, ftmp6, ftmp2);
1400     longfelem_scalar(tmp2, 2);
1401     /* tmp2[i] < 2*2^67 = 2^68 */
1402     longfelem_diff(tmp, tmp2);
1403     /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1404     felem_reduce_zero105(y_out, tmp);
1405     /* y_out[i] < 2^106 */
1406 
1407     copy_small_conditional(x_out, x2, z1_is_zero);
1408     copy_conditional(x_out, x1, z2_is_zero);
1409     copy_small_conditional(y_out, y2, z1_is_zero);
1410     copy_conditional(y_out, y1, z2_is_zero);
1411     copy_small_conditional(z_out, z2, z1_is_zero);
1412     copy_conditional(z_out, z1, z2_is_zero);
1413     felem_assign(x3, x_out);
1414     felem_assign(y3, y_out);
1415     felem_assign(z3, z_out);
1416 }
1417 
1418 /*
1419  * point_add_small is the same as point_add, except that it operates on
1420  * smallfelems
1421  */
point_add_small(smallfelem x3,smallfelem y3,smallfelem z3,smallfelem x1,smallfelem y1,smallfelem z1,smallfelem x2,smallfelem y2,smallfelem z2)1422 static void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3,
1423                             smallfelem x1, smallfelem y1, smallfelem z1,
1424                             smallfelem x2, smallfelem y2, smallfelem z2)
1425 {
1426     felem felem_x3, felem_y3, felem_z3;
1427     felem felem_x1, felem_y1, felem_z1;
1428     smallfelem_expand(felem_x1, x1);
1429     smallfelem_expand(felem_y1, y1);
1430     smallfelem_expand(felem_z1, z1);
1431     point_add(felem_x3, felem_y3, felem_z3, felem_x1, felem_y1, felem_z1, 0,
1432               x2, y2, z2);
1433     felem_shrink(x3, felem_x3);
1434     felem_shrink(y3, felem_y3);
1435     felem_shrink(z3, felem_z3);
1436 }
1437 
1438 /*-
1439  * Base point pre computation
1440  * --------------------------
1441  *
1442  * Two different sorts of precomputed tables are used in the following code.
1443  * Each contain various points on the curve, where each point is three field
1444  * elements (x, y, z).
1445  *
1446  * For the base point table, z is usually 1 (0 for the point at infinity).
1447  * This table has 2 * 16 elements, starting with the following:
1448  * index | bits    | point
1449  * ------+---------+------------------------------
1450  *     0 | 0 0 0 0 | 0G
1451  *     1 | 0 0 0 1 | 1G
1452  *     2 | 0 0 1 0 | 2^64G
1453  *     3 | 0 0 1 1 | (2^64 + 1)G
1454  *     4 | 0 1 0 0 | 2^128G
1455  *     5 | 0 1 0 1 | (2^128 + 1)G
1456  *     6 | 0 1 1 0 | (2^128 + 2^64)G
1457  *     7 | 0 1 1 1 | (2^128 + 2^64 + 1)G
1458  *     8 | 1 0 0 0 | 2^192G
1459  *     9 | 1 0 0 1 | (2^192 + 1)G
1460  *    10 | 1 0 1 0 | (2^192 + 2^64)G
1461  *    11 | 1 0 1 1 | (2^192 + 2^64 + 1)G
1462  *    12 | 1 1 0 0 | (2^192 + 2^128)G
1463  *    13 | 1 1 0 1 | (2^192 + 2^128 + 1)G
1464  *    14 | 1 1 1 0 | (2^192 + 2^128 + 2^64)G
1465  *    15 | 1 1 1 1 | (2^192 + 2^128 + 2^64 + 1)G
1466  * followed by a copy of this with each element multiplied by 2^32.
1467  *
1468  * The reason for this is so that we can clock bits into four different
1469  * locations when doing simple scalar multiplies against the base point,
1470  * and then another four locations using the second 16 elements.
1471  *
1472  * Tables for other points have table[i] = iG for i in 0 .. 16. */
1473 
1474 /* gmul is the table of precomputed base points */
1475 static const smallfelem gmul[2][16][3] = {
1476     {{{0, 0, 0, 0},
1477       {0, 0, 0, 0},
1478       {0, 0, 0, 0}},
1479      {{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2,
1480        0x6b17d1f2e12c4247},
1481       {0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16,
1482        0x4fe342e2fe1a7f9b},
1483       {1, 0, 0, 0}},
1484      {{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de,
1485        0x0fa822bc2811aaa5},
1486       {0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b,
1487        0xbff44ae8f5dba80d},
1488       {1, 0, 0, 0}},
1489      {{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789,
1490        0x300a4bbc89d6726f},
1491       {0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f,
1492        0x72aac7e0d09b4644},
1493       {1, 0, 0, 0}},
1494      {{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e,
1495        0x447d739beedb5e67},
1496       {0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7,
1497        0x2d4825ab834131ee},
1498       {1, 0, 0, 0}},
1499      {{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60,
1500        0xef9519328a9c72ff},
1501       {0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c,
1502        0x611e9fc37dbb2c9b},
1503       {1, 0, 0, 0}},
1504      {{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf,
1505        0x550663797b51f5d8},
1506       {0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5,
1507        0x157164848aecb851},
1508       {1, 0, 0, 0}},
1509      {{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391,
1510        0xeb5d7745b21141ea},
1511       {0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee,
1512        0xeafd72ebdbecc17b},
1513       {1, 0, 0, 0}},
1514      {{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5,
1515        0xa6d39677a7849276},
1516       {0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf,
1517        0x674f84749b0b8816},
1518       {1, 0, 0, 0}},
1519      {{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb,
1520        0x4e769e7672c9ddad},
1521       {0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281,
1522        0x42b99082de830663},
1523       {1, 0, 0, 0}},
1524      {{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478,
1525        0x78878ef61c6ce04d},
1526       {0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def,
1527        0xb6cb3f5d7b72c321},
1528       {1, 0, 0, 0}},
1529      {{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae,
1530        0x0c88bc4d716b1287},
1531       {0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa,
1532        0xdd5ddea3f3901dc6},
1533       {1, 0, 0, 0}},
1534      {{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3,
1535        0x68f344af6b317466},
1536       {0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3,
1537        0x31b9c405f8540a20},
1538       {1, 0, 0, 0}},
1539      {{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0,
1540        0x4052bf4b6f461db9},
1541       {0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8,
1542        0xfecf4d5190b0fc61},
1543       {1, 0, 0, 0}},
1544      {{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a,
1545        0x1eddbae2c802e41a},
1546       {0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0,
1547        0x43104d86560ebcfc},
1548       {1, 0, 0, 0}},
1549      {{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a,
1550        0xb48e26b484f7a21c},
1551       {0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668,
1552        0xfac015404d4d3dab},
1553       {1, 0, 0, 0}}},
1554     {{{0, 0, 0, 0},
1555       {0, 0, 0, 0},
1556       {0, 0, 0, 0}},
1557      {{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da,
1558        0x7fe36b40af22af89},
1559       {0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1,
1560        0xe697d45825b63624},
1561       {1, 0, 0, 0}},
1562      {{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902,
1563        0x4a5b506612a677a6},
1564       {0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40,
1565        0xeb13461ceac089f1},
1566       {1, 0, 0, 0}},
1567      {{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857,
1568        0x0781b8291c6a220a},
1569       {0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434,
1570        0x690cde8df0151593},
1571       {1, 0, 0, 0}},
1572      {{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326,
1573        0x8a535f566ec73617},
1574       {0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf,
1575        0x0455c08468b08bd7},
1576       {1, 0, 0, 0}},
1577      {{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279,
1578        0x06bada7ab77f8276},
1579       {0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70,
1580        0x5b476dfd0e6cb18a},
1581       {1, 0, 0, 0}},
1582      {{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8,
1583        0x3e29864e8a2ec908},
1584       {0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed,
1585        0x239b90ea3dc31e7e},
1586       {1, 0, 0, 0}},
1587      {{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4,
1588        0x820f4dd949f72ff7},
1589       {0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3,
1590        0x140406ec783a05ec},
1591       {1, 0, 0, 0}},
1592      {{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe,
1593        0x68f6b8542783dfee},
1594       {0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028,
1595        0xcbe1feba92e40ce6},
1596       {1, 0, 0, 0}},
1597      {{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927,
1598        0xd0b2f94d2f420109},
1599       {0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a,
1600        0x971459828b0719e5},
1601       {1, 0, 0, 0}},
1602      {{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687,
1603        0x961610004a866aba},
1604       {0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c,
1605        0x7acb9fadcee75e44},
1606       {1, 0, 0, 0}},
1607      {{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea,
1608        0x24eb9acca333bf5b},
1609       {0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d,
1610        0x69f891c5acd079cc},
1611       {1, 0, 0, 0}},
1612      {{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514,
1613        0xe51f547c5972a107},
1614       {0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06,
1615        0x1c309a2b25bb1387},
1616       {1, 0, 0, 0}},
1617      {{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828,
1618        0x20b87b8aa2c4e503},
1619       {0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044,
1620        0xf5c6fa49919776be},
1621       {1, 0, 0, 0}},
1622      {{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56,
1623        0x1ed7d1b9332010b9},
1624       {0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24,
1625        0x3a2b03f03217257a},
1626       {1, 0, 0, 0}},
1627      {{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b,
1628        0x15fee545c78dd9f6},
1629       {0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb,
1630        0x4ab5b6b2b8753f81},
1631       {1, 0, 0, 0}}}
1632 };
1633 
1634 /*
1635  * select_point selects the |idx|th point from a precomputation table and
1636  * copies it to out.
1637  */
select_point(const u64 idx,unsigned int size,const smallfelem pre_comp[16][3],smallfelem out[3])1638 static void select_point(const u64 idx, unsigned int size,
1639                          const smallfelem pre_comp[16][3], smallfelem out[3])
1640 {
1641     unsigned i, j;
1642     u64 *outlimbs = &out[0][0];
1643 
1644     memset(out, 0, sizeof(*out) * 3);
1645 
1646     for (i = 0; i < size; i++) {
1647         const u64 *inlimbs = (u64 *)&pre_comp[i][0][0];
1648         u64 mask = i ^ idx;
1649         mask |= mask >> 4;
1650         mask |= mask >> 2;
1651         mask |= mask >> 1;
1652         mask &= 1;
1653         mask--;
1654         for (j = 0; j < NLIMBS * 3; j++)
1655             outlimbs[j] |= inlimbs[j] & mask;
1656     }
1657 }
1658 
1659 /* get_bit returns the |i|th bit in |in| */
get_bit(const felem_bytearray in,int i)1660 static char get_bit(const felem_bytearray in, int i)
1661 {
1662     if ((i < 0) || (i >= 256))
1663         return 0;
1664     return (in[i >> 3] >> (i & 7)) & 1;
1665 }
1666 
1667 /*
1668  * Interleaved point multiplication using precomputed point multiples: The
1669  * small point multiples 0*P, 1*P, ..., 17*P are in pre_comp[], the scalars
1670  * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1671  * generator, using certain (large) precomputed multiples in g_pre_comp.
1672  * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1673  */
batch_mul(felem x_out,felem y_out,felem z_out,const felem_bytearray scalars[],const unsigned num_points,const u8 * g_scalar,const int mixed,const smallfelem pre_comp[][17][3],const smallfelem g_pre_comp[2][16][3])1674 static void batch_mul(felem x_out, felem y_out, felem z_out,
1675                       const felem_bytearray scalars[],
1676                       const unsigned num_points, const u8 *g_scalar,
1677                       const int mixed, const smallfelem pre_comp[][17][3],
1678                       const smallfelem g_pre_comp[2][16][3])
1679 {
1680     int i, skip;
1681     unsigned num, gen_mul = (g_scalar != NULL);
1682     felem nq[3], ftmp;
1683     smallfelem tmp[3];
1684     u64 bits;
1685     u8 sign, digit;
1686 
1687     /* set nq to the point at infinity */
1688     memset(nq, 0, sizeof(nq));
1689 
1690     /*
1691      * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1692      * of the generator (two in each of the last 32 rounds) and additions of
1693      * other points multiples (every 5th round).
1694      */
1695     skip = 1;                   /* save two point operations in the first
1696                                  * round */
1697     for (i = (num_points ? 255 : 31); i >= 0; --i) {
1698         /* double */
1699         if (!skip)
1700             point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1701 
1702         /* add multiples of the generator */
1703         if (gen_mul && (i <= 31)) {
1704             /* first, look 32 bits upwards */
1705             bits = get_bit(g_scalar, i + 224) << 3;
1706             bits |= get_bit(g_scalar, i + 160) << 2;
1707             bits |= get_bit(g_scalar, i + 96) << 1;
1708             bits |= get_bit(g_scalar, i + 32);
1709             /* select the point to add, in constant time */
1710             select_point(bits, 16, g_pre_comp[1], tmp);
1711 
1712             if (!skip) {
1713                 /* Arg 1 below is for "mixed" */
1714                 point_add(nq[0], nq[1], nq[2],
1715                           nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1716             } else {
1717                 smallfelem_expand(nq[0], tmp[0]);
1718                 smallfelem_expand(nq[1], tmp[1]);
1719                 smallfelem_expand(nq[2], tmp[2]);
1720                 skip = 0;
1721             }
1722 
1723             /* second, look at the current position */
1724             bits = get_bit(g_scalar, i + 192) << 3;
1725             bits |= get_bit(g_scalar, i + 128) << 2;
1726             bits |= get_bit(g_scalar, i + 64) << 1;
1727             bits |= get_bit(g_scalar, i);
1728             /* select the point to add, in constant time */
1729             select_point(bits, 16, g_pre_comp[0], tmp);
1730             /* Arg 1 below is for "mixed" */
1731             point_add(nq[0], nq[1], nq[2],
1732                       nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1733         }
1734 
1735         /* do other additions every 5 doublings */
1736         if (num_points && (i % 5 == 0)) {
1737             /* loop over all scalars */
1738             for (num = 0; num < num_points; ++num) {
1739                 bits = get_bit(scalars[num], i + 4) << 5;
1740                 bits |= get_bit(scalars[num], i + 3) << 4;
1741                 bits |= get_bit(scalars[num], i + 2) << 3;
1742                 bits |= get_bit(scalars[num], i + 1) << 2;
1743                 bits |= get_bit(scalars[num], i) << 1;
1744                 bits |= get_bit(scalars[num], i - 1);
1745                 ossl_ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1746 
1747                 /*
1748                  * select the point to add or subtract, in constant time
1749                  */
1750                 select_point(digit, 17, pre_comp[num], tmp);
1751                 smallfelem_neg(ftmp, tmp[1]); /* (X, -Y, Z) is the negative
1752                                                * point */
1753                 copy_small_conditional(ftmp, tmp[1], (((limb) sign) - 1));
1754                 felem_contract(tmp[1], ftmp);
1755 
1756                 if (!skip) {
1757                     point_add(nq[0], nq[1], nq[2],
1758                               nq[0], nq[1], nq[2],
1759                               mixed, tmp[0], tmp[1], tmp[2]);
1760                 } else {
1761                     smallfelem_expand(nq[0], tmp[0]);
1762                     smallfelem_expand(nq[1], tmp[1]);
1763                     smallfelem_expand(nq[2], tmp[2]);
1764                     skip = 0;
1765                 }
1766             }
1767         }
1768     }
1769     felem_assign(x_out, nq[0]);
1770     felem_assign(y_out, nq[1]);
1771     felem_assign(z_out, nq[2]);
1772 }
1773 
1774 /* Precomputation for the group generator. */
1775 struct nistp256_pre_comp_st {
1776     smallfelem g_pre_comp[2][16][3];
1777     CRYPTO_REF_COUNT references;
1778 };
1779 
EC_GFp_nistp256_method(void)1780 const EC_METHOD *EC_GFp_nistp256_method(void)
1781 {
1782     static const EC_METHOD ret = {
1783         EC_FLAGS_DEFAULT_OCT,
1784         NID_X9_62_prime_field,
1785         ossl_ec_GFp_nistp256_group_init,
1786         ossl_ec_GFp_simple_group_finish,
1787         ossl_ec_GFp_simple_group_clear_finish,
1788         ossl_ec_GFp_nist_group_copy,
1789         ossl_ec_GFp_nistp256_group_set_curve,
1790         ossl_ec_GFp_simple_group_get_curve,
1791         ossl_ec_GFp_simple_group_get_degree,
1792         ossl_ec_group_simple_order_bits,
1793         ossl_ec_GFp_simple_group_check_discriminant,
1794         ossl_ec_GFp_simple_point_init,
1795         ossl_ec_GFp_simple_point_finish,
1796         ossl_ec_GFp_simple_point_clear_finish,
1797         ossl_ec_GFp_simple_point_copy,
1798         ossl_ec_GFp_simple_point_set_to_infinity,
1799         ossl_ec_GFp_simple_point_set_affine_coordinates,
1800         ossl_ec_GFp_nistp256_point_get_affine_coordinates,
1801         0 /* point_set_compressed_coordinates */ ,
1802         0 /* point2oct */ ,
1803         0 /* oct2point */ ,
1804         ossl_ec_GFp_simple_add,
1805         ossl_ec_GFp_simple_dbl,
1806         ossl_ec_GFp_simple_invert,
1807         ossl_ec_GFp_simple_is_at_infinity,
1808         ossl_ec_GFp_simple_is_on_curve,
1809         ossl_ec_GFp_simple_cmp,
1810         ossl_ec_GFp_simple_make_affine,
1811         ossl_ec_GFp_simple_points_make_affine,
1812         ossl_ec_GFp_nistp256_points_mul,
1813         ossl_ec_GFp_nistp256_precompute_mult,
1814         ossl_ec_GFp_nistp256_have_precompute_mult,
1815         ossl_ec_GFp_nist_field_mul,
1816         ossl_ec_GFp_nist_field_sqr,
1817         0 /* field_div */ ,
1818         ossl_ec_GFp_simple_field_inv,
1819         0 /* field_encode */ ,
1820         0 /* field_decode */ ,
1821         0,                      /* field_set_to_one */
1822         ossl_ec_key_simple_priv2oct,
1823         ossl_ec_key_simple_oct2priv,
1824         0, /* set private */
1825         ossl_ec_key_simple_generate_key,
1826         ossl_ec_key_simple_check_key,
1827         ossl_ec_key_simple_generate_public_key,
1828         0, /* keycopy */
1829         0, /* keyfinish */
1830         ossl_ecdh_simple_compute_key,
1831         ossl_ecdsa_simple_sign_setup,
1832         ossl_ecdsa_simple_sign_sig,
1833         ossl_ecdsa_simple_verify_sig,
1834         0, /* field_inverse_mod_ord */
1835         0, /* blind_coordinates */
1836         0, /* ladder_pre */
1837         0, /* ladder_step */
1838         0  /* ladder_post */
1839     };
1840 
1841     return &ret;
1842 }
1843 
1844 /******************************************************************************/
1845 /*
1846  * FUNCTIONS TO MANAGE PRECOMPUTATION
1847  */
1848 
nistp256_pre_comp_new(void)1849 static NISTP256_PRE_COMP *nistp256_pre_comp_new(void)
1850 {
1851     NISTP256_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1852 
1853     if (ret == NULL)
1854         return ret;
1855 
1856     if (!CRYPTO_NEW_REF(&ret->references, 1)) {
1857         OPENSSL_free(ret);
1858         return NULL;
1859     }
1860     return ret;
1861 }
1862 
EC_nistp256_pre_comp_dup(NISTP256_PRE_COMP * p)1863 NISTP256_PRE_COMP *EC_nistp256_pre_comp_dup(NISTP256_PRE_COMP *p)
1864 {
1865     int i;
1866     if (p != NULL)
1867         CRYPTO_UP_REF(&p->references, &i);
1868     return p;
1869 }
1870 
EC_nistp256_pre_comp_free(NISTP256_PRE_COMP * pre)1871 void EC_nistp256_pre_comp_free(NISTP256_PRE_COMP *pre)
1872 {
1873     int i;
1874 
1875     if (pre == NULL)
1876         return;
1877 
1878     CRYPTO_DOWN_REF(&pre->references, &i);
1879     REF_PRINT_COUNT("EC_nistp256", pre);
1880     if (i > 0)
1881         return;
1882     REF_ASSERT_ISNT(i < 0);
1883 
1884     CRYPTO_FREE_REF(&pre->references);
1885     OPENSSL_free(pre);
1886 }
1887 
1888 /******************************************************************************/
1889 /*
1890  * OPENSSL EC_METHOD FUNCTIONS
1891  */
1892 
ossl_ec_GFp_nistp256_group_init(EC_GROUP * group)1893 int ossl_ec_GFp_nistp256_group_init(EC_GROUP *group)
1894 {
1895     int ret;
1896     ret = ossl_ec_GFp_simple_group_init(group);
1897     group->a_is_minus3 = 1;
1898     return ret;
1899 }
1900 
ossl_ec_GFp_nistp256_group_set_curve(EC_GROUP * group,const BIGNUM * p,const BIGNUM * a,const BIGNUM * b,BN_CTX * ctx)1901 int ossl_ec_GFp_nistp256_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1902                                          const BIGNUM *a, const BIGNUM *b,
1903                                          BN_CTX *ctx)
1904 {
1905     int ret = 0;
1906     BIGNUM *curve_p, *curve_a, *curve_b;
1907 #ifndef FIPS_MODULE
1908     BN_CTX *new_ctx = NULL;
1909 
1910     if (ctx == NULL)
1911         ctx = new_ctx = BN_CTX_new();
1912 #endif
1913     if (ctx == NULL)
1914         return 0;
1915 
1916     BN_CTX_start(ctx);
1917     curve_p = BN_CTX_get(ctx);
1918     curve_a = BN_CTX_get(ctx);
1919     curve_b = BN_CTX_get(ctx);
1920     if (curve_b == NULL)
1921         goto err;
1922     BN_bin2bn(nistp256_curve_params[0], sizeof(felem_bytearray), curve_p);
1923     BN_bin2bn(nistp256_curve_params[1], sizeof(felem_bytearray), curve_a);
1924     BN_bin2bn(nistp256_curve_params[2], sizeof(felem_bytearray), curve_b);
1925     if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1926         ERR_raise(ERR_LIB_EC, EC_R_WRONG_CURVE_PARAMETERS);
1927         goto err;
1928     }
1929     group->field_mod_func = BN_nist_mod_256;
1930     ret = ossl_ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1931  err:
1932     BN_CTX_end(ctx);
1933 #ifndef FIPS_MODULE
1934     BN_CTX_free(new_ctx);
1935 #endif
1936     return ret;
1937 }
1938 
1939 /*
1940  * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1941  * (X/Z^2, Y/Z^3)
1942  */
ossl_ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP * group,const EC_POINT * point,BIGNUM * x,BIGNUM * y,BN_CTX * ctx)1943 int ossl_ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group,
1944                                                       const EC_POINT *point,
1945                                                       BIGNUM *x, BIGNUM *y,
1946                                                       BN_CTX *ctx)
1947 {
1948     felem z1, z2, x_in, y_in;
1949     smallfelem x_out, y_out;
1950     longfelem tmp;
1951 
1952     if (EC_POINT_is_at_infinity(group, point)) {
1953         ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);
1954         return 0;
1955     }
1956     if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1957         (!BN_to_felem(z1, point->Z)))
1958         return 0;
1959     felem_inv(z2, z1);
1960     felem_square(tmp, z2);
1961     felem_reduce(z1, tmp);
1962     felem_mul(tmp, x_in, z1);
1963     felem_reduce(x_in, tmp);
1964     felem_contract(x_out, x_in);
1965     if (x != NULL) {
1966         if (!smallfelem_to_BN(x, x_out)) {
1967             ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1968             return 0;
1969         }
1970     }
1971     felem_mul(tmp, z1, z2);
1972     felem_reduce(z1, tmp);
1973     felem_mul(tmp, y_in, z1);
1974     felem_reduce(y_in, tmp);
1975     felem_contract(y_out, y_in);
1976     if (y != NULL) {
1977         if (!smallfelem_to_BN(y, y_out)) {
1978             ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1979             return 0;
1980         }
1981     }
1982     return 1;
1983 }
1984 
1985 /* points below is of size |num|, and tmp_smallfelems is of size |num+1| */
make_points_affine(size_t num,smallfelem points[][3],smallfelem tmp_smallfelems[])1986 static void make_points_affine(size_t num, smallfelem points[][3],
1987                                smallfelem tmp_smallfelems[])
1988 {
1989     /*
1990      * Runs in constant time, unless an input is the point at infinity (which
1991      * normally shouldn't happen).
1992      */
1993     ossl_ec_GFp_nistp_points_make_affine_internal(num,
1994                                                   points,
1995                                                   sizeof(smallfelem),
1996                                                   tmp_smallfelems,
1997                                                   (void (*)(void *))smallfelem_one,
1998                                                   smallfelem_is_zero_int,
1999                                                   (void (*)(void *, const void *))
2000                                                   smallfelem_assign,
2001                                                   (void (*)(void *, const void *))
2002                                                   smallfelem_square_contract,
2003                                                   (void (*)
2004                                                    (void *, const void *,
2005                                                     const void *))
2006                                                   smallfelem_mul_contract,
2007                                                   (void (*)(void *, const void *))
2008                                                   smallfelem_inv_contract,
2009                                                   /* nothing to contract */
2010                                                   (void (*)(void *, const void *))
2011                                                   smallfelem_assign);
2012 }
2013 
2014 /*
2015  * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
2016  * values Result is stored in r (r can equal one of the inputs).
2017  */
ossl_ec_GFp_nistp256_points_mul(const EC_GROUP * group,EC_POINT * r,const BIGNUM * scalar,size_t num,const EC_POINT * points[],const BIGNUM * scalars[],BN_CTX * ctx)2018 int ossl_ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
2019                                     const BIGNUM *scalar, size_t num,
2020                                     const EC_POINT *points[],
2021                                     const BIGNUM *scalars[], BN_CTX *ctx)
2022 {
2023     int ret = 0;
2024     int j;
2025     int mixed = 0;
2026     BIGNUM *x, *y, *z, *tmp_scalar;
2027     felem_bytearray g_secret;
2028     felem_bytearray *secrets = NULL;
2029     smallfelem (*pre_comp)[17][3] = NULL;
2030     smallfelem *tmp_smallfelems = NULL;
2031     unsigned i;
2032     int num_bytes;
2033     int have_pre_comp = 0;
2034     size_t num_points = num;
2035     smallfelem x_in, y_in, z_in;
2036     felem x_out, y_out, z_out;
2037     NISTP256_PRE_COMP *pre = NULL;
2038     const smallfelem(*g_pre_comp)[16][3] = NULL;
2039     EC_POINT *generator = NULL;
2040     const EC_POINT *p = NULL;
2041     const BIGNUM *p_scalar = NULL;
2042 
2043     BN_CTX_start(ctx);
2044     x = BN_CTX_get(ctx);
2045     y = BN_CTX_get(ctx);
2046     z = BN_CTX_get(ctx);
2047     tmp_scalar = BN_CTX_get(ctx);
2048     if (tmp_scalar == NULL)
2049         goto err;
2050 
2051     if (scalar != NULL) {
2052         pre = group->pre_comp.nistp256;
2053         if (pre)
2054             /* we have precomputation, try to use it */
2055             g_pre_comp = (const smallfelem(*)[16][3])pre->g_pre_comp;
2056         else
2057             /* try to use the standard precomputation */
2058             g_pre_comp = &gmul[0];
2059         generator = EC_POINT_new(group);
2060         if (generator == NULL)
2061             goto err;
2062         /* get the generator from precomputation */
2063         if (!smallfelem_to_BN(x, g_pre_comp[0][1][0]) ||
2064             !smallfelem_to_BN(y, g_pre_comp[0][1][1]) ||
2065             !smallfelem_to_BN(z, g_pre_comp[0][1][2])) {
2066             ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
2067             goto err;
2068         }
2069         if (!ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group,
2070                                                                 generator,
2071                                                                 x, y, z, ctx))
2072             goto err;
2073         if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
2074             /* precomputation matches generator */
2075             have_pre_comp = 1;
2076         else
2077             /*
2078              * we don't have valid precomputation: treat the generator as a
2079              * random point
2080              */
2081             num_points++;
2082     }
2083     if (num_points > 0) {
2084         if (num_points >= 3) {
2085             /*
2086              * unless we precompute multiples for just one or two points,
2087              * converting those into affine form is time well spent
2088              */
2089             mixed = 1;
2090         }
2091         secrets = OPENSSL_malloc(sizeof(*secrets) * num_points);
2092         pre_comp = OPENSSL_malloc(sizeof(*pre_comp) * num_points);
2093         if (mixed)
2094             tmp_smallfelems =
2095               OPENSSL_malloc(sizeof(*tmp_smallfelems) * (num_points * 17 + 1));
2096         if ((secrets == NULL) || (pre_comp == NULL)
2097             || (mixed && (tmp_smallfelems == NULL)))
2098             goto err;
2099 
2100         /*
2101          * we treat NULL scalars as 0, and NULL points as points at infinity,
2102          * i.e., they contribute nothing to the linear combination
2103          */
2104         memset(secrets, 0, sizeof(*secrets) * num_points);
2105         memset(pre_comp, 0, sizeof(*pre_comp) * num_points);
2106         for (i = 0; i < num_points; ++i) {
2107             if (i == num) {
2108                 /*
2109                  * we didn't have a valid precomputation, so we pick the
2110                  * generator
2111                  */
2112                 p = EC_GROUP_get0_generator(group);
2113                 p_scalar = scalar;
2114             } else {
2115                 /* the i^th point */
2116                 p = points[i];
2117                 p_scalar = scalars[i];
2118             }
2119             if ((p_scalar != NULL) && (p != NULL)) {
2120                 /* reduce scalar to 0 <= scalar < 2^256 */
2121                 if ((BN_num_bits(p_scalar) > 256)
2122                     || (BN_is_negative(p_scalar))) {
2123                     /*
2124                      * this is an unusual input, and we don't guarantee
2125                      * constant-timeness
2126                      */
2127                     if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
2128                         ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
2129                         goto err;
2130                     }
2131                     num_bytes = BN_bn2lebinpad(tmp_scalar,
2132                                                secrets[i], sizeof(secrets[i]));
2133                 } else {
2134                     num_bytes = BN_bn2lebinpad(p_scalar,
2135                                                secrets[i], sizeof(secrets[i]));
2136                 }
2137                 if (num_bytes < 0) {
2138                     ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
2139                     goto err;
2140                 }
2141                 /* precompute multiples */
2142                 if ((!BN_to_felem(x_out, p->X)) ||
2143                     (!BN_to_felem(y_out, p->Y)) ||
2144                     (!BN_to_felem(z_out, p->Z)))
2145                     goto err;
2146                 felem_shrink(pre_comp[i][1][0], x_out);
2147                 felem_shrink(pre_comp[i][1][1], y_out);
2148                 felem_shrink(pre_comp[i][1][2], z_out);
2149                 for (j = 2; j <= 16; ++j) {
2150                     if (j & 1) {
2151                         point_add_small(pre_comp[i][j][0], pre_comp[i][j][1],
2152                                         pre_comp[i][j][2], pre_comp[i][1][0],
2153                                         pre_comp[i][1][1], pre_comp[i][1][2],
2154                                         pre_comp[i][j - 1][0],
2155                                         pre_comp[i][j - 1][1],
2156                                         pre_comp[i][j - 1][2]);
2157                     } else {
2158                         point_double_small(pre_comp[i][j][0],
2159                                            pre_comp[i][j][1],
2160                                            pre_comp[i][j][2],
2161                                            pre_comp[i][j / 2][0],
2162                                            pre_comp[i][j / 2][1],
2163                                            pre_comp[i][j / 2][2]);
2164                     }
2165                 }
2166             }
2167         }
2168         if (mixed)
2169             make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems);
2170     }
2171 
2172     /* the scalar for the generator */
2173     if ((scalar != NULL) && (have_pre_comp)) {
2174         memset(g_secret, 0, sizeof(g_secret));
2175         /* reduce scalar to 0 <= scalar < 2^256 */
2176         if ((BN_num_bits(scalar) > 256) || (BN_is_negative(scalar))) {
2177             /*
2178              * this is an unusual input, and we don't guarantee
2179              * constant-timeness
2180              */
2181             if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
2182                 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
2183                 goto err;
2184             }
2185             num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
2186         } else {
2187             num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
2188         }
2189         /* do the multiplication with generator precomputation */
2190         batch_mul(x_out, y_out, z_out,
2191                   (const felem_bytearray(*))secrets, num_points,
2192                   g_secret,
2193                   mixed, (const smallfelem(*)[17][3])pre_comp, g_pre_comp);
2194     } else {
2195         /* do the multiplication without generator precomputation */
2196         batch_mul(x_out, y_out, z_out,
2197                   (const felem_bytearray(*))secrets, num_points,
2198                   NULL, mixed, (const smallfelem(*)[17][3])pre_comp, NULL);
2199     }
2200     /* reduce the output to its unique minimal representation */
2201     felem_contract(x_in, x_out);
2202     felem_contract(y_in, y_out);
2203     felem_contract(z_in, z_out);
2204     if ((!smallfelem_to_BN(x, x_in)) || (!smallfelem_to_BN(y, y_in)) ||
2205         (!smallfelem_to_BN(z, z_in))) {
2206         ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
2207         goto err;
2208     }
2209     ret = ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z,
2210                                                              ctx);
2211 
2212  err:
2213     BN_CTX_end(ctx);
2214     EC_POINT_free(generator);
2215     OPENSSL_free(secrets);
2216     OPENSSL_free(pre_comp);
2217     OPENSSL_free(tmp_smallfelems);
2218     return ret;
2219 }
2220 
ossl_ec_GFp_nistp256_precompute_mult(EC_GROUP * group,BN_CTX * ctx)2221 int ossl_ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
2222 {
2223     int ret = 0;
2224     NISTP256_PRE_COMP *pre = NULL;
2225     int i, j;
2226     BIGNUM *x, *y;
2227     EC_POINT *generator = NULL;
2228     smallfelem tmp_smallfelems[32];
2229     felem x_tmp, y_tmp, z_tmp;
2230 #ifndef FIPS_MODULE
2231     BN_CTX *new_ctx = NULL;
2232 #endif
2233 
2234     /* throw away old precomputation */
2235     EC_pre_comp_free(group);
2236 
2237 #ifndef FIPS_MODULE
2238     if (ctx == NULL)
2239         ctx = new_ctx = BN_CTX_new();
2240 #endif
2241     if (ctx == NULL)
2242         return 0;
2243 
2244     BN_CTX_start(ctx);
2245     x = BN_CTX_get(ctx);
2246     y = BN_CTX_get(ctx);
2247     if (y == NULL)
2248         goto err;
2249     /* get the generator */
2250     if (group->generator == NULL)
2251         goto err;
2252     generator = EC_POINT_new(group);
2253     if (generator == NULL)
2254         goto err;
2255     BN_bin2bn(nistp256_curve_params[3], sizeof(felem_bytearray), x);
2256     BN_bin2bn(nistp256_curve_params[4], sizeof(felem_bytearray), y);
2257     if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
2258         goto err;
2259     if ((pre = nistp256_pre_comp_new()) == NULL)
2260         goto err;
2261     /*
2262      * if the generator is the standard one, use built-in precomputation
2263      */
2264     if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
2265         memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
2266         goto done;
2267     }
2268     if ((!BN_to_felem(x_tmp, group->generator->X)) ||
2269         (!BN_to_felem(y_tmp, group->generator->Y)) ||
2270         (!BN_to_felem(z_tmp, group->generator->Z)))
2271         goto err;
2272     felem_shrink(pre->g_pre_comp[0][1][0], x_tmp);
2273     felem_shrink(pre->g_pre_comp[0][1][1], y_tmp);
2274     felem_shrink(pre->g_pre_comp[0][1][2], z_tmp);
2275     /*
2276      * compute 2^64*G, 2^128*G, 2^192*G for the first table, 2^32*G, 2^96*G,
2277      * 2^160*G, 2^224*G for the second one
2278      */
2279     for (i = 1; i <= 8; i <<= 1) {
2280         point_double_small(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
2281                            pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
2282                            pre->g_pre_comp[0][i][1],
2283                            pre->g_pre_comp[0][i][2]);
2284         for (j = 0; j < 31; ++j) {
2285             point_double_small(pre->g_pre_comp[1][i][0],
2286                                pre->g_pre_comp[1][i][1],
2287                                pre->g_pre_comp[1][i][2],
2288                                pre->g_pre_comp[1][i][0],
2289                                pre->g_pre_comp[1][i][1],
2290                                pre->g_pre_comp[1][i][2]);
2291         }
2292         if (i == 8)
2293             break;
2294         point_double_small(pre->g_pre_comp[0][2 * i][0],
2295                            pre->g_pre_comp[0][2 * i][1],
2296                            pre->g_pre_comp[0][2 * i][2],
2297                            pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
2298                            pre->g_pre_comp[1][i][2]);
2299         for (j = 0; j < 31; ++j) {
2300             point_double_small(pre->g_pre_comp[0][2 * i][0],
2301                                pre->g_pre_comp[0][2 * i][1],
2302                                pre->g_pre_comp[0][2 * i][2],
2303                                pre->g_pre_comp[0][2 * i][0],
2304                                pre->g_pre_comp[0][2 * i][1],
2305                                pre->g_pre_comp[0][2 * i][2]);
2306         }
2307     }
2308     for (i = 0; i < 2; i++) {
2309         /* g_pre_comp[i][0] is the point at infinity */
2310         memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
2311         /* the remaining multiples */
2312         /* 2^64*G + 2^128*G resp. 2^96*G + 2^160*G */
2313         point_add_small(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
2314                         pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
2315                         pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
2316                         pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
2317                         pre->g_pre_comp[i][2][2]);
2318         /* 2^64*G + 2^192*G resp. 2^96*G + 2^224*G */
2319         point_add_small(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
2320                         pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
2321                         pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2322                         pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
2323                         pre->g_pre_comp[i][2][2]);
2324         /* 2^128*G + 2^192*G resp. 2^160*G + 2^224*G */
2325         point_add_small(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
2326                         pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
2327                         pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2328                         pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
2329                         pre->g_pre_comp[i][4][2]);
2330         /*
2331          * 2^64*G + 2^128*G + 2^192*G resp. 2^96*G + 2^160*G + 2^224*G
2332          */
2333         point_add_small(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
2334                         pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
2335                         pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2336                         pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
2337                         pre->g_pre_comp[i][2][2]);
2338         for (j = 1; j < 8; ++j) {
2339             /* odd multiples: add G resp. 2^32*G */
2340             point_add_small(pre->g_pre_comp[i][2 * j + 1][0],
2341                             pre->g_pre_comp[i][2 * j + 1][1],
2342                             pre->g_pre_comp[i][2 * j + 1][2],
2343                             pre->g_pre_comp[i][2 * j][0],
2344                             pre->g_pre_comp[i][2 * j][1],
2345                             pre->g_pre_comp[i][2 * j][2],
2346                             pre->g_pre_comp[i][1][0],
2347                             pre->g_pre_comp[i][1][1],
2348                             pre->g_pre_comp[i][1][2]);
2349         }
2350     }
2351     make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems);
2352 
2353  done:
2354     SETPRECOMP(group, nistp256, pre);
2355     pre = NULL;
2356     ret = 1;
2357 
2358  err:
2359     BN_CTX_end(ctx);
2360     EC_POINT_free(generator);
2361 #ifndef FIPS_MODULE
2362     BN_CTX_free(new_ctx);
2363 #endif
2364     EC_nistp256_pre_comp_free(pre);
2365     return ret;
2366 }
2367 
ossl_ec_GFp_nistp256_have_precompute_mult(const EC_GROUP * group)2368 int ossl_ec_GFp_nistp256_have_precompute_mult(const EC_GROUP *group)
2369 {
2370     return HAVEPRECOMP(group, nistp256);
2371 }
2372