1 /*
2 * Copyright 2011-2024 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License 2.0 (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10 /* Copyright 2011 Google Inc.
11 *
12 * Licensed under the Apache License, Version 2.0 (the "License");
13 *
14 * you may not use this file except in compliance with the License.
15 * You may obtain a copy of the License at
16 *
17 * http://www.apache.org/licenses/LICENSE-2.0
18 *
19 * Unless required by applicable law or agreed to in writing, software
20 * distributed under the License is distributed on an "AS IS" BASIS,
21 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 * See the License for the specific language governing permissions and
23 * limitations under the License.
24 */
25
26 /*
27 * ECDSA low level APIs are deprecated for public use, but still ok for
28 * internal use.
29 */
30 #include "internal/deprecated.h"
31
32 /*
33 * A 64-bit implementation of the NIST P-256 elliptic curve point multiplication
34 *
35 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
36 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
37 * work which got its smarts from Daniel J. Bernstein's work on the same.
38 */
39
40 #include <openssl/opensslconf.h>
41
42 #include <stdint.h>
43 #include <string.h>
44 #include <openssl/err.h>
45 #include "ec_local.h"
46
47 #include "internal/numbers.h"
48
49 #ifndef INT128_MAX
50 # error "Your compiler doesn't appear to support 128-bit integer types"
51 #endif
52
/* Short aliases for the fixed-width integer types used throughout this file. */
typedef uint8_t u8;
typedef uint32_t u32;
typedef uint64_t u64;

/*
 * The underlying field. P256 operates over GF(2^256-2^224+2^192+2^96-1). We
 * can serialize an element of this field into 32 bytes. We call this an
 * felem_bytearray.
 */

typedef u8 felem_bytearray[32];
64
65 /*
66 * These are the parameters of P256, taken from FIPS 186-3, page 86. These
67 * values are big-endian.
68 */
static const felem_bytearray nistp256_curve_params[5] = {
    {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p (field prime) */
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
    {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 (mod p) */
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc},
    {0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7, /* b (curve coefficient) */
     0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
     0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
     0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},
    {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x (base point) */
     0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
     0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
     0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},
    {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y (base point) */
     0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
     0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
     0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}
};
91
92 /*-
93 * The representation of field elements.
94 * ------------------------------------
95 *
96 * We represent field elements with either four 128-bit values, eight 128-bit
97 * values, or four 64-bit values. The field element represented is:
98 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + v[3]*2^192 (mod p)
99 * or:
100 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + ... + v[7]*2^448 (mod p)
101 *
102 * 128-bit values are called 'limbs'. Since the limbs are spaced only 64 bits
103 * apart, but are 128-bits wide, the most significant bits of each limb overlap
104 * with the least significant bits of the next.
105 *
106 * A field element with four limbs is an 'felem'. One with eight limbs is a
107 * 'longfelem'
108 *
109 * A field element with four, 64-bit values is called a 'smallfelem'. Small
110 * values are used as intermediate values before multiplication.
111 */
112
#define NLIMBS 4

/* A 128-bit accumulator limb; see the representation notes above. */
typedef uint128_t limb;
typedef limb felem[NLIMBS];
typedef limb longfelem[NLIMBS * 2];
typedef u64 smallfelem[NLIMBS];

/* This is the value of the prime as four 64-bit words, little-endian. */
static const u64 kPrime[4] = {
    0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul
};
/* Mask selecting the low 63 bits of a u64; used by felem_shrink. */
static const u64 bottom63bits = 0x7ffffffffffffffful;
125
126 /*
127 * bin32_to_felem takes a little-endian byte array and converts it into felem
128 * form. This assumes that the CPU is little-endian.
129 */
/*
 * bin32_to_felem takes a little-endian byte array and converts it into felem
 * form. This assumes that the CPU is little-endian.
 *
 * Loads are performed with memcpy rather than by casting the byte pointer to
 * a u64 pointer: the cast violates strict aliasing and may fault on targets
 * that require aligned 64-bit loads, while memcpy compiles to the same code
 * without undefined behaviour.
 */
static void bin32_to_felem(felem out, const u8 in[32])
{
    u64 w;

    memcpy(&w, &in[0], sizeof(w));
    out[0] = w;
    memcpy(&w, &in[8], sizeof(w));
    out[1] = w;
    memcpy(&w, &in[16], sizeof(w));
    out[2] = w;
    memcpy(&w, &in[24], sizeof(w));
    out[3] = w;
}
137
138 /*
139 * smallfelem_to_bin32 takes a smallfelem and serializes into a little
140 * endian, 32 byte array. This assumes that the CPU is little-endian.
141 */
/*
 * smallfelem_to_bin32 takes a smallfelem and serializes into a little
 * endian, 32 byte array. This assumes that the CPU is little-endian.
 *
 * Stores are performed with memcpy rather than by casting the byte pointer
 * to a u64 pointer, which would violate strict aliasing and could fault on
 * alignment-sensitive targets.
 */
static void smallfelem_to_bin32(u8 out[32], const smallfelem in)
{
    memcpy(&out[0], &in[0], sizeof(u64));
    memcpy(&out[8], &in[1], sizeof(u64));
    memcpy(&out[16], &in[2], sizeof(u64));
    memcpy(&out[24], &in[3], sizeof(u64));
}
149
150 /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
/*
 * BN_to_felem converts an OpenSSL BIGNUM into an felem.
 * Returns 1 on success, 0 (with an EC error raised) if |bn| is negative or
 * does not fit in 32 bytes.
 */
static int BN_to_felem(felem out, const BIGNUM *bn)
{
    felem_bytearray b_out;

    /* Negative values cannot be represented; then serialize little-endian. */
    if (BN_is_negative(bn)
            || BN_bn2lebinpad(bn, b_out, sizeof(b_out)) < 0) {
        ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
        return 0;
    }
    bin32_to_felem(out, b_out);
    return 1;
}
168
/* smallfelem_to_BN converts a smallfelem into an OpenSSL BIGNUM */
/* smallfelem_to_BN converts a smallfelem into an OpenSSL BIGNUM */
static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in)
{
    felem_bytearray bytes;

    smallfelem_to_bin32(bytes, in);
    return BN_lebin2bn(bytes, sizeof(bytes), out);
}
176
177 /*-
178 * Field operations
179 * ----------------
180 */
181
/* smallfelem_one sets |out| to the field element 1. */
static void smallfelem_one(smallfelem out)
{
    unsigned i;

    out[0] = 1;
    for (i = 1; i < NLIMBS; i++)
        out[i] = 0;
}
189
/* smallfelem_assign copies |in| into |out|. */
static void smallfelem_assign(smallfelem out, const smallfelem in)
{
    unsigned i;

    for (i = 0; i < NLIMBS; i++)
        out[i] = in[i];
}
197
/* felem_assign copies |in| into |out|. */
static void felem_assign(felem out, const felem in)
{
    unsigned i;

    for (i = 0; i < NLIMBS; i++)
        out[i] = in[i];
}
205
206 /* felem_sum sets out = out + in. */
/* felem_sum sets out = out + in. */
static void felem_sum(felem out, const felem in)
{
    unsigned i;

    for (i = 0; i < NLIMBS; i++)
        out[i] += in[i];
}
214
215 /* felem_small_sum sets out = out + in. */
/* felem_small_sum sets out = out + in. */
static void felem_small_sum(felem out, const smallfelem in)
{
    unsigned i;

    for (i = 0; i < NLIMBS; i++)
        out[i] += in[i];
}
223
224 /* felem_scalar sets out = out * scalar */
/* felem_scalar sets out = out * scalar */
static void felem_scalar(felem out, const u64 scalar)
{
    unsigned i;

    for (i = 0; i < NLIMBS; i++)
        out[i] *= scalar;
}
232
233 /* longfelem_scalar sets out = out * scalar */
/* longfelem_scalar sets out = out * scalar */
static void longfelem_scalar(longfelem out, const u64 scalar)
{
    unsigned i;

    for (i = 0; i < NLIMBS * 2; i++)
        out[i] *= scalar;
}
245
246 #define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
247 #define two105 (((limb)1) << 105)
248 #define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)
249
250 /* zero105 is 0 mod p */
251 static const felem zero105 =
252 { two105m41m9, two105, two105m41p9, two105m41p9 };
253
254 /*-
255 * smallfelem_neg sets |out| to |-small|
 * On exit:
 *   out[i] < 2^106
258 */
static void smallfelem_neg(felem out, const smallfelem small)
{
    unsigned i;

    /* Subtract from 0 mod p (zero105) so that no limb can underflow. */
    for (i = 0; i < NLIMBS; i++)
        out[i] = zero105[i] - small[i];
}
267
268 /*-
269 * felem_diff subtracts |in| from |out|
270 * On entry:
271 * in[i] < 2^104
272 * On exit:
273 * out[i] < out[i] + 2^105
274 */
static void felem_diff(felem out, const felem in)
{
    unsigned i;

    /* Add 0 mod p (zero105) first so the subtraction cannot underflow. */
    for (i = 0; i < NLIMBS; i++)
        out[i] += zero105[i];

    for (i = 0; i < NLIMBS; i++)
        out[i] -= in[i];
}
290
291 #define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
292 #define two107 (((limb)1) << 107)
293 #define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)
294
295 /* zero107 is 0 mod p */
296 static const felem zero107 = {
297 two107m43m11, two107, two107m43p11, two107m43p11
298 };
299
300 /*-
301 * An alternative felem_diff for larger inputs |in|
302 * felem_diff_zero107 subtracts |in| from |out|
303 * On entry:
304 * in[i] < 2^106
305 * On exit:
306 * out[i] < out[i] + 2^107
307 */
static void felem_diff_zero107(felem out, const felem in)
{
    unsigned i;

    /* Add 0 mod p (zero107) first so the subtraction cannot underflow. */
    for (i = 0; i < NLIMBS; i++)
        out[i] += zero107[i];

    for (i = 0; i < NLIMBS; i++)
        out[i] -= in[i];
}
323
324 /*-
325 * longfelem_diff subtracts |in| from |out|
326 * On entry:
327 * in[i] < 7*2^67
328 * On exit:
329 * out[i] < out[i] + 2^70 + 2^40
330 */
static void longfelem_diff(longfelem out, const longfelem in)
{
    static const limb two70m8p6 =
        (((limb) 1) << 70) - (((limb) 1) << 8) + (((limb) 1) << 6);
    static const limb two70p40 = (((limb) 1) << 70) + (((limb) 1) << 40);
    static const limb two70 = ((limb) 1) << 70;
    static const limb two70m40m38p6 =
        (((limb) 1) << 70) - (((limb) 1) << 40) - (((limb) 1) << 38) +
        (((limb) 1) << 6);
    static const limb two70m6 = (((limb) 1) << 70) - (((limb) 1) << 6);
    unsigned i;

    /* add 0 mod p to avoid underflow */
    out[0] += two70m8p6;
    out[1] += two70p40;
    out[2] += two70;
    out[3] += two70m40m38p6;
    for (i = 4; i < NLIMBS * 2; i++)
        out[i] += two70m6;

    /* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6, so no limb underflows */
    for (i = 0; i < NLIMBS * 2; i++)
        out[i] -= in[i];
}
362
363 #define two64m0 (((limb)1) << 64) - 1
364 #define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
365 #define two64m46 (((limb)1) << 64) - (((limb)1) << 46)
366 #define two64m32 (((limb)1) << 64) - (((limb)1) << 32)
367
368 /* zero110 is 0 mod p */
369 static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };
370
371 /*-
372 * felem_shrink converts an felem into a smallfelem. The result isn't quite
373 * minimal as the value may be greater than p.
374 *
375 * On entry:
376 * in[i] < 2^109
377 * On exit:
378 * out[i] < 2^64
379 */
static void felem_shrink(smallfelem out, const felem in)
{
    felem tmp;
    u64 a, b, mask;
    u64 high, low;
    static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */

    /* Carry 2->3 */
    tmp[3] = zero110[3] + in[3] + ((u64)(in[2] >> 64));
    /* tmp[3] < 2^110 */

    tmp[2] = zero110[2] + (u64)in[2];
    tmp[0] = zero110[0] + in[0];
    tmp[1] = zero110[1] + in[1];
    /* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */

    /*
     * We perform two partial reductions where we eliminate the high-word of
     * tmp[3]. We don't update the other words till the end.
     */
    a = tmp[3] >> 64;           /* a < 2^46 */
    tmp[3] = (u64)tmp[3];
    tmp[3] -= a;
    tmp[3] += ((limb) a) << 32; /* 2^192*a folds back in as a*(2^32 - 1) */
    /* tmp[3] < 2^79 */

    b = a;
    a = tmp[3] >> 64;           /* a < 2^15 */
    b += a;                     /* b < 2^46 + 2^15 < 2^47 */
    tmp[3] = (u64)tmp[3];
    tmp[3] -= a;
    tmp[3] += ((limb) a) << 32;
    /* tmp[3] < 2^64 + 2^47 */

    /*
     * This adjusts the other two words to complete the two partial
     * reductions.
     */
    tmp[0] += b;
    tmp[1] -= (((limb) b) << 32);

    /*
     * In order to make space in tmp[3] for the carry from 2 -> 3, we
     * conditionally subtract kPrime if tmp[3] is large enough.
     */
    high = (u64)(tmp[3] >> 64);
    /* As tmp[3] < 2^65, high is either 1 or 0 */
    high = 0 - high;
    /*-
     * high is:
     *   all ones   if the high word of tmp[3] is 1
     *   all zeros  if the high word of tmp[3] is 0
     */
    low = (u64)tmp[3];
    mask = 0 - (low >> 63);
    /*-
     * mask is:
     *   all ones   if the MSB of low is 1
     *   all zeros  if the MSB of low is 0
     */
    low &= bottom63bits;
    low -= kPrime3Test;
    /* if low was greater than kPrime3Test then the MSB is zero */
    low = ~low;
    low = 0 - (low >> 63);
    /*-
     * low is:
     *   all ones   if low was > kPrime3Test
     *   all zeros  if low was <= kPrime3Test
     */
    mask = (mask & low) | high;
    tmp[0] -= mask & kPrime[0];
    tmp[1] -= mask & kPrime[1];
    /* kPrime[2] is zero, so omitted */
    tmp[3] -= mask & kPrime[3];
    /* tmp[3] < 2**64 - 2**32 + 1 */

    /* Propagate the carries upward, leaving clean 64-bit words. */
    tmp[1] += ((u64)(tmp[0] >> 64));
    tmp[0] = (u64)tmp[0];
    tmp[2] += ((u64)(tmp[1] >> 64));
    tmp[1] = (u64)tmp[1];
    tmp[3] += ((u64)(tmp[2] >> 64));
    tmp[2] = (u64)tmp[2];
    /* tmp[i] < 2^64 */

    out[0] = tmp[0];
    out[1] = tmp[1];
    out[2] = tmp[2];
    out[3] = tmp[3];
}
470
471 /* smallfelem_expand converts a smallfelem to an felem */
/* smallfelem_expand converts a smallfelem to an felem */
static void smallfelem_expand(felem out, const smallfelem in)
{
    unsigned i;

    for (i = 0; i < NLIMBS; i++)
        out[i] = in[i];
}
479
480 /*-
481 * smallfelem_square sets |out| = |small|^2
482 * On entry:
483 * small[i] < 2^64
484 * On exit:
485 * out[i] < 7 * 2^64 < 2^67
486 */
static void smallfelem_square(longfelem out, const smallfelem small)
{
    limb a;
    u64 high, low;

    /*
     * Schoolbook squaring.  Each cross product small[i]*small[j] (i != j)
     * must be counted twice; this is done either by adding its low half
     * twice, or by doubling a whole accumulator word (out[k] *= 2), which
     * simultaneously doubles the high halves of earlier cross products
     * already parked there.
     */

    /* small[0]^2 */
    a = ((uint128_t) small[0]) * small[0];
    low = a;
    high = a >> 64;
    out[0] = low;
    out[1] = high;

    /* 2 * small[0]*small[1]: low half added twice */
    a = ((uint128_t) small[0]) * small[1];
    low = a;
    high = a >> 64;
    out[1] += low;
    out[1] += low;
    out[2] = high;

    /* 2 * small[0]*small[2]: doubling out[2] also doubles high(0*1) */
    a = ((uint128_t) small[0]) * small[2];
    low = a;
    high = a >> 64;
    out[2] += low;
    out[2] *= 2;
    out[3] = high;

    a = ((uint128_t) small[0]) * small[3];
    low = a;
    high = a >> 64;
    out[3] += low;
    out[4] = high;

    /* 2 * (small[0]*small[3] + small[1]*small[2]) via doubling out[3] */
    a = ((uint128_t) small[1]) * small[2];
    low = a;
    high = a >> 64;
    out[3] += low;
    out[3] *= 2;
    out[4] += high;

    /* small[1]^2 (diagonal term, counted once) */
    a = ((uint128_t) small[1]) * small[1];
    low = a;
    high = a >> 64;
    out[2] += low;
    out[3] += high;

    /* 2 * small[1]*small[3] via doubling out[4] */
    a = ((uint128_t) small[1]) * small[3];
    low = a;
    high = a >> 64;
    out[4] += low;
    out[4] *= 2;
    out[5] = high;

    /* 2 * small[2]*small[3]: low doubled via out[5], high added twice */
    a = ((uint128_t) small[2]) * small[3];
    low = a;
    high = a >> 64;
    out[5] += low;
    out[5] *= 2;
    out[6] = high;
    out[6] += high;

    /* small[2]^2 (diagonal term) */
    a = ((uint128_t) small[2]) * small[2];
    low = a;
    high = a >> 64;
    out[4] += low;
    out[5] += high;

    /* small[3]^2 (diagonal term) */
    a = ((uint128_t) small[3]) * small[3];
    low = a;
    high = a >> 64;
    out[6] += low;
    out[7] = high;
}
558
559 /*-
560 * felem_square sets |out| = |in|^2
561 * On entry:
562 * in[i] < 2^109
563 * On exit:
564 * out[i] < 7 * 2^64 < 2^67
565 */
/*-
 * felem_square sets |out| = |in|^2
 * On entry:
 *   in[i] < 2^109
 * On exit:
 *   out[i] < 7 * 2^64 < 2^67
 */
static void felem_square(longfelem out, const felem in)
{
    /* Use the smallfelem typedef for consistency with felem_mul. */
    smallfelem small;

    felem_shrink(small, in);
    smallfelem_square(out, small);
}
572
573 /*-
574 * smallfelem_mul sets |out| = |small1| * |small2|
575 * On entry:
576 * small1[i] < 2^64
577 * small2[i] < 2^64
578 * On exit:
579 * out[i] < 7 * 2^64 < 2^67
580 */
static void smallfelem_mul(longfelem out, const smallfelem small1,
                           const smallfelem small2)
{
    unsigned i, j;

    /*
     * Schoolbook multiplication: every 64x64->128-bit partial product
     * small1[i]*small2[j] contributes its low half to word i+j and its
     * high half to word i+j+1.  Each output word accumulates at most
     * seven partial halves, so out[k] < 7*2^64 and the 128-bit limbs
     * cannot overflow.
     */
    for (i = 0; i < NLIMBS * 2; i++)
        out[i] = 0;

    for (i = 0; i < NLIMBS; i++) {
        for (j = 0; j < NLIMBS; j++) {
            uint128_t product = ((uint128_t) small1[i]) * small2[j];

            out[i + j] += (u64)product;
            out[i + j + 1] += (u64)(product >> 64);
        }
    }
}
683
684 /*-
685 * felem_mul sets |out| = |in1| * |in2|
686 * On entry:
687 * in1[i] < 2^109
688 * in2[i] < 2^109
689 * On exit:
690 * out[i] < 7 * 2^64 < 2^67
691 */
/*-
 * felem_mul sets |out| = |in1| * |in2|
 * On entry:
 *   in1[i] < 2^109
 *   in2[i] < 2^109
 * On exit:
 *   out[i] < 7 * 2^64 < 2^67
 */
static void felem_mul(longfelem out, const felem in1, const felem in2)
{
    smallfelem s1, s2;

    felem_shrink(s1, in1);
    felem_shrink(s2, in2);
    smallfelem_mul(out, s1, s2);
}
699
700 /*-
701 * felem_small_mul sets |out| = |small1| * |in2|
702 * On entry:
703 * small1[i] < 2^64
704 * in2[i] < 2^109
705 * On exit:
706 * out[i] < 7 * 2^64 < 2^67
707 */
/*-
 * felem_small_mul sets |out| = |small1| * |in2|
 * On entry:
 *   small1[i] < 2^64
 *   in2[i] < 2^109
 * On exit:
 *   out[i] < 7 * 2^64 < 2^67
 */
static void felem_small_mul(longfelem out, const smallfelem small1,
                            const felem in2)
{
    smallfelem s2;

    felem_shrink(s2, in2);
    smallfelem_mul(out, small1, s2);
}
715
716 #define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4)
717 #define two100 (((limb)1) << 100)
718 #define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4)
719 /* zero100 is 0 mod p */
720 static const felem zero100 =
721 { two100m36m4, two100, two100m36p4, two100m36p4 };
722
723 /*-
724 * Internal function for the different flavours of felem_reduce.
725 * felem_reduce_ reduces the higher coefficients in[4]-in[7].
726 * On entry:
727 * out[0] >= in[6] + 2^32*in[6] + in[7] + 2^32*in[7]
728 * out[1] >= in[7] + 2^32*in[4]
729 * out[2] >= in[5] + 2^32*in[5]
730 * out[3] >= in[4] + 2^32*in[5] + 2^32*in[6]
731 * On exit:
732 * out[0] <= out[0] + in[4] + 2^32*in[5]
733 * out[1] <= out[1] + in[5] + 2^33*in[6]
734 * out[2] <= out[2] + in[7] + 2*in[6] + 2^33*in[7]
735 * out[3] <= out[3] + 2^32*in[4] + 3*in[7]
736 */
static void felem_reduce_(felem out, const longfelem in)
{
    int128_t c;

    /*
     * Each annotation "N: [(b0,c0),(b1,c1),...]" below records the identity
     * 2^N == c0*2^b0 + c1*2^b1 + ... (mod p), which is how the high words
     * in[4..7] (weights 2^256..2^448) are folded into the low four words.
     * The caller has pre-loaded |out| with a multiple of p large enough
     * that none of the subtractions below can underflow.
     */

    /* combine common terms from below */
    c = in[4] + (in[5] << 32);
    out[0] += c;
    out[3] -= c;

    c = in[5] - in[7];
    out[1] += c;
    out[2] -= c;

    /* the remaining terms */
    /* 256: [(0,1),(96,-1),(192,-1),(224,1)] */
    out[1] -= (in[4] << 32);
    out[3] += (in[4] << 32);

    /* 320: [(32,1),(64,1),(128,-1),(160,-1),(224,-1)] */
    out[2] -= (in[5] << 32);

    /* 384: [(0,-1),(32,-1),(96,2),(128,2),(224,-1)] */
    out[0] -= in[6];
    out[0] -= (in[6] << 32);
    out[1] += (in[6] << 33);
    out[2] += (in[6] * 2);
    out[3] -= (in[6] << 32);

    /* 448: [(0,-1),(32,-1),(64,-1),(128,1),(160,2),(192,3)] */
    out[0] -= in[7];
    out[0] -= (in[7] << 32);
    out[2] += (in[7] << 33);
    out[3] += (in[7] * 3);
}
770
771 /*-
772 * felem_reduce converts a longfelem into an felem.
773 * To be called directly after felem_square or felem_mul.
774 * On entry:
775 * in[0] < 2^64, in[1] < 3*2^64, in[2] < 5*2^64, in[3] < 7*2^64
 *   in[4] < 7*2^64, in[5] < 5*2^64, in[6] < 3*2^64, in[7] < 2^64
777 * On exit:
778 * out[i] < 2^101
779 */
/*
 * felem_reduce converts a longfelem into an felem.
 * To be called directly after felem_square or felem_mul.
 */
static void felem_reduce(felem out, const longfelem in)
{
    unsigned i;

    /* Seed each word with 0 mod p so felem_reduce_ cannot underflow. */
    for (i = 0; i < NLIMBS; i++)
        out[i] = zero100[i] + in[i];

    felem_reduce_(out, in);

    /*-
     * out[0] > 2^100 - 2^36 - 2^4 - 3*2^64 - 3*2^96 - 2^64 - 2^96 > 0
     * out[1] > 2^100 - 2^64 - 7*2^96 > 0
     * out[2] > 2^100 - 2^36 + 2^4 - 5*2^64 - 5*2^96 > 0
     * out[3] > 2^100 - 2^36 + 2^4 - 7*2^64 - 5*2^96 - 3*2^96 > 0
     *
     * out[0] < 2^100 + 2^64 + 7*2^64 + 5*2^96 < 2^101
     * out[1] < 2^100 + 3*2^64 + 5*2^64 + 3*2^97 < 2^101
     * out[2] < 2^100 + 5*2^64 + 2^64 + 3*2^65 + 2^97 < 2^101
     * out[3] < 2^100 + 7*2^64 + 7*2^96 + 3*2^64 < 2^101
     */
}
801
802 /*-
803 * felem_reduce_zero105 converts a larger longfelem into an felem.
804 * On entry:
805 * in[0] < 2^71
806 * On exit:
807 * out[i] < 2^106
808 */
/*
 * felem_reduce_zero105 converts a larger longfelem into an felem, using
 * zero105 (a larger multiple of p than zero100) as the underflow guard.
 */
static void felem_reduce_zero105(felem out, const longfelem in)
{
    unsigned i;

    /* Seed each word with 0 mod p so felem_reduce_ cannot underflow. */
    for (i = 0; i < NLIMBS; i++)
        out[i] = zero105[i] + in[i];

    felem_reduce_(out, in);

    /*-
     * out[0] > 2^105 - 2^41 - 2^9 - 2^71 - 2^103 - 2^71 - 2^103 > 0
     * out[1] > 2^105 - 2^71 - 2^103 > 0
     * out[2] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 > 0
     * out[3] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 - 2^103 > 0
     *
     * out[0] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
     * out[1] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
     * out[2] < 2^105 + 2^71 + 2^71 + 2^71 + 2^103 < 2^106
     * out[3] < 2^105 + 2^71 + 2^103 + 2^71 < 2^106
     */
}
830
831 /*
832 * subtract_u64 sets *result = *result - v and *carry to one if the
833 * subtraction underflowed.
834 */
/*
 * subtract_u64 sets *result = *result - v and *carry to one if the
 * subtraction underflowed.
 */
static void subtract_u64(u64 *result, u64 *carry, u64 v)
{
    /* Widen to 128 bits; bit 64 of the difference records the borrow. */
    uint128_t diff = ((uint128_t)*result) - v;

    *carry = (u64)(diff >> 64) & 1;
    *result = (u64)diff;
}
842
843 /*
844 * felem_contract converts |in| to its unique, minimal representation. On
845 * entry: in[i] < 2^109
846 */
static void felem_contract(smallfelem out, const felem in)
{
    unsigned i;
    u64 all_equal_so_far = 0, result = 0, carry;

    felem_shrink(out, in);
    /* small is minimal except that the value might be > p */

    all_equal_so_far--;         /* now 2^64 - 1, i.e. all ones */
    /*
     * We are doing a constant time test if out >= kPrime. We need to compare
     * each u64, from most-significant to least significant. For each one, if
     * all words so far have been equal (m is all ones) then a non-equal
     * result is the answer. Otherwise we continue.
     */
    for (i = 3; i < 4; i--) {   /* unsigned i wraps below 0, ending the loop */
        u64 equal;
        uint128_t a = ((uint128_t) kPrime[i]) - out[i];
        /*
         * if out[i] > kPrime[i] then a will underflow and the high 64-bits
         * will all be set.
         */
        result |= all_equal_so_far & ((u64)(a >> 64));

        /*
         * if kPrime[i] == out[i] then |equal| will be all zeros and the
         * decrement will make it all ones.  The shifts below fold "any bit
         * set" information into the top bit, branch-free.
         */
        equal = kPrime[i] ^ out[i];
        equal--;
        equal &= equal << 32;
        equal &= equal << 16;
        equal &= equal << 8;
        equal &= equal << 4;
        equal &= equal << 2;
        equal &= equal << 1;
        equal = 0 - (equal >> 63);

        all_equal_so_far &= equal;
    }

    /*
     * if all_equal_so_far is still all ones then the two values are equal
     * and so out >= kPrime is true.
     */
    result |= all_equal_so_far;

    /* if out >= kPrime then we subtract kPrime. */
    subtract_u64(&out[0], &carry, result & kPrime[0]);
    subtract_u64(&out[1], &carry, carry);
    subtract_u64(&out[2], &carry, carry);
    subtract_u64(&out[3], &carry, carry);

    subtract_u64(&out[1], &carry, result & kPrime[1]);
    subtract_u64(&out[2], &carry, carry);
    subtract_u64(&out[3], &carry, carry);

    subtract_u64(&out[2], &carry, result & kPrime[2]);
    subtract_u64(&out[3], &carry, carry);

    subtract_u64(&out[3], &carry, result & kPrime[3]);
}
909
/* smallfelem_square_contract sets |out| to the minimal form of |in|^2. */
static void smallfelem_square_contract(smallfelem out, const smallfelem in)
{
    longfelem product;
    felem reduced;

    smallfelem_square(product, in);
    felem_reduce(reduced, product);
    felem_contract(out, reduced);
}
919
/* smallfelem_mul_contract sets |out| to the minimal form of |in1|*|in2|. */
static void smallfelem_mul_contract(smallfelem out, const smallfelem in1,
                                    const smallfelem in2)
{
    longfelem product;
    felem reduced;

    smallfelem_mul(product, in1, in2);
    felem_reduce(reduced, product);
    felem_contract(out, reduced);
}
930
931 /*-
 * smallfelem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
933 * otherwise.
934 * On entry:
935 * small[i] < 2^64
936 */
static limb smallfelem_is_zero(const smallfelem small)
{
    limb result;
    u64 is_p;

    /*
     * Constant-time zero test: OR all words together, then fold "any bit
     * set" into the top bit.  is_zero ends up all-ones iff small == 0.
     */
    u64 is_zero = small[0] | small[1] | small[2] | small[3];
    is_zero--;
    is_zero &= is_zero << 32;
    is_zero &= is_zero << 16;
    is_zero &= is_zero << 8;
    is_zero &= is_zero << 4;
    is_zero &= is_zero << 2;
    is_zero &= is_zero << 1;
    is_zero = 0 - (is_zero >> 63);

    /*
     * |small| is only guaranteed to be < 2^256, so p itself is also a
     * representation of zero; test for it the same way.
     */
    is_p = (small[0] ^ kPrime[0]) |
        (small[1] ^ kPrime[1]) |
        (small[2] ^ kPrime[2]) | (small[3] ^ kPrime[3]);
    is_p--;
    is_p &= is_p << 32;
    is_p &= is_p << 16;
    is_p &= is_p << 8;
    is_p &= is_p << 4;
    is_p &= is_p << 2;
    is_p &= is_p << 1;
    is_p = 0 - (is_p >> 63);

    is_zero |= is_p;

    /* Replicate the 64-bit mask into both halves of the 128-bit result. */
    result = is_zero;
    result |= ((limb) is_zero) << 64;
    return result;
}
970
smallfelem_is_zero_int(const void * small)971 static int smallfelem_is_zero_int(const void *small)
972 {
973 return (int)(smallfelem_is_zero(small) & ((limb) 1));
974 }
975
976 /*-
977 * felem_inv calculates |out| = |in|^{-1}
978 *
979 * Based on Fermat's Little Theorem:
980 * a^p = a (mod p)
981 * a^{p-1} = 1 (mod p)
982 * a^{p-2} = a^{-1} (mod p)
983 */
static void felem_inv(felem out, const felem in)
{
    felem ftmp, ftmp2;
    /* each e_I will hold |in|^{2^I - 1} */
    felem e2, e4, e8, e16, e32, e64;
    longfelem tmp;
    unsigned i;

    /*
     * Fixed square-and-multiply addition chain computing
     * in^(p-2) = in^(2^256 - 2^224 + 2^192 + 2^96 - 3).
     * The running exponent held in ftmp/ftmp2 is tracked in the
     * trailing comments.
     */
    felem_square(tmp, in);
    felem_reduce(ftmp, tmp);    /* 2^1 */
    felem_mul(tmp, in, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^2 - 2^0 */
    felem_assign(e2, ftmp);
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^3 - 2^1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^4 - 2^2 */
    felem_mul(tmp, ftmp, e2);
    felem_reduce(ftmp, tmp);    /* 2^4 - 2^0 */
    felem_assign(e4, ftmp);
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^5 - 2^1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^6 - 2^2 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^7 - 2^3 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^8 - 2^4 */
    felem_mul(tmp, ftmp, e4);
    felem_reduce(ftmp, tmp);    /* 2^8 - 2^0 */
    felem_assign(e8, ftmp);
    for (i = 0; i < 8; i++) {
        felem_square(tmp, ftmp);
        felem_reduce(ftmp, tmp);
    }                           /* 2^16 - 2^8 */
    felem_mul(tmp, ftmp, e8);
    felem_reduce(ftmp, tmp);    /* 2^16 - 2^0 */
    felem_assign(e16, ftmp);
    for (i = 0; i < 16; i++) {
        felem_square(tmp, ftmp);
        felem_reduce(ftmp, tmp);
    }                           /* 2^32 - 2^16 */
    felem_mul(tmp, ftmp, e16);
    felem_reduce(ftmp, tmp);    /* 2^32 - 2^0 */
    felem_assign(e32, ftmp);
    for (i = 0; i < 32; i++) {
        felem_square(tmp, ftmp);
        felem_reduce(ftmp, tmp);
    }                           /* 2^64 - 2^32 */
    felem_assign(e64, ftmp);
    felem_mul(tmp, ftmp, in);
    felem_reduce(ftmp, tmp);    /* 2^64 - 2^32 + 2^0 */
    for (i = 0; i < 192; i++) {
        felem_square(tmp, ftmp);
        felem_reduce(ftmp, tmp);
    }                           /* 2^256 - 2^224 + 2^192 */

    /* Build the low part 2^96 - 3 in ftmp2 from the saved e_I values. */
    felem_mul(tmp, e64, e32);
    felem_reduce(ftmp2, tmp);   /* 2^64 - 2^0 */
    for (i = 0; i < 16; i++) {
        felem_square(tmp, ftmp2);
        felem_reduce(ftmp2, tmp);
    }                           /* 2^80 - 2^16 */
    felem_mul(tmp, ftmp2, e16);
    felem_reduce(ftmp2, tmp);   /* 2^80 - 2^0 */
    for (i = 0; i < 8; i++) {
        felem_square(tmp, ftmp2);
        felem_reduce(ftmp2, tmp);
    }                           /* 2^88 - 2^8 */
    felem_mul(tmp, ftmp2, e8);
    felem_reduce(ftmp2, tmp);   /* 2^88 - 2^0 */
    for (i = 0; i < 4; i++) {
        felem_square(tmp, ftmp2);
        felem_reduce(ftmp2, tmp);
    }                           /* 2^92 - 2^4 */
    felem_mul(tmp, ftmp2, e4);
    felem_reduce(ftmp2, tmp);   /* 2^92 - 2^0 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^93 - 2^1 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^94 - 2^2 */
    felem_mul(tmp, ftmp2, e2);
    felem_reduce(ftmp2, tmp);   /* 2^94 - 2^0 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^95 - 2^1 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^96 - 2^2 */
    felem_mul(tmp, ftmp2, in);
    felem_reduce(ftmp2, tmp);   /* 2^96 - 3 */

    /* Combine the two halves of the exponent. */
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(out, tmp);     /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
}
1077
/* smallfelem_inv_contract sets |out| to the minimal form of |in|^{-1}. */
static void smallfelem_inv_contract(smallfelem out, const smallfelem in)
{
    felem widened;

    smallfelem_expand(widened, in);
    felem_inv(widened, widened);
    felem_contract(out, widened);
}
1086
1087 /*-
1088 * Group operations
1089 * ----------------
1090 *
1091 * Building on top of the field operations we have the operations on the
1092 * elliptic curve group itself. Points on the curve are represented in Jacobian
1093 * coordinates
1094 */
1095
1096 /*-
1097 * point_double calculates 2*(x_in, y_in, z_in)
1098 *
1099 * The method is taken from:
1100 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1101 *
1102 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
1103 * while x_out == y_in is not (maybe this works, but it's not tested).
1104 */
/*
 * NB: the "x[i] < 2^k" annotations below track per-limb magnitude bounds;
 * they justify where reductions can be deferred without overflow.
 */
static void
point_double(felem x_out, felem y_out, felem z_out,
             const felem x_in, const felem y_in, const felem z_in)
{
    longfelem tmp, tmp2;
    felem delta, gamma, beta, alpha, ftmp, ftmp2;
    smallfelem small1, small2;

    felem_assign(ftmp, x_in);
    /* ftmp[i] < 2^106 */
    felem_assign(ftmp2, x_in);
    /* ftmp2[i] < 2^106 */

    /* delta = z^2 */
    felem_square(tmp, z_in);
    felem_reduce(delta, tmp);
    /* delta[i] < 2^101 */

    /* gamma = y^2 */
    felem_square(tmp, y_in);
    felem_reduce(gamma, tmp);
    /* gamma[i] < 2^101 */
    felem_shrink(small1, gamma);

    /* beta = x*gamma */
    felem_small_mul(tmp, small1, x_in);
    felem_reduce(beta, tmp);
    /* beta[i] < 2^101 */

    /* alpha = 3*(x-delta)*(x+delta) */
    felem_diff(ftmp, delta);
    /* ftmp[i] < 2^105 + 2^106 < 2^107 */
    felem_sum(ftmp2, delta);
    /* ftmp2[i] < 2^105 + 2^106 < 2^107 */
    felem_scalar(ftmp2, 3);
    /* ftmp2[i] < 3 * 2^107 < 2^109 */
    felem_mul(tmp, ftmp, ftmp2);
    felem_reduce(alpha, tmp);
    /* alpha[i] < 2^101 */
    felem_shrink(small2, alpha);

    /* x' = alpha^2 - 8*beta */
    smallfelem_square(tmp, small2);
    felem_reduce(x_out, tmp);
    felem_assign(ftmp, beta);
    felem_scalar(ftmp, 8);
    /* ftmp[i] < 8 * 2^101 = 2^104 */
    felem_diff(x_out, ftmp);
    /* x_out[i] < 2^105 + 2^101 < 2^106 */

    /* z' = (y + z)^2 - gamma - delta */
    felem_sum(delta, gamma);
    /* delta[i] < 2^101 + 2^101 = 2^102 */
    felem_assign(ftmp, y_in);
    felem_sum(ftmp, z_in);
    /* ftmp[i] < 2^106 + 2^106 = 2^107 */
    felem_square(tmp, ftmp);
    felem_reduce(z_out, tmp);
    felem_diff(z_out, delta);
    /* z_out[i] < 2^105 + 2^101 < 2^106 */

    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
    felem_scalar(beta, 4);
    /* beta[i] < 4 * 2^101 = 2^103 */
    felem_diff_zero107(beta, x_out);
    /* beta[i] < 2^107 + 2^103 < 2^108 */
    felem_small_mul(tmp, small2, beta);
    /* tmp[i] < 7 * 2^64 < 2^67 */
    smallfelem_square(tmp2, small1);
    /* tmp2[i] < 7 * 2^64 */
    longfelem_scalar(tmp2, 8);
    /* tmp2[i] < 8 * 7 * 2^64 = 7 * 2^67 */
    longfelem_diff(tmp, tmp2);
    /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
    felem_reduce_zero105(y_out, tmp);
    /* y_out[i] < 2^106 */
}
1182
1183 /*
1184 * point_double_small is the same as point_double, except that it operates on
1185 * smallfelems
1186 */
1187 static void
point_double_small(smallfelem x_out,smallfelem y_out,smallfelem z_out,const smallfelem x_in,const smallfelem y_in,const smallfelem z_in)1188 point_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out,
1189 const smallfelem x_in, const smallfelem y_in,
1190 const smallfelem z_in)
1191 {
1192 felem felem_x_out, felem_y_out, felem_z_out;
1193 felem felem_x_in, felem_y_in, felem_z_in;
1194
1195 smallfelem_expand(felem_x_in, x_in);
1196 smallfelem_expand(felem_y_in, y_in);
1197 smallfelem_expand(felem_z_in, z_in);
1198 point_double(felem_x_out, felem_y_out, felem_z_out,
1199 felem_x_in, felem_y_in, felem_z_in);
1200 felem_shrink(x_out, felem_x_out);
1201 felem_shrink(y_out, felem_y_out);
1202 felem_shrink(z_out, felem_z_out);
1203 }
1204
/*
 * copy_conditional copies in to out iff mask is all ones.
 * |mask| must be either 0 or all-ones; the XOR-and-mask form runs in
 * constant time regardless of whether the copy happens.
 */
static void copy_conditional(felem out, const felem in, limb mask)
{
    unsigned i;
    for (i = 0; i < NLIMBS; ++i) {
        /* tmp == in[i] ^ out[i] when mask is all-ones, 0 when mask is 0 */
        const limb tmp = mask & (in[i] ^ out[i]);
        out[i] ^= tmp;          /* no-op when mask == 0 */
    }
}
1214
/*
 * copy_small_conditional copies in to out iff mask is all ones.
 * |mask| must be either 0 or all-ones; each output limb is built with
 * bitwise selects so the routine runs in constant time.
 */
static void copy_small_conditional(felem out, const smallfelem in, limb mask)
{
    unsigned i;
    const u64 mask64 = mask;    /* truncated copy of the mask for u64 limbs */
    for (i = 0; i < NLIMBS; ++i) {
        out[i] = ((limb) (in[i] & mask64)) | (out[i] & ~mask);
    }
}
1224
1225 /*-
1226 * point_add calculates (x1, y1, z1) + (x2, y2, z2)
1227 *
1228 * The method is taken from:
1229 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1230 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1231 *
1232 * This function includes a branch for checking whether the two input points
1233 * are equal, (while not equal to the point at infinity). This case never
1234 * happens during single point multiplication, so there is no timing leak for
1235 * ECDH or ECDSA signing.
1236 */
static void point_add(felem x3, felem y3, felem z3,
                      const felem x1, const felem y1, const felem z1,
                      const int mixed, const smallfelem x2,
                      const smallfelem y2, const smallfelem z2)
{
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
    longfelem tmp, tmp2;
    smallfelem small1, small2, small3, small4, small5;
    limb x_equal, y_equal, z1_is_zero, z2_is_zero;
    limb points_equal;

    felem_shrink(small3, z1);

    z1_is_zero = smallfelem_is_zero(small3);
    z2_is_zero = smallfelem_is_zero(z2);

    /* ftmp = z1z1 = z1**2 */
    smallfelem_square(tmp, small3);
    felem_reduce(ftmp, tmp);
    /* ftmp[i] < 2^101 */
    felem_shrink(small1, ftmp);

    if (!mixed) {
        /* ftmp2 = z2z2 = z2**2 */
        smallfelem_square(tmp, z2);
        felem_reduce(ftmp2, tmp);
        /* ftmp2[i] < 2^101 */
        felem_shrink(small2, ftmp2);

        felem_shrink(small5, x1);

        /* u1 = ftmp3 = x1*z2z2 */
        smallfelem_mul(tmp, small5, small2);
        felem_reduce(ftmp3, tmp);
        /* ftmp3[i] < 2^101 */

        /* ftmp5 = z1 + z2 */
        felem_assign(ftmp5, z1);
        felem_small_sum(ftmp5, z2);
        /* ftmp5[i] < 2^107 */

        /* ftmp5 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2 */
        felem_square(tmp, ftmp5);
        felem_reduce(ftmp5, tmp);
        /* ftmp2 = z2z2 + z1z1 */
        felem_sum(ftmp2, ftmp);
        /* ftmp2[i] < 2^101 + 2^101 = 2^102 */
        felem_diff(ftmp5, ftmp2);
        /* ftmp5[i] < 2^105 + 2^101 < 2^106 */

        /* ftmp2 = z2 * z2z2 */
        smallfelem_mul(tmp, small2, z2);
        felem_reduce(ftmp2, tmp);

        /* s1 = ftmp2 = y1 * z2**3 */
        felem_mul(tmp, y1, ftmp2);
        felem_reduce(ftmp6, tmp);
        /* ftmp6[i] < 2^101 */
    } else {
        /*
         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
         */

        /* u1 = ftmp3 = x1*z2z2 */
        felem_assign(ftmp3, x1);
        /* ftmp3[i] < 2^106 */

        /* ftmp5 = 2z1z2 */
        felem_assign(ftmp5, z1);
        felem_scalar(ftmp5, 2);
        /* ftmp5[i] < 2*2^106 = 2^107 */

        /* s1 = ftmp2 = y1 * z2**3 */
        felem_assign(ftmp6, y1);
        /* ftmp6[i] < 2^106 */
    }

    /* u2 = x2*z1z1 */
    smallfelem_mul(tmp, x2, small1);
    felem_reduce(ftmp4, tmp);

    /* h = ftmp4 = u2 - u1 */
    felem_diff_zero107(ftmp4, ftmp3);
    /* ftmp4[i] < 2^107 + 2^101 < 2^108 */
    felem_shrink(small4, ftmp4);

    x_equal = smallfelem_is_zero(small4);

    /* z_out = ftmp5 * h */
    felem_small_mul(tmp, small4, ftmp5);
    felem_reduce(z_out, tmp);
    /* z_out[i] < 2^101 */

    /* ftmp = z1 * z1z1 */
    smallfelem_mul(tmp, small1, small3);
    felem_reduce(ftmp, tmp);

    /* s2 = tmp = y2 * z1**3 */
    felem_small_mul(tmp, y2, ftmp);
    felem_reduce(ftmp5, tmp);

    /* r = ftmp5 = (s2 - s1)*2 */
    felem_diff_zero107(ftmp5, ftmp6);
    /* ftmp5[i] < 2^107 + 2^107 = 2^108 */
    felem_scalar(ftmp5, 2);
    /* ftmp5[i] < 2^109 */
    felem_shrink(small1, ftmp5);
    y_equal = smallfelem_is_zero(small1);

    /*
     * The formulae are incorrect if the points are equal, in affine coordinates
     * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
     * happens.
     *
     * We use bitwise operations to avoid potential side-channels introduced by
     * the short-circuiting behaviour of boolean operators.
     *
     * The special case of either point being the point at infinity (z1 and/or
     * z2 are zero), is handled separately later on in this function, so we
     * avoid jumping to point_double here in those special cases.
     */
    points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero));

    if (points_equal) {
        /*
         * This is obviously not constant-time but, as mentioned before, this
         * case never happens during single point multiplication, so there is no
         * timing leak for ECDH or ECDSA signing.
         */
        point_double(x3, y3, z3, x1, y1, z1);
        return;
    }

    /* I = ftmp = (2h)**2 */
    felem_assign(ftmp, ftmp4);
    felem_scalar(ftmp, 2);
    /* ftmp[i] < 2*2^108 = 2^109 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);

    /* J = ftmp2 = h * I */
    felem_mul(tmp, ftmp4, ftmp);
    felem_reduce(ftmp2, tmp);

    /* V = ftmp4 = U1 * I */
    felem_mul(tmp, ftmp3, ftmp);
    felem_reduce(ftmp4, tmp);

    /* x_out = r**2 - J - 2V */
    smallfelem_square(tmp, small1);
    felem_reduce(x_out, tmp);
    felem_assign(ftmp3, ftmp4);
    felem_scalar(ftmp4, 2);
    felem_sum(ftmp4, ftmp2);
    /* ftmp4[i] < 2*2^101 + 2^101 < 2^103 */
    felem_diff(x_out, ftmp4);
    /* x_out[i] < 2^105 + 2^101 */

    /* y_out = r(V-x_out) - 2 * s1 * J */
    felem_diff_zero107(ftmp3, x_out);
    /* ftmp3[i] < 2^107 + 2^101 < 2^108 */
    felem_small_mul(tmp, small1, ftmp3);
    felem_mul(tmp2, ftmp6, ftmp2);
    longfelem_scalar(tmp2, 2);
    /* tmp2[i] < 2*2^67 = 2^68 */
    longfelem_diff(tmp, tmp2);
    /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
    felem_reduce_zero105(y_out, tmp);
    /* y_out[i] < 2^106 */

    /*
     * Handle the points at infinity: if z1 == 0 the result is (x2, y2, z2);
     * if z2 == 0 the result is (x1, y1, z1). These conditional copies are
     * performed unconditionally, in constant time.
     */
    copy_small_conditional(x_out, x2, z1_is_zero);
    copy_conditional(x_out, x1, z2_is_zero);
    copy_small_conditional(y_out, y2, z1_is_zero);
    copy_conditional(y_out, y1, z2_is_zero);
    copy_small_conditional(z_out, z2, z1_is_zero);
    copy_conditional(z_out, z1, z2_is_zero);
    felem_assign(x3, x_out);
    felem_assign(y3, y_out);
    felem_assign(z3, z_out);
}
1417
1418 /*
1419 * point_add_small is the same as point_add, except that it operates on
1420 * smallfelems
1421 */
point_add_small(smallfelem x3,smallfelem y3,smallfelem z3,smallfelem x1,smallfelem y1,smallfelem z1,smallfelem x2,smallfelem y2,smallfelem z2)1422 static void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3,
1423 smallfelem x1, smallfelem y1, smallfelem z1,
1424 smallfelem x2, smallfelem y2, smallfelem z2)
1425 {
1426 felem felem_x3, felem_y3, felem_z3;
1427 felem felem_x1, felem_y1, felem_z1;
1428 smallfelem_expand(felem_x1, x1);
1429 smallfelem_expand(felem_y1, y1);
1430 smallfelem_expand(felem_z1, z1);
1431 point_add(felem_x3, felem_y3, felem_z3, felem_x1, felem_y1, felem_z1, 0,
1432 x2, y2, z2);
1433 felem_shrink(x3, felem_x3);
1434 felem_shrink(y3, felem_y3);
1435 felem_shrink(z3, felem_z3);
1436 }
1437
1438 /*-
1439 * Base point pre computation
1440 * --------------------------
1441 *
1442 * Two different sorts of precomputed tables are used in the following code.
1443 * Each contain various points on the curve, where each point is three field
1444 * elements (x, y, z).
1445 *
1446 * For the base point table, z is usually 1 (0 for the point at infinity).
1447 * This table has 2 * 16 elements, starting with the following:
1448 * index | bits | point
1449 * ------+---------+------------------------------
1450 * 0 | 0 0 0 0 | 0G
1451 * 1 | 0 0 0 1 | 1G
1452 * 2 | 0 0 1 0 | 2^64G
1453 * 3 | 0 0 1 1 | (2^64 + 1)G
1454 * 4 | 0 1 0 0 | 2^128G
1455 * 5 | 0 1 0 1 | (2^128 + 1)G
1456 * 6 | 0 1 1 0 | (2^128 + 2^64)G
1457 * 7 | 0 1 1 1 | (2^128 + 2^64 + 1)G
1458 * 8 | 1 0 0 0 | 2^192G
1459 * 9 | 1 0 0 1 | (2^192 + 1)G
1460 * 10 | 1 0 1 0 | (2^192 + 2^64)G
1461 * 11 | 1 0 1 1 | (2^192 + 2^64 + 1)G
1462 * 12 | 1 1 0 0 | (2^192 + 2^128)G
1463 * 13 | 1 1 0 1 | (2^192 + 2^128 + 1)G
1464 * 14 | 1 1 1 0 | (2^192 + 2^128 + 2^64)G
1465 * 15 | 1 1 1 1 | (2^192 + 2^128 + 2^64 + 1)G
1466 * followed by a copy of this with each element multiplied by 2^32.
1467 *
1468 * The reason for this is so that we can clock bits into four different
1469 * locations when doing simple scalar multiplies against the base point,
1470 * and then another four locations using the second 16 elements.
1471 *
1472 * Tables for other points have table[i] = iG for i in 0 .. 16. */
1473
/*
 * gmul is the table of precomputed base points: gmul[0] holds the 16
 * combinations of {0,1}*2^192*G + {0,1}*2^128*G + {0,1}*2^64*G + {0,1}*G
 * indexed as described above, and gmul[1] holds the same multiples each
 * scaled by 2^32. Each point is (x, y, z) with z = 1 (z = 0 for index 0,
 * the point at infinity).
 */
static const smallfelem gmul[2][16][3] = {
    {{{0, 0, 0, 0},
      {0, 0, 0, 0},
      {0, 0, 0, 0}},
     {{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2,
       0x6b17d1f2e12c4247},
      {0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16,
       0x4fe342e2fe1a7f9b},
      {1, 0, 0, 0}},
     {{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de,
       0x0fa822bc2811aaa5},
      {0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b,
       0xbff44ae8f5dba80d},
      {1, 0, 0, 0}},
     {{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789,
       0x300a4bbc89d6726f},
      {0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f,
       0x72aac7e0d09b4644},
      {1, 0, 0, 0}},
     {{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e,
       0x447d739beedb5e67},
      {0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7,
       0x2d4825ab834131ee},
      {1, 0, 0, 0}},
     {{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60,
       0xef9519328a9c72ff},
      {0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c,
       0x611e9fc37dbb2c9b},
      {1, 0, 0, 0}},
     {{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf,
       0x550663797b51f5d8},
      {0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5,
       0x157164848aecb851},
      {1, 0, 0, 0}},
     {{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391,
       0xeb5d7745b21141ea},
      {0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee,
       0xeafd72ebdbecc17b},
      {1, 0, 0, 0}},
     {{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5,
       0xa6d39677a7849276},
      {0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf,
       0x674f84749b0b8816},
      {1, 0, 0, 0}},
     {{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb,
       0x4e769e7672c9ddad},
      {0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281,
       0x42b99082de830663},
      {1, 0, 0, 0}},
     {{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478,
       0x78878ef61c6ce04d},
      {0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def,
       0xb6cb3f5d7b72c321},
      {1, 0, 0, 0}},
     {{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae,
       0x0c88bc4d716b1287},
      {0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa,
       0xdd5ddea3f3901dc6},
      {1, 0, 0, 0}},
     {{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3,
       0x68f344af6b317466},
      {0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3,
       0x31b9c405f8540a20},
      {1, 0, 0, 0}},
     {{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0,
       0x4052bf4b6f461db9},
      {0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8,
       0xfecf4d5190b0fc61},
      {1, 0, 0, 0}},
     {{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a,
       0x1eddbae2c802e41a},
      {0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0,
       0x43104d86560ebcfc},
      {1, 0, 0, 0}},
     {{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a,
       0xb48e26b484f7a21c},
      {0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668,
       0xfac015404d4d3dab},
      {1, 0, 0, 0}}},
    {{{0, 0, 0, 0},
      {0, 0, 0, 0},
      {0, 0, 0, 0}},
     {{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da,
       0x7fe36b40af22af89},
      {0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1,
       0xe697d45825b63624},
      {1, 0, 0, 0}},
     {{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902,
       0x4a5b506612a677a6},
      {0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40,
       0xeb13461ceac089f1},
      {1, 0, 0, 0}},
     {{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857,
       0x0781b8291c6a220a},
      {0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434,
       0x690cde8df0151593},
      {1, 0, 0, 0}},
     {{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326,
       0x8a535f566ec73617},
      {0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf,
       0x0455c08468b08bd7},
      {1, 0, 0, 0}},
     {{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279,
       0x06bada7ab77f8276},
      {0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70,
       0x5b476dfd0e6cb18a},
      {1, 0, 0, 0}},
     {{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8,
       0x3e29864e8a2ec908},
      {0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed,
       0x239b90ea3dc31e7e},
      {1, 0, 0, 0}},
     {{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4,
       0x820f4dd949f72ff7},
      {0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3,
       0x140406ec783a05ec},
      {1, 0, 0, 0}},
     {{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe,
       0x68f6b8542783dfee},
      {0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028,
       0xcbe1feba92e40ce6},
      {1, 0, 0, 0}},
     {{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927,
       0xd0b2f94d2f420109},
      {0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a,
       0x971459828b0719e5},
      {1, 0, 0, 0}},
     {{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687,
       0x961610004a866aba},
      {0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c,
       0x7acb9fadcee75e44},
      {1, 0, 0, 0}},
     {{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea,
       0x24eb9acca333bf5b},
      {0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d,
       0x69f891c5acd079cc},
      {1, 0, 0, 0}},
     {{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514,
       0xe51f547c5972a107},
      {0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06,
       0x1c309a2b25bb1387},
      {1, 0, 0, 0}},
     {{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828,
       0x20b87b8aa2c4e503},
      {0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044,
       0xf5c6fa49919776be},
      {1, 0, 0, 0}},
     {{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56,
       0x1ed7d1b9332010b9},
      {0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24,
       0x3a2b03f03217257a},
      {1, 0, 0, 0}},
     {{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b,
       0x15fee545c78dd9f6},
      {0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb,
       0x4ab5b6b2b8753f81},
      {1, 0, 0, 0}}}
};
1633
1634 /*
1635 * select_point selects the |idx|th point from a precomputation table and
1636 * copies it to out.
1637 */
select_point(const u64 idx,unsigned int size,const smallfelem pre_comp[16][3],smallfelem out[3])1638 static void select_point(const u64 idx, unsigned int size,
1639 const smallfelem pre_comp[16][3], smallfelem out[3])
1640 {
1641 unsigned i, j;
1642 u64 *outlimbs = &out[0][0];
1643
1644 memset(out, 0, sizeof(*out) * 3);
1645
1646 for (i = 0; i < size; i++) {
1647 const u64 *inlimbs = (u64 *)&pre_comp[i][0][0];
1648 u64 mask = i ^ idx;
1649 mask |= mask >> 4;
1650 mask |= mask >> 2;
1651 mask |= mask >> 1;
1652 mask &= 1;
1653 mask--;
1654 for (j = 0; j < NLIMBS * 3; j++)
1655 outlimbs[j] |= inlimbs[j] & mask;
1656 }
1657 }
1658
1659 /* get_bit returns the |i|th bit in |in| */
get_bit(const felem_bytearray in,int i)1660 static char get_bit(const felem_bytearray in, int i)
1661 {
1662 if ((i < 0) || (i >= 256))
1663 return 0;
1664 return (in[i >> 3] >> (i & 7)) & 1;
1665 }
1666
1667 /*
1668 * Interleaved point multiplication using precomputed point multiples: The
1669 * small point multiples 0*P, 1*P, ..., 17*P are in pre_comp[], the scalars
1670 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1671 * generator, using certain (large) precomputed multiples in g_pre_comp.
1672 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1673 */
batch_mul(felem x_out,felem y_out,felem z_out,const felem_bytearray scalars[],const unsigned num_points,const u8 * g_scalar,const int mixed,const smallfelem pre_comp[][17][3],const smallfelem g_pre_comp[2][16][3])1674 static void batch_mul(felem x_out, felem y_out, felem z_out,
1675 const felem_bytearray scalars[],
1676 const unsigned num_points, const u8 *g_scalar,
1677 const int mixed, const smallfelem pre_comp[][17][3],
1678 const smallfelem g_pre_comp[2][16][3])
1679 {
1680 int i, skip;
1681 unsigned num, gen_mul = (g_scalar != NULL);
1682 felem nq[3], ftmp;
1683 smallfelem tmp[3];
1684 u64 bits;
1685 u8 sign, digit;
1686
1687 /* set nq to the point at infinity */
1688 memset(nq, 0, sizeof(nq));
1689
1690 /*
1691 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1692 * of the generator (two in each of the last 32 rounds) and additions of
1693 * other points multiples (every 5th round).
1694 */
1695 skip = 1; /* save two point operations in the first
1696 * round */
1697 for (i = (num_points ? 255 : 31); i >= 0; --i) {
1698 /* double */
1699 if (!skip)
1700 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1701
1702 /* add multiples of the generator */
1703 if (gen_mul && (i <= 31)) {
1704 /* first, look 32 bits upwards */
1705 bits = get_bit(g_scalar, i + 224) << 3;
1706 bits |= get_bit(g_scalar, i + 160) << 2;
1707 bits |= get_bit(g_scalar, i + 96) << 1;
1708 bits |= get_bit(g_scalar, i + 32);
1709 /* select the point to add, in constant time */
1710 select_point(bits, 16, g_pre_comp[1], tmp);
1711
1712 if (!skip) {
1713 /* Arg 1 below is for "mixed" */
1714 point_add(nq[0], nq[1], nq[2],
1715 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1716 } else {
1717 smallfelem_expand(nq[0], tmp[0]);
1718 smallfelem_expand(nq[1], tmp[1]);
1719 smallfelem_expand(nq[2], tmp[2]);
1720 skip = 0;
1721 }
1722
1723 /* second, look at the current position */
1724 bits = get_bit(g_scalar, i + 192) << 3;
1725 bits |= get_bit(g_scalar, i + 128) << 2;
1726 bits |= get_bit(g_scalar, i + 64) << 1;
1727 bits |= get_bit(g_scalar, i);
1728 /* select the point to add, in constant time */
1729 select_point(bits, 16, g_pre_comp[0], tmp);
1730 /* Arg 1 below is for "mixed" */
1731 point_add(nq[0], nq[1], nq[2],
1732 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1733 }
1734
1735 /* do other additions every 5 doublings */
1736 if (num_points && (i % 5 == 0)) {
1737 /* loop over all scalars */
1738 for (num = 0; num < num_points; ++num) {
1739 bits = get_bit(scalars[num], i + 4) << 5;
1740 bits |= get_bit(scalars[num], i + 3) << 4;
1741 bits |= get_bit(scalars[num], i + 2) << 3;
1742 bits |= get_bit(scalars[num], i + 1) << 2;
1743 bits |= get_bit(scalars[num], i) << 1;
1744 bits |= get_bit(scalars[num], i - 1);
1745 ossl_ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1746
1747 /*
1748 * select the point to add or subtract, in constant time
1749 */
1750 select_point(digit, 17, pre_comp[num], tmp);
1751 smallfelem_neg(ftmp, tmp[1]); /* (X, -Y, Z) is the negative
1752 * point */
1753 copy_small_conditional(ftmp, tmp[1], (((limb) sign) - 1));
1754 felem_contract(tmp[1], ftmp);
1755
1756 if (!skip) {
1757 point_add(nq[0], nq[1], nq[2],
1758 nq[0], nq[1], nq[2],
1759 mixed, tmp[0], tmp[1], tmp[2]);
1760 } else {
1761 smallfelem_expand(nq[0], tmp[0]);
1762 smallfelem_expand(nq[1], tmp[1]);
1763 smallfelem_expand(nq[2], tmp[2]);
1764 skip = 0;
1765 }
1766 }
1767 }
1768 }
1769 felem_assign(x_out, nq[0]);
1770 felem_assign(y_out, nq[1]);
1771 felem_assign(z_out, nq[2]);
1772 }
1773
1774 /* Precomputation for the group generator. */
struct nistp256_pre_comp_st {
    /* precomputed multiples of the group generator, laid out as gmul */
    smallfelem g_pre_comp[2][16][3];
    /* reference count; freed when the last reference is dropped */
    CRYPTO_REF_COUNT references;
};
1779
/*
 * Returns the EC_METHOD for NIST P-256 backed by this 64-bit
 * implementation; entries set to 0 are operations this method does not
 * provide.
 */
const EC_METHOD *EC_GFp_nistp256_method(void)
{
    static const EC_METHOD ret = {
        EC_FLAGS_DEFAULT_OCT,
        NID_X9_62_prime_field,
        ossl_ec_GFp_nistp256_group_init,
        ossl_ec_GFp_simple_group_finish,
        ossl_ec_GFp_simple_group_clear_finish,
        ossl_ec_GFp_nist_group_copy,
        ossl_ec_GFp_nistp256_group_set_curve,
        ossl_ec_GFp_simple_group_get_curve,
        ossl_ec_GFp_simple_group_get_degree,
        ossl_ec_group_simple_order_bits,
        ossl_ec_GFp_simple_group_check_discriminant,
        ossl_ec_GFp_simple_point_init,
        ossl_ec_GFp_simple_point_finish,
        ossl_ec_GFp_simple_point_clear_finish,
        ossl_ec_GFp_simple_point_copy,
        ossl_ec_GFp_simple_point_set_to_infinity,
        ossl_ec_GFp_simple_point_set_affine_coordinates,
        ossl_ec_GFp_nistp256_point_get_affine_coordinates,
        0 /* point_set_compressed_coordinates */ ,
        0 /* point2oct */ ,
        0 /* oct2point */ ,
        ossl_ec_GFp_simple_add,
        ossl_ec_GFp_simple_dbl,
        ossl_ec_GFp_simple_invert,
        ossl_ec_GFp_simple_is_at_infinity,
        ossl_ec_GFp_simple_is_on_curve,
        ossl_ec_GFp_simple_cmp,
        ossl_ec_GFp_simple_make_affine,
        ossl_ec_GFp_simple_points_make_affine,
        ossl_ec_GFp_nistp256_points_mul,
        ossl_ec_GFp_nistp256_precompute_mult,
        ossl_ec_GFp_nistp256_have_precompute_mult,
        ossl_ec_GFp_nist_field_mul,
        ossl_ec_GFp_nist_field_sqr,
        0 /* field_div */ ,
        ossl_ec_GFp_simple_field_inv,
        0 /* field_encode */ ,
        0 /* field_decode */ ,
        0, /* field_set_to_one */
        ossl_ec_key_simple_priv2oct,
        ossl_ec_key_simple_oct2priv,
        0, /* set private */
        ossl_ec_key_simple_generate_key,
        ossl_ec_key_simple_check_key,
        ossl_ec_key_simple_generate_public_key,
        0, /* keycopy */
        0, /* keyfinish */
        ossl_ecdh_simple_compute_key,
        ossl_ecdsa_simple_sign_setup,
        ossl_ecdsa_simple_sign_sig,
        ossl_ecdsa_simple_verify_sig,
        0, /* field_inverse_mod_ord */
        0, /* blind_coordinates */
        0, /* ladder_pre */
        0, /* ladder_step */
        0 /* ladder_post */
    };

    return &ret;
}
1843
1844 /******************************************************************************/
1845 /*
1846 * FUNCTIONS TO MANAGE PRECOMPUTATION
1847 */
1848
nistp256_pre_comp_new(void)1849 static NISTP256_PRE_COMP *nistp256_pre_comp_new(void)
1850 {
1851 NISTP256_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1852
1853 if (ret == NULL)
1854 return ret;
1855
1856 if (!CRYPTO_NEW_REF(&ret->references, 1)) {
1857 OPENSSL_free(ret);
1858 return NULL;
1859 }
1860 return ret;
1861 }
1862
/* Takes an additional reference on |p| (NULL is tolerated) and returns it. */
NISTP256_PRE_COMP *EC_nistp256_pre_comp_dup(NISTP256_PRE_COMP *p)
{
    int refs;

    if (p != NULL)
        CRYPTO_UP_REF(&p->references, &refs);
    return p;
}
1870
/*
 * Drops one reference on |pre| and frees it when the count reaches zero.
 * A NULL argument is a no-op.
 */
void EC_nistp256_pre_comp_free(NISTP256_PRE_COMP *pre)
{
    int refs;

    if (pre == NULL)
        return;

    CRYPTO_DOWN_REF(&pre->references, &refs);
    REF_PRINT_COUNT("EC_nistp256", pre);
    if (refs > 0)
        return;
    REF_ASSERT_ISNT(refs < 0);

    CRYPTO_FREE_REF(&pre->references);
    OPENSSL_free(pre);
}
1887
1888 /******************************************************************************/
1889 /*
1890 * OPENSSL EC_METHOD FUNCTIONS
1891 */
1892
/*
 * Initializes |group| via the generic GFp setup and records that the curve
 * coefficient a equals -3, enabling the optimized formulas used here.
 */
int ossl_ec_GFp_nistp256_group_init(EC_GROUP *group)
{
    int ok = ossl_ec_GFp_simple_group_init(group);

    group->a_is_minus3 = 1;
    return ok;
}
1900
/*
 * Accepts only the exact standard NIST P-256 parameters: (p, a, b) are
 * compared against nistp256_curve_params and anything else is rejected
 * with EC_R_WRONG_CURVE_PARAMETERS. On success the fast NIST modular
 * reduction is installed and the generic group setup is performed.
 */
int ossl_ec_GFp_nistp256_group_set_curve(EC_GROUP *group, const BIGNUM *p,
                                         const BIGNUM *a, const BIGNUM *b,
                                         BN_CTX *ctx)
{
    int ret = 0;
    BIGNUM *curve_p, *curve_a, *curve_b;
#ifndef FIPS_MODULE
    BN_CTX *new_ctx = NULL;

    /* outside FIPS builds, create a scratch context if none was supplied */
    if (ctx == NULL)
        ctx = new_ctx = BN_CTX_new();
#endif
    if (ctx == NULL)
        return 0;

    BN_CTX_start(ctx);
    curve_p = BN_CTX_get(ctx);
    curve_a = BN_CTX_get(ctx);
    curve_b = BN_CTX_get(ctx);
    if (curve_b == NULL)        /* a failure of any earlier BN_CTX_get shows here */
        goto err;
    BN_bin2bn(nistp256_curve_params[0], sizeof(felem_bytearray), curve_p);
    BN_bin2bn(nistp256_curve_params[1], sizeof(felem_bytearray), curve_a);
    BN_bin2bn(nistp256_curve_params[2], sizeof(felem_bytearray), curve_b);
    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
        ERR_raise(ERR_LIB_EC, EC_R_WRONG_CURVE_PARAMETERS);
        goto err;
    }
    group->field_mod_func = BN_nist_mod_256;
    ret = ossl_ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
 err:
    BN_CTX_end(ctx);
#ifndef FIPS_MODULE
    BN_CTX_free(new_ctx);
#endif
    return ret;
}
1938
1939 /*
1940 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1941 * (X/Z^2, Y/Z^3)
1942 */
ossl_ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP * group,const EC_POINT * point,BIGNUM * x,BIGNUM * y,BN_CTX * ctx)1943 int ossl_ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group,
1944 const EC_POINT *point,
1945 BIGNUM *x, BIGNUM *y,
1946 BN_CTX *ctx)
1947 {
1948 felem z1, z2, x_in, y_in;
1949 smallfelem x_out, y_out;
1950 longfelem tmp;
1951
1952 if (EC_POINT_is_at_infinity(group, point)) {
1953 ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);
1954 return 0;
1955 }
1956 if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1957 (!BN_to_felem(z1, point->Z)))
1958 return 0;
1959 felem_inv(z2, z1);
1960 felem_square(tmp, z2);
1961 felem_reduce(z1, tmp);
1962 felem_mul(tmp, x_in, z1);
1963 felem_reduce(x_in, tmp);
1964 felem_contract(x_out, x_in);
1965 if (x != NULL) {
1966 if (!smallfelem_to_BN(x, x_out)) {
1967 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1968 return 0;
1969 }
1970 }
1971 felem_mul(tmp, z1, z2);
1972 felem_reduce(z1, tmp);
1973 felem_mul(tmp, y_in, z1);
1974 felem_reduce(y_in, tmp);
1975 felem_contract(y_out, y_in);
1976 if (y != NULL) {
1977 if (!smallfelem_to_BN(y, y_out)) {
1978 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1979 return 0;
1980 }
1981 }
1982 return 1;
1983 }
1984
/* points below is of size |num|, and tmp_smallfelems is of size |num+1| */
static void make_points_affine(size_t num, smallfelem points[][3],
                               smallfelem tmp_smallfelems[])
{
    /*
     * Runs in constant time, unless an input is the point at infinity (which
     * normally shouldn't happen).
     *
     * The casts below adapt the typed smallfelem helpers to the generic
     * void* callback interface of the shared make-affine implementation.
     */
    ossl_ec_GFp_nistp_points_make_affine_internal(num,
                                                  points,
                                                  sizeof(smallfelem),
                                                  tmp_smallfelems,
                                                  (void (*)(void *))smallfelem_one,
                                                  smallfelem_is_zero_int,
                                                  (void (*)(void *, const void *))
                                                  smallfelem_assign,
                                                  (void (*)(void *, const void *))
                                                  smallfelem_square_contract,
                                                  (void (*)
                                                   (void *, const void *,
                                                    const void *))
                                                  smallfelem_mul_contract,
                                                  (void (*)(void *, const void *))
                                                  smallfelem_inv_contract,
                                                  /* nothing to contract */
                                                  (void (*)(void *, const void *))
                                                  smallfelem_assign);
}
2013
/*
 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
 * values Result is stored in r (r can equal one of the inputs).
 *
 * NULL scalars are treated as 0 and NULL points as the point at infinity,
 * so they contribute nothing to the linear combination.  Returns 1 on
 * success and 0 on error (BN failures, allocation failures, or bad
 * coordinate conversion); errors are raised via ERR_raise.
 */
int ossl_ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
                                    const BIGNUM *scalar, size_t num,
                                    const EC_POINT *points[],
                                    const BIGNUM *scalars[], BN_CTX *ctx)
{
    int ret = 0;
    int j;
    int mixed = 0;               /* use affine (mixed) additions in batch_mul? */
    BIGNUM *x, *y, *z, *tmp_scalar;
    felem_bytearray g_secret;    /* little-endian bytes of the generator scalar */
    felem_bytearray *secrets = NULL;     /* one scalar bytearray per point */
    smallfelem (*pre_comp)[17][3] = NULL; /* per point: multiples 0..16 of it */
    smallfelem *tmp_smallfelems = NULL;  /* scratch for make_points_affine */
    unsigned i;
    int num_bytes;
    int have_pre_comp = 0;       /* 1 iff precomputation matches the generator */
    size_t num_points = num;
    smallfelem x_in, y_in, z_in;
    felem x_out, y_out, z_out;
    NISTP256_PRE_COMP *pre = NULL;
    const smallfelem(*g_pre_comp)[16][3] = NULL;
    EC_POINT *generator = NULL;
    const EC_POINT *p = NULL;
    const BIGNUM *p_scalar = NULL;

    BN_CTX_start(ctx);
    x = BN_CTX_get(ctx);
    y = BN_CTX_get(ctx);
    z = BN_CTX_get(ctx);
    tmp_scalar = BN_CTX_get(ctx);
    /* BN_CTX_get returns NULL for all later calls once one fails, so checking
     * only the last handle covers x, y and z as well. */
    if (tmp_scalar == NULL)
        goto err;

    if (scalar != NULL) {
        pre = group->pre_comp.nistp256;
        if (pre)
            /* we have precomputation, try to use it */
            g_pre_comp = (const smallfelem(*)[16][3])pre->g_pre_comp;
        else
            /* try to use the standard precomputation */
            g_pre_comp = &gmul[0];
        generator = EC_POINT_new(group);
        if (generator == NULL)
            goto err;
        /* get the generator from precomputation */
        if (!smallfelem_to_BN(x, g_pre_comp[0][1][0]) ||
            !smallfelem_to_BN(y, g_pre_comp[0][1][1]) ||
            !smallfelem_to_BN(z, g_pre_comp[0][1][2])) {
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
            goto err;
        }
        if (!ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group,
                                                                generator,
                                                                x, y, z, ctx))
            goto err;
        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
            /* precomputation matches generator */
            have_pre_comp = 1;
        else
            /*
             * we don't have valid precomputation: treat the generator as a
             * random point
             */
            num_points++;
    }
    if (num_points > 0) {
        if (num_points >= 3) {
            /*
             * unless we precompute multiples for just one or two points,
             * converting those into affine form is time well spent
             */
            mixed = 1;
        }
        secrets = OPENSSL_malloc(sizeof(*secrets) * num_points);
        pre_comp = OPENSSL_malloc(sizeof(*pre_comp) * num_points);
        if (mixed)
            tmp_smallfelems =
                OPENSSL_malloc(sizeof(*tmp_smallfelems) * (num_points * 17 + 1));
        if ((secrets == NULL) || (pre_comp == NULL)
            || (mixed && (tmp_smallfelems == NULL)))
            goto err;

        /*
         * we treat NULL scalars as 0, and NULL points as points at infinity,
         * i.e., they contribute nothing to the linear combination
         */
        memset(secrets, 0, sizeof(*secrets) * num_points);
        memset(pre_comp, 0, sizeof(*pre_comp) * num_points);
        for (i = 0; i < num_points; ++i) {
            if (i == num) {
                /*
                 * we didn't have a valid precomputation, so we pick the
                 * generator
                 */
                p = EC_GROUP_get0_generator(group);
                p_scalar = scalar;
            } else {
                /* the i^th point */
                p = points[i];
                p_scalar = scalars[i];
            }
            if ((p_scalar != NULL) && (p != NULL)) {
                /* reduce scalar to 0 <= scalar < 2^256 */
                if ((BN_num_bits(p_scalar) > 256)
                    || (BN_is_negative(p_scalar))) {
                    /*
                     * this is an unusual input, and we don't guarantee
                     * constant-timeness
                     */
                    if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
                        ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
                        goto err;
                    }
                    num_bytes = BN_bn2lebinpad(tmp_scalar,
                                               secrets[i], sizeof(secrets[i]));
                } else {
                    num_bytes = BN_bn2lebinpad(p_scalar,
                                               secrets[i], sizeof(secrets[i]));
                }
                if (num_bytes < 0) {
                    ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
                    goto err;
                }
                /* precompute multiples */
                if ((!BN_to_felem(x_out, p->X)) ||
                    (!BN_to_felem(y_out, p->Y)) ||
                    (!BN_to_felem(z_out, p->Z)))
                    goto err;
                /* pre_comp[i][j] will hold j*p; fill in 1*p first */
                felem_shrink(pre_comp[i][1][0], x_out);
                felem_shrink(pre_comp[i][1][1], y_out);
                felem_shrink(pre_comp[i][1][2], z_out);
                for (j = 2; j <= 16; ++j) {
                    /* odd j: (j-1)*p + p; even j: double (j/2)*p */
                    if (j & 1) {
                        point_add_small(pre_comp[i][j][0], pre_comp[i][j][1],
                                        pre_comp[i][j][2], pre_comp[i][1][0],
                                        pre_comp[i][1][1], pre_comp[i][1][2],
                                        pre_comp[i][j - 1][0],
                                        pre_comp[i][j - 1][1],
                                        pre_comp[i][j - 1][2]);
                    } else {
                        point_double_small(pre_comp[i][j][0],
                                           pre_comp[i][j][1],
                                           pre_comp[i][j][2],
                                           pre_comp[i][j / 2][0],
                                           pre_comp[i][j / 2][1],
                                           pre_comp[i][j / 2][2]);
                    }
                }
            }
        }
        if (mixed)
            make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems);
    }

    /* the scalar for the generator */
    if ((scalar != NULL) && (have_pre_comp)) {
        memset(g_secret, 0, sizeof(g_secret));
        /* reduce scalar to 0 <= scalar < 2^256 */
        if ((BN_num_bits(scalar) > 256) || (BN_is_negative(scalar))) {
            /*
             * this is an unusual input, and we don't guarantee
             * constant-timeness
             */
            if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
                ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
                goto err;
            }
            num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
        } else {
            num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
        }
        /* do the multiplication with generator precomputation */
        batch_mul(x_out, y_out, z_out,
                  (const felem_bytearray(*))secrets, num_points,
                  g_secret,
                  mixed, (const smallfelem(*)[17][3])pre_comp, g_pre_comp);
    } else {
        /* do the multiplication without generator precomputation */
        batch_mul(x_out, y_out, z_out,
                  (const felem_bytearray(*))secrets, num_points,
                  NULL, mixed, (const smallfelem(*)[17][3])pre_comp, NULL);
    }
    /* reduce the output to its unique minimal representation */
    felem_contract(x_in, x_out);
    felem_contract(y_in, y_out);
    felem_contract(z_in, z_out);
    if ((!smallfelem_to_BN(x, x_in)) || (!smallfelem_to_BN(y, y_in)) ||
        (!smallfelem_to_BN(z, z_in))) {
        ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
        goto err;
    }
    ret = ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z,
                                                             ctx);

 err:
    BN_CTX_end(ctx);
    EC_POINT_free(generator);
    OPENSSL_free(secrets);
    OPENSSL_free(pre_comp);
    OPENSSL_free(tmp_smallfelems);
    return ret;
}
2220
/*
 * Build and attach a generator-multiples table for |group| so that later
 * scalar multiplications by the generator can use it.  If the group uses
 * the standard P-256 generator, the built-in |gmul| table is copied;
 * otherwise the two 8-entry tables (even multiples spaced by 2^64 resp.
 * 2^32) are computed from scratch.  Returns 1 on success, 0 on error.
 */
int ossl_ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
{
    int ret = 0;
    NISTP256_PRE_COMP *pre = NULL;
    int i, j;
    BIGNUM *x, *y;
    EC_POINT *generator = NULL;
    smallfelem tmp_smallfelems[32];
    felem x_tmp, y_tmp, z_tmp;
#ifndef FIPS_MODULE
    BN_CTX *new_ctx = NULL;
#endif

    /* throw away old precomputation */
    EC_pre_comp_free(group);

#ifndef FIPS_MODULE
    /* outside FIPS we may allocate a context of our own if none was given */
    if (ctx == NULL)
        ctx = new_ctx = BN_CTX_new();
#endif
    if (ctx == NULL)
        return 0;

    BN_CTX_start(ctx);
    x = BN_CTX_get(ctx);
    y = BN_CTX_get(ctx);
    /* if y is non-NULL, the earlier BN_CTX_get for x succeeded too */
    if (y == NULL)
        goto err;
    /* get the generator */
    if (group->generator == NULL)
        goto err;
    generator = EC_POINT_new(group);
    if (generator == NULL)
        goto err;
    /* build the standard P-256 generator for comparison below */
    BN_bin2bn(nistp256_curve_params[3], sizeof(felem_bytearray), x);
    BN_bin2bn(nistp256_curve_params[4], sizeof(felem_bytearray), y);
    if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
        goto err;
    if ((pre = nistp256_pre_comp_new()) == NULL)
        goto err;
    /*
     * if the generator is the standard one, use built-in precomputation
     */
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
        goto done;
    }
    /* non-standard generator: seed table 0 with G itself */
    if ((!BN_to_felem(x_tmp, group->generator->X)) ||
        (!BN_to_felem(y_tmp, group->generator->Y)) ||
        (!BN_to_felem(z_tmp, group->generator->Z)))
        goto err;
    felem_shrink(pre->g_pre_comp[0][1][0], x_tmp);
    felem_shrink(pre->g_pre_comp[0][1][1], y_tmp);
    felem_shrink(pre->g_pre_comp[0][1][2], z_tmp);
    /*
     * compute 2^64*G, 2^128*G, 2^192*G for the first table, 2^32*G, 2^96*G,
     * 2^160*G, 2^224*G for the second one
     */
    for (i = 1; i <= 8; i <<= 1) {
        /* table 1 entry i = 2^32 * (table 0 entry i): 32 doublings total */
        point_double_small(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
                           pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
                           pre->g_pre_comp[0][i][1],
                           pre->g_pre_comp[0][i][2]);
        for (j = 0; j < 31; ++j) {
            point_double_small(pre->g_pre_comp[1][i][0],
                               pre->g_pre_comp[1][i][1],
                               pre->g_pre_comp[1][i][2],
                               pre->g_pre_comp[1][i][0],
                               pre->g_pre_comp[1][i][1],
                               pre->g_pre_comp[1][i][2]);
        }
        if (i == 8)
            break;
        /* table 0 entry 2i = 2^32 * (table 1 entry i): another 32 doublings */
        point_double_small(pre->g_pre_comp[0][2 * i][0],
                           pre->g_pre_comp[0][2 * i][1],
                           pre->g_pre_comp[0][2 * i][2],
                           pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
                           pre->g_pre_comp[1][i][2]);
        for (j = 0; j < 31; ++j) {
            point_double_small(pre->g_pre_comp[0][2 * i][0],
                               pre->g_pre_comp[0][2 * i][1],
                               pre->g_pre_comp[0][2 * i][2],
                               pre->g_pre_comp[0][2 * i][0],
                               pre->g_pre_comp[0][2 * i][1],
                               pre->g_pre_comp[0][2 * i][2]);
        }
    }
    for (i = 0; i < 2; i++) {
        /* g_pre_comp[i][0] is the point at infinity */
        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
        /* the remaining multiples */
        /* 2^64*G + 2^128*G resp. 2^96*G + 2^160*G */
        point_add_small(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
                        pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
                        pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
                        pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                        pre->g_pre_comp[i][2][2]);
        /* 2^64*G + 2^192*G resp. 2^96*G + 2^224*G */
        point_add_small(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
                        pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
                        pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
                        pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                        pre->g_pre_comp[i][2][2]);
        /* 2^128*G + 2^192*G resp. 2^160*G + 2^224*G */
        point_add_small(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
                        pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
                        pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
                        pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
                        pre->g_pre_comp[i][4][2]);
        /*
         * 2^64*G + 2^128*G + 2^192*G resp. 2^96*G + 2^160*G + 2^224*G
         */
        point_add_small(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
                        pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
                        pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
                        pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                        pre->g_pre_comp[i][2][2]);
        for (j = 1; j < 8; ++j) {
            /* odd multiples: add G resp. 2^32*G */
            point_add_small(pre->g_pre_comp[i][2 * j + 1][0],
                            pre->g_pre_comp[i][2 * j + 1][1],
                            pre->g_pre_comp[i][2 * j + 1][2],
                            pre->g_pre_comp[i][2 * j][0],
                            pre->g_pre_comp[i][2 * j][1],
                            pre->g_pre_comp[i][2 * j][2],
                            pre->g_pre_comp[i][1][0],
                            pre->g_pre_comp[i][1][1],
                            pre->g_pre_comp[i][1][2]);
        }
    }
    /* convert all 31 non-infinity entries to affine form in one batch */
    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems);

 done:
    SETPRECOMP(group, nistp256, pre);
    pre = NULL; /* ownership transferred to the group; don't free below */
    ret = 1;

 err:
    BN_CTX_end(ctx);
    EC_POINT_free(generator);
#ifndef FIPS_MODULE
    BN_CTX_free(new_ctx);
#endif
    EC_nistp256_pre_comp_free(pre);
    return ret;
}
2367
ossl_ec_GFp_nistp256_have_precompute_mult(const EC_GROUP * group)2368 int ossl_ec_GFp_nistp256_have_precompute_mult(const EC_GROUP *group)
2369 {
2370 return HAVEPRECOMP(group, nistp256);
2371 }
2372