1 /*
2 * Copyright 2011-2024 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License 2.0 (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <openssl/crypto.h>
14 #ifdef __APPLE__
15 #include <sys/sysctl.h>
16 #else
17 #include <setjmp.h>
18 #include <signal.h>
19 #endif
20 #include "internal/cryptlib.h"
21 #ifdef _WIN32
22 #include <windows.h>
23 #else
24 #include <unistd.h>
25 #endif
26 #include "arm_arch.h"
27
/* Bitmask of detected capabilities (ARMV7_NEON, ARMV8_AES, ...); 0 until set up. */
unsigned int OPENSSL_armcap_P = 0;
/* Result of _armv8_cpuid_probe(); remains 0 when that probe is never called. */
unsigned int OPENSSL_arm_midr = 0;
/* Set to 1 by OPENSSL_cpuid_setup() on Windows and for selected AArch64 cores. */
unsigned int OPENSSL_armv8_rsa_neonized = 0;
31
32 #ifdef _WIN32
OPENSSL_cpuid_setup(void)33 void OPENSSL_cpuid_setup(void)
34 {
35 OPENSSL_armcap_P |= ARMV7_NEON;
36 OPENSSL_armv8_rsa_neonized = 1;
37 if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) {
38 // These are all covered by one call in Windows
39 OPENSSL_armcap_P |= ARMV8_AES;
40 OPENSSL_armcap_P |= ARMV8_PMULL;
41 OPENSSL_armcap_P |= ARMV8_SHA1;
42 OPENSSL_armcap_P |= ARMV8_SHA256;
43 }
44 }
45
/* Windows variant: no tick source is used here, so the result is always 0. */
uint32_t OPENSSL_rdtsc(void)
{
    return 0U;
}
50 #elif __ARM_MAX_ARCH__ < 7
/*
 * Variant for __ARM_MAX_ARCH__ < 7: performs no detection and leaves
 * OPENSSL_armcap_P untouched.
 */
void OPENSSL_cpuid_setup(void)
{
    /* intentionally empty */
}
54
/* Variant for __ARM_MAX_ARCH__ < 7: no tick source is used, always returns 0. */
uint32_t OPENSSL_rdtsc(void)
{
    return 0U;
}
59 #else /* !_WIN32 && __ARM_MAX_ARCH__ >= 7 */
60
61 /* 3 ways of handling things here: __APPLE__, getauxval() or SIGILL detect */
62
63 /* First determine if getauxval() is available (OSSL_IMPLEMENT_GETAUXVAL) */
64
65 # if defined(__GNUC__) && __GNUC__>=2
66 void OPENSSL_cpuid_setup(void) __attribute__ ((constructor));
67 # endif
68
69 # if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
70 # if __GLIBC_PREREQ(2, 16)
71 # include <sys/auxv.h>
72 # define OSSL_IMPLEMENT_GETAUXVAL
73 # endif
74 # elif defined(__ANDROID_API__)
75 /* see https://developer.android.google.cn/ndk/guides/cpu-features */
76 # if __ANDROID_API__ >= 18
77 # include <sys/auxv.h>
78 # define OSSL_IMPLEMENT_GETAUXVAL
79 # endif
80 # endif
81 # if defined(__FreeBSD__) || defined(__OpenBSD__)
82 # include <sys/param.h>
83 # if (defined(__FreeBSD__) && __FreeBSD_version >= 1200000) || \
84 (defined(__OpenBSD__) && OpenBSD >= 202409)
85 # include <sys/auxv.h>
86 # define OSSL_IMPLEMENT_GETAUXVAL
87
/*
 * getauxval() stand-in built on elf_aux_info(): returns the value stored for
 * `key`, or 0 when elf_aux_info() reports an error.
 */
static unsigned long getauxval(unsigned long key)
{
    unsigned long value = 0ul;

    return elf_aux_info((int)key, &value, sizeof(value)) == 0 ? value : 0ul;
}
97 # endif
98 # endif
99
100 /*
101 * Android: according to https://developer.android.com/ndk/guides/cpu-features,
102 * getauxval is supported starting with API level 18
103 */
104 # if defined(__ANDROID__) && defined(__ANDROID_API__) && __ANDROID_API__ >= 18
105 # include <sys/auxv.h>
106 # define OSSL_IMPLEMENT_GETAUXVAL
107 # endif
108
/*
 * 32-bit ARM reports the Crypto Extensions feature bits in AT_HWCAP2,
 * whereas AArch64 reports them in AT_HWCAP.
 *
 * AT_HWCAP / AT_HWCAP2 get fallback values below when no header has
 * provided them, so both names are always usable from here on.
 */
# ifndef AT_HWCAP
#  define AT_HWCAP               16
# endif
# ifndef AT_HWCAP2
#  define AT_HWCAP2              26
# endif
# if defined(__arm__) || defined (__arm)
#  define OSSL_HWCAP                  AT_HWCAP
#  define OSSL_HWCAP_NEON             (1 << 12)

#  define OSSL_HWCAP_CE               AT_HWCAP2
#  define OSSL_HWCAP_CE_AES           (1 << 0)
#  define OSSL_HWCAP_CE_PMULL         (1 << 1)
#  define OSSL_HWCAP_CE_SHA1          (1 << 2)
#  define OSSL_HWCAP_CE_SHA256        (1 << 3)
# elif defined(__aarch64__)
#  define OSSL_HWCAP                  AT_HWCAP
#  define OSSL_HWCAP_NEON             (1 << 1)

#  define OSSL_HWCAP_CE               AT_HWCAP
#  define OSSL_HWCAP_CE_AES           (1 << 3)
#  define OSSL_HWCAP_CE_PMULL         (1 << 4)
#  define OSSL_HWCAP_CE_SHA1          (1 << 5)
#  define OSSL_HWCAP_CE_SHA256        (1 << 6)
#  define OSSL_HWCAP_CPUID            (1 << 11)
#  define OSSL_HWCAP_SHA3             (1 << 17)
#  define OSSL_HWCAP_CE_SM3           (1 << 18)
#  define OSSL_HWCAP_CE_SM4           (1 << 19)
#  define OSSL_HWCAP_CE_SHA512        (1 << 21)
#  define OSSL_HWCAP_SVE              (1 << 22)
                                      /* AT_HWCAP2 */
/* Use the symbolic key, consistent with OSSL_HWCAP and OSSL_HWCAP_CE */
#  define OSSL_HWCAP2                 AT_HWCAP2
#  define OSSL_HWCAP2_SVE2            (1 << 1)
#  define OSSL_HWCAP2_RNG             (1 << 16)
# endif
148
149 uint32_t _armv7_tick(void);
150
OPENSSL_rdtsc(void)151 uint32_t OPENSSL_rdtsc(void)
152 {
153 if (OPENSSL_armcap_P & ARMV7_TICK)
154 return _armv7_tick();
155 else
156 return 0;
157 }
158
159 # ifdef __aarch64__
160 size_t OPENSSL_rndr_asm(unsigned char *buf, size_t len);
161 size_t OPENSSL_rndrrs_asm(unsigned char *buf, size_t len);
162
163 size_t OPENSSL_rndr_bytes(unsigned char *buf, size_t len);
164 size_t OPENSSL_rndrrs_bytes(unsigned char *buf, size_t len);
165
/*
 * Calls `func(buf, len)` until it reports that all `len` bytes were produced,
 * giving up after a fixed number of attempts.
 *
 * func: generator routine; its return value is compared against `len`.
 * buf:  output buffer handed to `func` unchanged.
 * len:  number of bytes requested.
 *
 * Returns the value from the last call to `func` (equal to `len` on success).
 * The pause happens only between attempts, so a final failure is reported
 * without an extra delay.
 */
static size_t OPENSSL_rndr_wrapper(size_t (*func)(unsigned char *, size_t), unsigned char *buf, size_t len)
{
    enum {
        RNDR_MAX_ATTEMPTS = 8,
        RNDR_RETRY_DELAY_US = 5000  /* 5000 microseconds (5 milliseconds) */
    };
    size_t produced = 0;
    int attempt;

    for (attempt = 0; attempt < RNDR_MAX_ATTEMPTS; attempt++) {
        /* Back off before every retry, but not before the first attempt */
        if (attempt > 0)
            usleep(RNDR_RETRY_DELAY_US);

        produced = func(buf, len);
        if (produced == len)
            break;
    }
    return produced;
}
179
OPENSSL_rndr_bytes(unsigned char * buf,size_t len)180 size_t OPENSSL_rndr_bytes(unsigned char *buf, size_t len)
181 {
182 return OPENSSL_rndr_wrapper(OPENSSL_rndr_asm, buf, len);
183 }
184
OPENSSL_rndrrs_bytes(unsigned char * buf,size_t len)185 size_t OPENSSL_rndrrs_bytes(unsigned char *buf, size_t len)
186 {
187 return OPENSSL_rndr_wrapper(OPENSSL_rndrrs_asm, buf, len);
188 }
189 # endif
190
191 # if !defined(__APPLE__) && !defined(OSSL_IMPLEMENT_GETAUXVAL)
192 static sigset_t all_masked;
193
194 static sigjmp_buf ill_jmp;
ill_handler(int sig)195 static void ill_handler(int sig)
196 {
197 siglongjmp(ill_jmp, sig);
198 }
199
200 /*
201 * Following subroutines could have been inlined, but not all
202 * ARM compilers support inline assembler, and we'd then have to
203 * worry about the compiler optimising out the detection code...
204 */
205 void _armv7_neon_probe(void);
206 void _armv8_aes_probe(void);
207 void _armv8_sha1_probe(void);
208 void _armv8_sha256_probe(void);
209 void _armv8_pmull_probe(void);
210 # ifdef __aarch64__
211 void _armv8_sm3_probe(void);
212 void _armv8_sm4_probe(void);
213 void _armv8_sha512_probe(void);
214 void _armv8_eor3_probe(void);
215 void _armv8_sve_probe(void);
216 void _armv8_sve2_probe(void);
217 void _armv8_rng_probe(void);
218 # endif
219 # endif /* !__APPLE__ && !OSSL_IMPLEMENT_GETAUXVAL */
220
221 /* We only call _armv8_cpuid_probe() if (OPENSSL_armcap_P & ARMV8_CPUID) != 0 */
222 unsigned int _armv8_cpuid_probe(void);
223
224 # if defined(__APPLE__)
/*
 * Reads the integer sysctl called `name`. Returns `value` when the lookup
 * succeeds and the sysctl reads exactly 1; returns 0 in every other case.
 */
static unsigned int sysctl_query(const char *name, unsigned int value)
{
    unsigned int flag = 0;
    size_t flag_size = sizeof(flag);

    if (sysctlbyname(name, &flag, &flag_size, NULL, 0) != 0)
        return 0;

    return flag == 1 ? value : 0;
}
235 # elif !defined(OSSL_IMPLEMENT_GETAUXVAL)
236 /*
237 * Calls a provided probe function, which may SIGILL. If it doesn't, return `value`, otherwise return 0.
238 */
arm_probe_for(void (* probe)(void),volatile unsigned int value)239 static unsigned int arm_probe_for(void (*probe)(void), volatile unsigned int value)
240 {
241 if (sigsetjmp(ill_jmp, 1) == 0) {
242 probe();
243 return value;
244 } else {
245 /* The probe function gave us SIGILL */
246 return 0;
247 }
248 }
249 # endif
250
OPENSSL_cpuid_setup(void)251 void OPENSSL_cpuid_setup(void)
252 {
253 const char *e;
254 # if !defined(__APPLE__) && !defined(OSSL_IMPLEMENT_GETAUXVAL)
255 struct sigaction ill_oact, ill_act;
256 sigset_t oset;
257 # endif
258 static int trigger = 0;
259
260 if (trigger)
261 return;
262 trigger = 1;
263
264 OPENSSL_armcap_P = 0;
265
266 if ((e = getenv("OPENSSL_armcap"))) {
267 OPENSSL_armcap_P = (unsigned int)strtoul(e, NULL, 0);
268 return;
269 }
270
271 # if defined(__APPLE__)
272 # if !defined(__aarch64__)
273 /*
274 * Capability probing by catching SIGILL appears to be problematic
275 * on iOS. But since Apple universe is "monocultural", it's actually
276 * possible to simply set pre-defined processor capability mask.
277 */
278 if (1) {
279 OPENSSL_armcap_P = ARMV7_NEON;
280 return;
281 }
282 # else
283 {
284 /*
285 * From
286 * https://github.com/llvm/llvm-project/blob/412237dcd07e5a2afbb1767858262a5f037149a3/llvm/lib/Target/AArch64/AArch64.td#L719
287 * all of these have been available on 64-bit Apple Silicon from the
288 * beginning (the A7).
289 */
290 OPENSSL_armcap_P |= ARMV7_NEON | ARMV8_PMULL | ARMV8_AES | ARMV8_SHA1 | ARMV8_SHA256;
291
292 /* More recent extensions are indicated by sysctls */
293 OPENSSL_armcap_P |= sysctl_query("hw.optional.armv8_2_sha512", ARMV8_SHA512);
294 OPENSSL_armcap_P |= sysctl_query("hw.optional.armv8_2_sha3", ARMV8_SHA3);
295
296 if (OPENSSL_armcap_P & ARMV8_SHA3) {
297 char uarch[64];
298
299 size_t len = sizeof(uarch);
300 if ((sysctlbyname("machdep.cpu.brand_string", uarch, &len, NULL, 0) == 0) &&
301 ((strncmp(uarch, "Apple M1", 8) == 0) ||
302 (strncmp(uarch, "Apple M2", 8) == 0) ||
303 (strncmp(uarch, "Apple M3", 8) == 0))) {
304 OPENSSL_armcap_P |= ARMV8_UNROLL8_EOR3;
305 OPENSSL_armcap_P |= ARMV8_HAVE_SHA3_AND_WORTH_USING;
306 }
307 }
308 }
309 # endif /* __aarch64__ */
310
311 # elif defined(OSSL_IMPLEMENT_GETAUXVAL)
312
313 if (getauxval(OSSL_HWCAP) & OSSL_HWCAP_NEON) {
314 unsigned long hwcap = getauxval(OSSL_HWCAP_CE);
315
316 OPENSSL_armcap_P |= ARMV7_NEON;
317
318 if (hwcap & OSSL_HWCAP_CE_AES)
319 OPENSSL_armcap_P |= ARMV8_AES;
320
321 if (hwcap & OSSL_HWCAP_CE_PMULL)
322 OPENSSL_armcap_P |= ARMV8_PMULL;
323
324 if (hwcap & OSSL_HWCAP_CE_SHA1)
325 OPENSSL_armcap_P |= ARMV8_SHA1;
326
327 if (hwcap & OSSL_HWCAP_CE_SHA256)
328 OPENSSL_armcap_P |= ARMV8_SHA256;
329
330 # ifdef __aarch64__
331 if (hwcap & OSSL_HWCAP_CE_SM4)
332 OPENSSL_armcap_P |= ARMV8_SM4;
333
334 if (hwcap & OSSL_HWCAP_CE_SHA512)
335 OPENSSL_armcap_P |= ARMV8_SHA512;
336
337 if (hwcap & OSSL_HWCAP_CPUID)
338 OPENSSL_armcap_P |= ARMV8_CPUID;
339
340 if (hwcap & OSSL_HWCAP_CE_SM3)
341 OPENSSL_armcap_P |= ARMV8_SM3;
342 if (hwcap & OSSL_HWCAP_SHA3)
343 OPENSSL_armcap_P |= ARMV8_SHA3;
344 # endif
345 }
346 # ifdef __aarch64__
347 if (getauxval(OSSL_HWCAP) & OSSL_HWCAP_SVE)
348 OPENSSL_armcap_P |= ARMV8_SVE;
349
350 if (getauxval(OSSL_HWCAP2) & OSSL_HWCAP2_SVE2)
351 OPENSSL_armcap_P |= ARMV8_SVE2;
352
353 if (getauxval(OSSL_HWCAP2) & OSSL_HWCAP2_RNG)
354 OPENSSL_armcap_P |= ARMV8_RNG;
355 # endif
356
357 # else /* !__APPLE__ && !OSSL_IMPLEMENT_GETAUXVAL */
358
359 /* If all else fails, do brute force SIGILL-based feature detection */
360
361 sigfillset(&all_masked);
362 sigdelset(&all_masked, SIGILL);
363 sigdelset(&all_masked, SIGTRAP);
364 sigdelset(&all_masked, SIGFPE);
365 sigdelset(&all_masked, SIGBUS);
366 sigdelset(&all_masked, SIGSEGV);
367
368 memset(&ill_act, 0, sizeof(ill_act));
369 ill_act.sa_handler = ill_handler;
370 ill_act.sa_mask = all_masked;
371
372 sigprocmask(SIG_SETMASK, &ill_act.sa_mask, &oset);
373 sigaction(SIGILL, &ill_act, &ill_oact);
374
375 OPENSSL_armcap_P |= arm_probe_for(_armv7_neon_probe, ARMV7_NEON);
376
377 if (OPENSSL_armcap_P & ARMV7_NEON) {
378
379 OPENSSL_armcap_P |= arm_probe_for(_armv8_pmull_probe, ARMV8_PMULL | ARMV8_AES);
380 if (!(OPENSSL_armcap_P & ARMV8_AES)) {
381 OPENSSL_armcap_P |= arm_probe_for(_armv8_aes_probe, ARMV8_AES);
382 }
383
384 OPENSSL_armcap_P |= arm_probe_for(_armv8_sha1_probe, ARMV8_SHA1);
385 OPENSSL_armcap_P |= arm_probe_for(_armv8_sha256_probe, ARMV8_SHA256);
386
387 # if defined(__aarch64__)
388 OPENSSL_armcap_P |= arm_probe_for(_armv8_sm3_probe, ARMV8_SM3);
389 OPENSSL_armcap_P |= arm_probe_for(_armv8_sm4_probe, ARMV8_SM4);
390 OPENSSL_armcap_P |= arm_probe_for(_armv8_sha512_probe, ARMV8_SHA512);
391 OPENSSL_armcap_P |= arm_probe_for(_armv8_eor3_probe, ARMV8_SHA3);
392 # endif
393 }
394 # ifdef __aarch64__
395 OPENSSL_armcap_P |= arm_probe_for(_armv8_sve_probe, ARMV8_SVE);
396 OPENSSL_armcap_P |= arm_probe_for(_armv8_sve2_probe, ARMV8_SVE2);
397 OPENSSL_armcap_P |= arm_probe_for(_armv8_rng_probe, ARMV8_RNG);
398 # endif
399
400 /*
401 * Probing for ARMV7_TICK is known to produce unreliable results,
402 * so we only use the feature when the user explicitly enables it
403 * with OPENSSL_armcap.
404 */
405
406 sigaction(SIGILL, &ill_oact, NULL);
407 sigprocmask(SIG_SETMASK, &oset, NULL);
408
409 # endif /* __APPLE__, OSSL_IMPLEMENT_GETAUXVAL */
410
411 # ifdef __aarch64__
412 if (OPENSSL_armcap_P & ARMV8_CPUID)
413 OPENSSL_arm_midr = _armv8_cpuid_probe();
414
415 if ((MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) ||
416 MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_N1)) &&
417 (OPENSSL_armcap_P & ARMV7_NEON)) {
418 OPENSSL_armv8_rsa_neonized = 1;
419 }
420 if ((MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V1) ||
421 MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_N2) ||
422 MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_COBALT_100) ||
423 MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V2) ||
424 MIDR_IMPLEMENTER(OPENSSL_arm_midr) == ARM_CPU_IMP_AMPERE) &&
425 (OPENSSL_armcap_P & ARMV8_SHA3))
426 OPENSSL_armcap_P |= ARMV8_UNROLL8_EOR3;
427 if ((MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V1) ||
428 MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V2) ||
429 MIDR_IMPLEMENTER(OPENSSL_arm_midr) == ARM_CPU_IMP_AMPERE) &&
430 (OPENSSL_armcap_P & ARMV8_SHA3))
431 OPENSSL_armcap_P |= ARMV8_UNROLL12_EOR3;
432 if ((MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM) ||
433 MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM) ||
434 MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO) ||
435 MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO) ||
436 MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX) ||
437 MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX) ||
438 MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE) ||
439 MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD) ||
440 MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_PRO) ||
441 MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_PRO) ||
442 MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX) ||
443 MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX)) &&
444 (OPENSSL_armcap_P & ARMV8_SHA3))
445 OPENSSL_armcap_P |= ARMV8_HAVE_SHA3_AND_WORTH_USING;
446 # endif
447 }
448 #endif /* _WIN32, __ARM_MAX_ARCH__ >= 7 */
449