#! /usr/bin/env perl
# Copyright 2005-2021 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

# perlasm generator: prints x86_64 assembly (AT&T syntax) for a handful of
# CPU-identification and utility routines and pipes it through
# x86_64-xlate.pl, which is handed $flavour and $output on its command line.
#
# Routines emitted, in order:
#   OPENSSL_atomic_add, OPENSSL_rdtsc, OPENSSL_ia32_cpuid, OPENSSL_cleanse,
#   CRYPTO_memcmp, OPENSSL_wipe_cpu, OPENSSL_instrument_bus,
#   OPENSSL_instrument_bus2, OPENSSL_ia32_rdrand_bytes,
#   OPENSSL_ia32_rdseed_bytes.

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Win64 calling convention is selected by flavour or by an .asm output name.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Locate the translator next to this script or in a perlasm/ subdirectory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed below goes to the translator's stdin.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Registers carrying the first four integer arguments for the chosen ABI.
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order

# Preamble: place a call to OPENSSL_cpuid_setup in the .init section and
# reserve the 40-byte, 4-byte-aligned OPENSSL_ia32cap_P capability vector.
print<<___;
#include crypto/cryptlib.h
.extern		OPENSSL_cpuid_setup
.hidden		OPENSSL_cpuid_setup
.section	.init
	call	OPENSSL_cpuid_setup

.hidden	OPENSSL_ia32cap_P
.comm	OPENSSL_ia32cap_P,40,4    # <--Should match with internal/cryptlib.h OPENSSL_IA32CAP_P_MAX_INDEXES
.text

___

# OPENSSL_atomic_add($arg1 = pointer to 32-bit value, $arg2 = addend)
# Retries a lock cmpxchg until the sum is stored, then returns the new value
# sign-extended to 64 bits.
print<<___;
.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,\@abi-omnipotent
.align	16
OPENSSL_atomic_add:
.cfi_startproc
	endbranch
	movl	($arg1),%eax
.Lspin:	leaq	($arg2,%rax),%r8
	.byte	0xf0		# lock
	cmpxchgl	%r8d,($arg1)
	jne	.Lspin
	movl	%r8d,%eax
	.byte	0x48,0x98	# cltq/cdqe
	ret
.cfi_endproc
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add

___

# OPENSSL_rdtsc()
# Returns the rdtsc result with %edx:%eax merged into %rax.
print<<___;
.globl	OPENSSL_rdtsc
.type	OPENSSL_rdtsc,\@abi-omnipotent
.align	16
OPENSSL_rdtsc:
.cfi_startproc
	endbranch
	rdtsc
	shl	\$32,%rdx
	or	%rdx,%rax
	ret
.cfi_endproc
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc

___

# OPENSSL_ia32_cpuid(pointer to capability vector)
# Declared \@function,1 and addresses its argument through %rdi on every
# flavour (NOTE(review): presumably the translator arranges the Win64
# argument move for \@function symbols - confirm against x86_64-xlate.pl).
# Stores leaf-7 and AVX10 words at offsets 8..36 of the vector and returns
# the adjusted leaf-1 %ecx:%edx pair as %r9d:%r10d packed into %rax.
# %rbx is preserved in %r8 across the cpuid calls.
print<<___;
.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@function,1
.align	16
OPENSSL_ia32_cpuid:
.cfi_startproc
	endbranch
	mov	%rbx,%r8		# save %rbx
.cfi_register	%rbx,%r8

	xor	%eax,%eax
	mov	%rax,8(%rdi)		# clear extended feature flags
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	jz	.Lintel

	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel

	# AMD specific
	mov	\$0x80000000,%eax
	cpuid
	cmp	\$0x80000001,%eax
	jb	.Lintel
	mov	%eax,%r10d
	mov	\$0x80000001,%eax
	cpuid
	or	%ecx,%r9d
	and	\$0x00000801,%r9d	# isolate AMD XOP bit, 1<<11

	cmp	\$0x80000008,%r10d
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	jmp	.Lgeneric

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d
	jb	.Lnocacheinfo

	mov	\$4,%eax
	mov	\$0,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	movd	%eax,%xmm0		# put aside processor id
	and	\$0xbfefffff,%edx	# force reserved bits to 0
	cmp	\$0,%r9d
	jne	.Lnotintel
	or	\$0x40000000,%edx	# set reserved bit#30 on Intel CPUs
	and	\$15,%ah
	cmp	\$15,%ah		# examine Family ID
	jne	.LnotP4
	or	\$0x00100000,%edx	# set reserved bit#20 to engage RC4_CHAR
.LnotP4:
	cmp	\$6,%ah
	jne	.Lnotintel
	and	\$0x0fff0ff0,%eax
	cmp	\$0x00050670,%eax	# Knights Landing
	je	.Lknights
	cmp	\$0x00080650,%eax	# Knights Mill (according to sde)
	jne	.Lnotintel
.Lknights:
	and	\$0xfbffffff,%ecx	# clear XSAVE flag to mimic Silvermont

.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	cmp	\$0,%r10d
	je	.Lgeneric

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
.Lgeneric:
	and	\$0x00000800,%r9d	# isolate AMD XOP flag
	and	\$0xfffff7ff,%ecx
	or	%ecx,%r9d		# merge AMD XOP flag

	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx

	cmp	\$7,%r11d
	jb	.Lno_extended_info
	mov	\$7,%eax
	xor	%ecx,%ecx
	cpuid
	movd	%eax,%xmm1		# put aside leaf 07H Max Sub-leaves
	bt	\$26,%r9d		# check XSAVE bit, cleared on Knights
	jc	.Lnotknights
	and	\$0xfff7ffff,%ebx	# clear ADCX/ADOX flag
.Lnotknights:
	movd	%xmm0,%eax		# restore processor id
	and	\$0x0fff0ff0,%eax
	cmp	\$0x00050650,%eax	# Skylake-X
	jne	.Lnotskylakex
	and	\$0xfffeffff,%ebx	# ~(1<<16)
					# suppress AVX512F flag on Skylake-X

.Lnotskylakex:				# save extended feature flags
	mov	%ebx,8(%rdi)		# save cpuid(EAX=0x7, ECX=0x0).EBX to OPENSSL_ia32cap_P[2]
	mov	%ecx,12(%rdi)		# save cpuid(EAX=0x7, ECX=0x0).ECX to OPENSSL_ia32cap_P[3]
	mov	%edx,16(%rdi)		# save cpuid(EAX=0x7, ECX=0x0).EDX to OPENSSL_ia32cap_P[4]

	movd	%xmm1,%eax		# Restore leaf 07H Max Sub-leaves
	cmp	\$0x1,%eax		# Do we have cpuid(EAX=0x7, ECX=0x1)?
	jb	.Lno_extended_info
	mov	\$0x7,%eax
	mov	\$0x1,%ecx
	cpuid				# cpuid(EAX=0x7, ECX=0x1)
	mov	%eax,20(%rdi)		# save cpuid(EAX=0x7, ECX=0x1).EAX to OPENSSL_ia32cap_P[5]
	mov	%edx,24(%rdi)		# save cpuid(EAX=0x7, ECX=0x1).EDX to OPENSSL_ia32cap_P[6]
	mov	%ebx,28(%rdi)		# save cpuid(EAX=0x7, ECX=0x1).EBX to OPENSSL_ia32cap_P[7]
	mov	%ecx,32(%rdi)		# save cpuid(EAX=0x7, ECX=0x1).ECX to OPENSSL_ia32cap_P[8]

	and	\$0x80000,%edx		# Mask cpuid(EAX=0x7, ECX=0x1).EDX bit 19 to detect AVX10 support
	cmp	\$0x0,%edx
	je	.Lno_extended_info
	mov	\$0x24,%eax		# Have AVX10 Support, query for details
	mov	\$0x0,%ecx
	cpuid				# cpuid(EAX=0x24, ECX=0x0) AVX10 Leaf
	mov	%ebx,36(%rdi)		# save cpuid(EAX=0x24, ECX=0x0).EBX to OPENSSL_ia32cap_P[9]

.Lno_extended_info:

	bt	\$27,%r9d		# check OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$0xe6,%eax		# isolate XMM, YMM and ZMM state support
	cmp	\$0xe6,%eax
	je	.Ldone
	andl	\$0x3fdeffff,8(%rdi)	# ~(1<<31|1<<30|1<<21|1<<16)
					# clear AVX512F+BW+VL+IFMA, all of
					# them are EVEX-encoded, which requires
					# ZMM state support even if one uses
					# only XMM and YMM :-(
	and	\$6,%eax		# isolate XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone
.Lclear_avx:
	andl	\$0xff7fffff,20(%rdi)	# ~(1<<23)
					# clear AVXIFMA, which is VEX-encoded
					# and requires YMM state support
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits
	mov	\$0x3fdeffdf,%eax	# ~(1<<31|1<<30|1<<21|1<<16|1<<5)
	and	%eax,8(%rdi)		# clear AVX2 and AVX512* bits
.Ldone:
	shl	\$32,%r9
	mov	%r10d,%eax
	mov	%r8,%rbx		# restore %rbx
.cfi_restore	%rbx
	or	%r9,%rax
	ret
.cfi_endproc
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

___

# OPENSSL_cleanse($arg1 = buffer, $arg2 = length in bytes)
# Stores zeros over the buffer.  Lengths below 15 use the byte loop
# (.Little); longer buffers are first advanced byte-wise to an 8-byte
# boundary (.Lot), then cleared a quadword at a time (.Laligned), with any
# remainder finished by the byte loop.
print<<___;
.globl	OPENSSL_cleanse
.type	OPENSSL_cleanse,\@abi-omnipotent
.align	16
OPENSSL_cleanse:
.cfi_startproc
	endbranch
	xor	%rax,%rax
	cmp	\$15,$arg2
	jae	.Lot
	cmp	\$0,$arg2
	je	.Lret
.Little:
	mov	%al,($arg1)
	sub	\$1,$arg2
	lea	1($arg1),$arg1
	jnz	.Little
.Lret:
	ret
.align	16
.Lot:
	test	\$7,$arg1
	jz	.Laligned
	mov	%al,($arg1)
	lea	-1($arg2),$arg2
	lea	1($arg1),$arg1
	jmp	.Lot
.Laligned:
	mov	%rax,($arg1)
	lea	-8($arg2),$arg2
	test	\$-8,$arg2
	lea	8($arg1),$arg1
	jnz	.Laligned
	cmp	\$0,$arg2
	jne	.Little
	ret
.cfi_endproc
.size	OPENSSL_cleanse,.-OPENSSL_cleanse

___

# CRYPTO_memcmp($arg1, $arg2 = buffers, $arg3 = length in bytes)
# Returns 0 in %rax when the buffers match (or the length is 0) and 1
# otherwise.  A length of exactly 16 is handled with two quadword XORs; any
# other length ORs together the XOR of every byte pair, with no early exit
# from the loop, and then collapses the accumulator to 0 or 1.
print<<___;
.globl	CRYPTO_memcmp
.type	CRYPTO_memcmp,\@abi-omnipotent
.align	16
CRYPTO_memcmp:
.cfi_startproc
	endbranch
	xor	%rax,%rax
	xor	%r10,%r10
	cmp	\$0,$arg3
	je	.Lno_data
	cmp	\$16,$arg3
	jne	.Loop_cmp
	mov	($arg1),%r10
	mov	8($arg1),%r11
	mov	\$1,$arg3
	xor	($arg2),%r10
	xor	8($arg2),%r11
	or	%r11,%r10
	cmovnz	$arg3,%rax
	ret

.align	16
.Loop_cmp:
	mov	($arg1),%r10b
	lea	1($arg1),$arg1
	xor	($arg2),%r10b
	lea	1($arg2),$arg2
	or	%r10b,%al
	dec	$arg3
	jnz	.Loop_cmp
	neg	%rax
	shr	\$63,%rax
.Lno_data:
	ret
.cfi_endproc
.size	CRYPTO_memcmp,.-CRYPTO_memcmp
___

# OPENSSL_wipe_cpu(), non-Win64 variant.
# Zeroes %xmm0-%xmm15 and %rcx, %rdx, %rsi, %rdi, %r8-%r11, then returns the
# address 8(%rsp) in %rax.
print<<___ if (!$win64);
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
.cfi_startproc
	endbranch
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	pxor	%xmm10,%xmm10
	pxor	%xmm11,%xmm11
	pxor	%xmm12,%xmm12
	pxor	%xmm13,%xmm13
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%rsi,%rsi
	xorq	%rdi,%rdi
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.cfi_endproc
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
# OPENSSL_wipe_cpu(), Win64 variant.
# Zeroes only %xmm0-%xmm5 and %rcx, %rdx, %r8-%r11, then returns the address
# 8(%rsp) in %rax.
print<<___ if ($win64);
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
{
my $out="%r10";
my $cnt="%rcx";
my $max="%r11";
my $lasttick="%r8d";
my $lastdiff="%r9d";
# Stack slot (relative to %rsp) where OPENSSL_instrument_bus2 parks the
# initial $cnt: +8 on Win64, -8 elsewhere.  This must test the $win64
# variable.  The previous bareword `win64` was the always-true string
# "win64", so +8 was used on every platform and non-Win64 builds wrote above
# the return address, i.e. into the caller's frame, instead of below %rsp.
my $redzone=$win64?8:-8;

# OPENSSL_instrument_bus($arg1 = array of 32-bit counters, $arg2 = count)
#   Adds successive rdtsc differences into consecutive array elements,
#   flushing each element's cache line first, and returns the count.
#
# OPENSSL_instrument_bus2($arg1 = array, $arg2 = count, $arg3 = max probes)
#   Same measurement, but only advances to the next element (and decrements
#   the remaining count) when the current difference differs from the
#   previous one; stops after $arg3 probes or when the count is exhausted.
#   Returns the initial count minus the remaining count.
print<<___;
.globl	OPENSSL_instrument_bus
.type	OPENSSL_instrument_bus,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus:
.cfi_startproc
	endbranch
	mov	$arg1,$out	# tribute to Win64
	mov	$arg2,$cnt
	mov	$arg2,$max

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0
	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)
	jmp	.Loop
.align	16
.Loop:	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax
	mov	%edx,$lasttick
	mov	%eax,$lastdiff
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)
	lea	4($out),$out
	sub	\$1,$cnt
	jnz	.Loop

	mov	$max,%rax
	ret
.cfi_endproc
.size	OPENSSL_instrument_bus,.-OPENSSL_instrument_bus

.globl	OPENSSL_instrument_bus2
.type	OPENSSL_instrument_bus2,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus2:
.cfi_startproc
	endbranch
	mov	$arg1,$out	# tribute to Win64
	mov	$arg2,$cnt
	mov	$arg3,$max
	mov	$cnt,$redzone(%rsp)

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0

	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)

	rdtsc			# collect 1st diff
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	mov	%eax,$lastdiff	# lastdiff = diff
.Loop2:
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)	# accumulate diff

	sub	\$1,$max
	jz	.Ldone2

	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	cmp	$lastdiff,%eax
	mov	%eax,$lastdiff	# lastdiff = diff
	mov	\$0,%edx
	setne	%dl
	sub	%rdx,$cnt	# conditional --$cnt
	lea	($out,%rdx,4),$out	# conditional ++$out
	jnz	.Loop2

.Ldone2:
	mov	$redzone(%rsp),%rax
	sub	$cnt,%rax
	ret
.cfi_endproc
.size	OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
___
}

# gen_random($rdop)
# Emits OPENSSL_ia32_${rdop}_bytes($arg1 = buffer, $arg2 = length), where
# $rdop is the instruction mnemonic to use ("rdrand" or "rdseed" below).
# The generated routine fills the buffer 8 bytes at a time, retrying the
# instruction up to 8 times per word until it reports success via the carry
# flag; a final partial word is stored byte by byte.  It returns the number
# of bytes actually written in %rax (less than the request if 8 consecutive
# attempts fail) and zeroes %r10 before returning.
sub gen_random {
my $rdop = shift;
print<<___;
.globl	OPENSSL_ia32_${rdop}_bytes
.type	OPENSSL_ia32_${rdop}_bytes,\@abi-omnipotent
.align	16
OPENSSL_ia32_${rdop}_bytes:
.cfi_startproc
	endbranch
	xor	%rax, %rax	# return value
	cmp	\$0,$arg2
	je	.Ldone_${rdop}_bytes

	mov	\$8,%r11
.Loop_${rdop}_bytes:
	${rdop}	%r10
	jc	.Lbreak_${rdop}_bytes
	dec	%r11
	jnz	.Loop_${rdop}_bytes
	jmp	.Ldone_${rdop}_bytes

.align	16
.Lbreak_${rdop}_bytes:
	cmp	\$8,$arg2
	jb	.Ltail_${rdop}_bytes
	mov	%r10,($arg1)
	lea	8($arg1),$arg1
	add	\$8,%rax
	sub	\$8,$arg2
	jz	.Ldone_${rdop}_bytes
	mov	\$8,%r11
	jmp	.Loop_${rdop}_bytes

.align	16
.Ltail_${rdop}_bytes:
	mov	%r10b,($arg1)
	lea	1($arg1),$arg1
	inc	%rax
	shr	\$8,%r10
	dec	$arg2
	jnz	.Ltail_${rdop}_bytes

.Ldone_${rdop}_bytes:
	xor	%r10,%r10	# Clear sensitive data from register
	ret
.cfi_endproc
.size	OPENSSL_ia32_${rdop}_bytes,.-OPENSSL_ia32_${rdop}_bytes
___
}
gen_random("rdrand");
gen_random("rdseed");

close STDOUT or die "error closing STDOUT: $!";	# flush