#! /usr/bin/env perl
# Copyright 2005-2021 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

# Perlasm generator: prints x86_64 assembly for a handful of CPU
# identification and utility routines.  Everything printed to STDOUT is
# piped through x86_64-xlate.pl, which is invoked with the requested
# flavour and output file.

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Win64 calling convention is selected by flavour or by a .asm output name.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Look for the translator next to this script, then in a perlasm/ subdirectory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# From here on STDOUT is the translator's standard input.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Registers carrying the first four integer arguments.
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order

print<<___;
.extern		OPENSSL_cpuid_setup
.hidden		OPENSSL_cpuid_setup
.section	.init
	call	OPENSSL_cpuid_setup

.hidden	OPENSSL_ia32cap_P
.comm	OPENSSL_ia32cap_P,16,4

.text

	# OPENSSL_atomic_add: add the second argument to the 32-bit value
	# the first argument points at, using a lock cmpxchg retry loop;
	# returns the new value sign-extended to 64 bits.
.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,\@abi-omnipotent
.align	16
OPENSSL_atomic_add:
.cfi_startproc
	endbranch
	movl	($arg1),%eax
.Lspin:	leaq	($arg2,%rax),%r8
	.byte	0xf0		# lock
	cmpxchgl	%r8d,($arg1)
	jne	.Lspin
	movl	%r8d,%eax
	.byte	0x48,0x98	# cltq/cdqe
	ret
.cfi_endproc
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add

	# OPENSSL_rdtsc: return the rdtsc result (%edx:%eax) as one
	# 64-bit value in %rax.
.globl	OPENSSL_rdtsc
.type	OPENSSL_rdtsc,\@abi-omnipotent
.align	16
OPENSSL_rdtsc:
.cfi_startproc
	endbranch
	rdtsc
	shl	\$32,%rdx
	or	%rdx,%rax
	ret
.cfi_endproc
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc

	# OPENSSL_ia32_cpuid: declared as a one-argument function; the body
	# addresses the argument through %rdi.  Stores the (filtered) leaf 7
	# %ebx/%ecx words at offsets 8 and 12 of the argument and returns
	# the adjusted leaf 1 flags, %ecx in the upper and %edx in the
	# lower 32 bits of %rax.
.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@function,1
.align	16
OPENSSL_ia32_cpuid:
.cfi_startproc
	endbranch
	mov	%rbx,%r8		# save %rbx
.cfi_register	%rbx,%r8

	xor	%eax,%eax
	mov	%rax,8(%rdi)		# clear extended feature flags
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	jz	.Lintel

	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel

	# AMD specific
	mov	\$0x80000000,%eax
	cpuid
	cmp	\$0x80000001,%eax
	jb	.Lintel
	mov	%eax,%r10d
	mov	\$0x80000001,%eax
	cpuid
	or	%ecx,%r9d
	and	\$0x00000801,%r9d	# isolate AMD XOP bit, 1<<11

	cmp	\$0x80000008,%r10d
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	jmp	.Lgeneric

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d
	jb	.Lnocacheinfo

	mov	\$4,%eax
	mov	\$0,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	movd	%eax,%xmm0		# put aside processor id
	and	\$0xbfefffff,%edx	# force reserved bits to 0
	cmp	\$0,%r9d
	jne	.Lnotintel
	or	\$0x40000000,%edx	# set reserved bit#30 on Intel CPUs
	and	\$15,%ah
	cmp	\$15,%ah		# examine Family ID
	jne	.LnotP4
	or	\$0x00100000,%edx	# set reserved bit#20 to engage RC4_CHAR
.LnotP4:
	cmp	\$6,%ah
	jne	.Lnotintel
	and	\$0x0fff0ff0,%eax
	cmp	\$0x00050670,%eax	# Knights Landing
	je	.Lknights
	cmp	\$0x00080650,%eax	# Knights Mill (according to sde)
	jne	.Lnotintel
.Lknights:
	and	\$0xfbffffff,%ecx	# clear XSAVE flag to mimic Silvermont

.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	cmp	\$0,%r10d
	je	.Lgeneric

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
.Lgeneric:
	and	\$0x00000800,%r9d	# isolate AMD XOP flag
	and	\$0xfffff7ff,%ecx
	or	%ecx,%r9d		# merge AMD XOP flag

	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx

	cmp	\$7,%r11d
	jb	.Lno_extended_info
	mov	\$7,%eax
	xor	%ecx,%ecx
	cpuid
	bt	\$26,%r9d		# check XSAVE bit, cleared on Knights
	jc	.Lnotknights
	and	\$0xfff7ffff,%ebx	# clear ADCX/ADOX flag
.Lnotknights:
	movd	%xmm0,%eax		# restore processor id
	and	\$0x0fff0ff0,%eax
	cmp	\$0x00050650,%eax	# Skylake-X
	jne	.Lnotskylakex
	and	\$0xfffeffff,%ebx	# ~(1<<16)
					# suppress AVX512F flag on Skylake-X
.Lnotskylakex:
	mov	%ebx,8(%rdi)		# save extended feature flags
	mov	%ecx,12(%rdi)
.Lno_extended_info:

	bt	\$27,%r9d		# check OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$0xe6,%eax		# isolate XMM, YMM and ZMM state support
	cmp	\$0xe6,%eax
	je	.Ldone
	andl	\$0x3fdeffff,8(%rdi)	# ~(1<<31|1<<30|1<<21|1<<16)
					# clear AVX512F+BW+VL+IFMA, all of
					# them are EVEX-encoded, which requires
					# ZMM state support even if one uses
					# only XMM and YMM :-(
	and	\$6,%eax		# isolate XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone
.Lclear_avx:
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits
	mov	\$0x3fdeffdf,%eax	# ~(1<<31|1<<30|1<<21|1<<16|1<<5)
	and	%eax,8(%rdi)		# clear AVX2 and AVX512* bits
.Ldone:
	shl	\$32,%r9
	mov	%r10d,%eax
	mov	%r8,%rbx		# restore %rbx
.cfi_restore	%rbx
	or	%r9,%rax
	ret
.cfi_endproc
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

	# OPENSSL_cleanse: store zero bytes over the buffer given by the
	# first argument, for the byte count given by the second.  Counts
	# of 15 or more are byte-stored up to 8-byte alignment, then
	# stored 8 bytes at a time, with a byte-wise tail.
.globl	OPENSSL_cleanse
.type	OPENSSL_cleanse,\@abi-omnipotent
.align	16
OPENSSL_cleanse:
.cfi_startproc
	endbranch
	xor	%rax,%rax
	cmp	\$15,$arg2
	jae	.Lot
	cmp	\$0,$arg2
	je	.Lret
.Little:
	mov	%al,($arg1)
	sub	\$1,$arg2
	lea	1($arg1),$arg1
	jnz	.Little
.Lret:
	ret
.align	16
.Lot:
	test	\$7,$arg1
	jz	.Laligned
	mov	%al,($arg1)
	lea	-1($arg2),$arg2
	lea	1($arg1),$arg1
	jmp	.Lot
.Laligned:
	mov	%rax,($arg1)
	lea	-8($arg2),$arg2
	test	\$-8,$arg2
	lea	8($arg1),$arg1
	jnz	.Laligned
	cmp	\$0,$arg2
	jne	.Little
	ret
.cfi_endproc
.size	OPENSSL_cleanse,.-OPENSSL_cleanse

	# CRYPTO_memcmp: compare two buffers of the length given by the
	# third argument without an early exit on mismatch; returns 0 when
	# all bytes are equal (or the length is 0) and 1 otherwise.  A
	# length of exactly 16 is handled with two 8-byte loads per buffer.
.globl	CRYPTO_memcmp
.type	CRYPTO_memcmp,\@abi-omnipotent
.align	16
CRYPTO_memcmp:
.cfi_startproc
	endbranch
	xor	%rax,%rax
	xor	%r10,%r10
	cmp	\$0,$arg3
	je	.Lno_data
	cmp	\$16,$arg3
	jne	.Loop_cmp
	mov	($arg1),%r10
	mov	8($arg1),%r11
	mov	\$1,$arg3
	xor	($arg2),%r10
	xor	8($arg2),%r11
	or	%r11,%r10
	cmovnz	$arg3,%rax
	ret

.align	16
.Loop_cmp:
	mov	($arg1),%r10b
	lea	1($arg1),$arg1
	xor	($arg2),%r10b
	lea	1($arg2),$arg2
	or	%r10b,%al
	dec	$arg3
	jnz	.Loop_cmp
	neg	%rax
	shr	\$63,%rax
.Lno_data:
	ret
.cfi_endproc
.size	CRYPTO_memcmp,.-CRYPTO_memcmp
___

# OPENSSL_wipe_cpu comes in two variants: the non-Win64 one zeroes
# %xmm0-%xmm15 plus %rcx, %rdx, %rsi, %rdi and %r8-%r11; the Win64 one only
# zeroes %xmm0-%xmm5 plus %rcx, %rdx and %r8-%r11.  Both return %rsp+8.
print<<___ if (!$win64);
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
.cfi_startproc
	endbranch
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	pxor	%xmm10,%xmm10
	pxor	%xmm11,%xmm11
	pxor	%xmm12,%xmm12
	pxor	%xmm13,%xmm13
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%rsi,%rsi
	xorq	%rdi,%rdi
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.cfi_endproc
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
print<<___ if ($win64);
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
{
my $out="%r10";
my $cnt="%rcx";
my $max="%r11";
my $lasttick="%r8d";
my $lastdiff="%r9d";
# Offset from %rsp of the scratch slot OPENSSL_instrument_bus2 uses to keep
# its original count: 8 on Win64, -8 otherwise.  This must test the $win64
# variable; the bareword "win64" is a non-empty string and therefore always
# true, which selected 8(%rsp) on every platform.
my $redzone=$win64?8:-8;

print<<___;
	# OPENSSL_instrument_bus: for each of the 32-bit slots counted by
	# the second argument, flush the slot's cache line and lock-add the
	# rdtsc delta since the previous sample into it; returns the count.
.globl	OPENSSL_instrument_bus
.type	OPENSSL_instrument_bus,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus:
.cfi_startproc
	endbranch
	mov	$arg1,$out	# tribute to Win64
	mov	$arg2,$cnt
	mov	$arg2,$max

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0
	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)
	jmp	.Loop
.align	16
.Loop:	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax
	mov	%edx,$lasttick
	mov	%eax,$lastdiff
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)
	lea	4($out),$out
	sub	\$1,$cnt
	jnz	.Loop

	mov	$max,%rax
	ret
.cfi_endproc
.size	OPENSSL_instrument_bus,.-OPENSSL_instrument_bus

	# OPENSSL_instrument_bus2: like the routine above, but the output
	# slot only advances (and the remaining count only drops) when a
	# delta differs from the previous one; the third argument bounds
	# the number of samples.  Returns the number of slots consumed.
.globl	OPENSSL_instrument_bus2
.type	OPENSSL_instrument_bus2,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus2:
.cfi_startproc
	endbranch
	mov	$arg1,$out	# tribute to Win64
	mov	$arg2,$cnt
	mov	$arg3,$max
	mov	$cnt,$redzone(%rsp)

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0

	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)

	rdtsc			# collect 1st diff
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	mov	%eax,$lastdiff	# lastdiff = diff
.Loop2:
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)	# accumulate diff

	sub	\$1,$max
	jz	.Ldone2

	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	cmp	$lastdiff,%eax
	mov	%eax,$lastdiff	# lastdiff = diff
	mov	\$0,%edx
	setne	%dl
	sub	%rdx,$cnt	# conditional --$cnt
	lea	($out,%rdx,4),$out	# conditional ++$out
	jnz	.Loop2

.Ldone2:
	mov	$redzone(%rsp),%rax
	sub	$cnt,%rax
	ret
.cfi_endproc
.size	OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
___
}

# Emit OPENSSL_ia32_<rdop>_bytes for the instruction mnemonic passed as the
# only argument.  The generated routine fills the buffer given by its first
# argument with the number of bytes given by its second: each 8-byte word is
# fetched with <rdop>, retried up to 8 times until the carry flag is set.
# It returns the number of bytes actually stored (fewer than requested if
# the retries run out) and zeroes %r10 before returning.
sub gen_random {
my $rdop = shift;
print<<___;
.globl	OPENSSL_ia32_${rdop}_bytes
.type	OPENSSL_ia32_${rdop}_bytes,\@abi-omnipotent
.align	16
OPENSSL_ia32_${rdop}_bytes:
.cfi_startproc
	endbranch
	xor	%rax, %rax	# return value
	cmp	\$0,$arg2
	je	.Ldone_${rdop}_bytes

	mov	\$8,%r11
.Loop_${rdop}_bytes:
	${rdop}	%r10
	jc	.Lbreak_${rdop}_bytes
	dec	%r11
	jnz	.Loop_${rdop}_bytes
	jmp	.Ldone_${rdop}_bytes

.align	16
.Lbreak_${rdop}_bytes:
	cmp	\$8,$arg2
	jb	.Ltail_${rdop}_bytes
	mov	%r10,($arg1)
	lea	8($arg1),$arg1
	add	\$8,%rax
	sub	\$8,$arg2
	jz	.Ldone_${rdop}_bytes
	mov	\$8,%r11
	jmp	.Loop_${rdop}_bytes

.align	16
.Ltail_${rdop}_bytes:
	mov	%r10b,($arg1)
	lea	1($arg1),$arg1
	inc	%rax
	shr	\$8,%r10
	dec	$arg2
	jnz	.Ltail_${rdop}_bytes

.Ldone_${rdop}_bytes:
	xor	%r10,%r10	# Clear sensitive data from register
	ret
.cfi_endproc
.size	OPENSSL_ia32_${rdop}_bytes,.-OPENSSL_ia32_${rdop}_bytes
___
}
gen_random("rdrand");
gen_random("rdseed");

close STDOUT or die "error closing STDOUT: $!";	# flush