#! /usr/bin/env perl
# Copyright 2004-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

# Perlasm generator for the 32-bit x86 CPU identification and utility
# routines.
#
# Command line, as consumed below:
#   - the last argument, if any, is taken as the output file name;
#   - the first remaining argument is handed to asm_init();
#   - any argument matching -DOPENSSL_IA32_SSE2 enables the SSE2 code paths.

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC, "${dir}perlasm", "perlasm");
require "x86asm.pl";

# Redirect STDOUT to the requested output file.  Use three-argument open so
# the file name is never parsed for mode characters, and fail loudly when
# the file cannot be opened instead of carrying on without it.
$output = pop;
if ($output) {
	open(STDOUT, ">", $output) or die "can't open $output: $!";
}

&asm_init($ARGV[0]);

for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }

# OPENSSL_ia32_cpuid(ptr)
#
# Probes the processor with CPUID and returns the (adjusted) feature words
# of cpuid(1): %eax receives the copy of %edx, %edx the copy of %ecx.
# Extended feature words are stored through the pointer argument at byte
# offsets 8..36 (the inline comments identify them as
# OPENSSL_ia32cap_P[2..9]).  If CPUID is not available, both %eax and %edx
# are returned as zero.
&function_begin("OPENSSL_ia32_cpuid");
	&xor	("edx","edx");
	# Try to flip EFLAGS bit 21; if the bit does not change there is no
	# CPUID instruction and we leave through "nocpuid".
	&pushf	();
	&pop	("eax");
	&mov	("ecx","eax");
	&xor	("eax",1<<21);
	&push	("eax");
	&popf	();
	&pushf	();
	&pop	("eax");
	&xor	("ecx","eax");
	&xor	("eax","eax");
	&mov	("esi",&wparam(0));
	&mov	(&DWP(8,"esi"),"eax");	# clear extended feature flags
	&bt	("ecx",21);
	&jnc	(&label("nocpuid"));
	&cpuid	();
	&mov	("edi","eax");		# max value for standard query level

	# Compare the vendor string; %ebp ends up 0 only for "GenuineIntel".
	&xor	("eax","eax");
	&cmp	("ebx",0x756e6547);	# "Genu"
	&setne	(&LB("eax"));
	&mov	("ebp","eax");
	&cmp	("edx",0x49656e69);	# "ineI"
	&setne	(&LB("eax"));
	&or	("ebp","eax");
	&cmp	("ecx",0x6c65746e);	# "ntel"
	&setne	(&LB("eax"));
	&or	("ebp","eax");		# 0 indicates Intel CPU
	&jz	(&label("intel"));

	# Same check for "AuthenticAMD"; %esi ends up 0 only on a match.
	&cmp	("ebx",0x68747541);	# "Auth"
	&setne	(&LB("eax"));
	&mov	("esi","eax");
	&cmp	("edx",0x69746E65);	# "enti"
	&setne	(&LB("eax"));
	&or	("esi","eax");
	&cmp	("ecx",0x444D4163);	# "cAMD"
	&setne	(&LB("eax"));
	&or	("esi","eax");		# 0 indicates AMD CPU
	&jnz	(&label("intel"));

	# AMD specific
	&mov	("eax",0x80000000);
	&cpuid	();
	&cmp	("eax",0x80000001);
	&jb	(&label("intel"));
	&mov	("esi","eax");
	&mov	("eax",0x80000001);
	&cpuid	();
	&or	("ebp","ecx");
	&and	("ebp",1<<11|1);	# isolate XOP bit
	&cmp	("esi",0x80000008);
	&jb	(&label("intel"));

	&mov	("eax",0x80000008);
	&cpuid	();
	&movz	("esi",&LB("ecx"));	# number of cores - 1
	&inc	("esi");		# number of cores

	&mov	("eax",1);
	&xor	("ecx","ecx");
	&cpuid	();
	&bt	("edx",28);
	&jnc	(&label("generic"));
	&shr	("ebx",16);
	&and	("ebx",0xff);
	&cmp	("ebx","esi");
	&ja	(&label("generic"));
	&and	("edx",0xefffffff);	# clear hyper-threading bit
	&jmp	(&label("generic"));

&set_label("intel");
	&cmp	("edi",4);
	&mov	("esi",-1);
	&jb	(&label("nocacheinfo"));

	&mov	("eax",4);
	&mov	("ecx",0);		# query L1D
	&cpuid	();
	&mov	("esi","eax");
	&shr	("esi",14);
	&and	("esi",0xfff);		# number of cores -1 per L1D

&set_label("nocacheinfo");
	&mov	("eax",1);
	&xor	("ecx","ecx");
	&cpuid	();
	&and	("edx",0xbfefffff);	# force reserved bits #20, #30 to 0
	&cmp	("ebp",0);
	&jne	(&label("notintel"));
	&or	("edx",1<<30);		# set reserved bit#30 on Intel CPUs
	&and	(&HB("eax"),15);	# family ID
	&cmp	(&HB("eax"),15);	# P4?
	&jne	(&label("notintel"));
	&or	("edx",1<<20);		# set reserved bit#20 to engage RC4_CHAR
&set_label("notintel");
	&bt	("edx",28);		# test hyper-threading bit
	&jnc	(&label("generic"));
	&and	("edx",0xefffffff);
	&cmp	("esi",0);
	&je	(&label("generic"));

	&or	("edx",0x10000000);
	&shr	("ebx",16);
	&cmp	(&LB("ebx"),1);
	&ja	(&label("generic"));
	&and	("edx",0xefffffff);	# clear hyper-threading bit if not

&set_label("generic");
	&and	("ebp",1<<11);		# isolate AMD XOP flag
	&and	("ecx",0xfffff7ff);	# force 11th bit to 0
	&mov	("esi","edx");		# %ebp:%esi is copy of %ecx:%edx
	&or	("ebp","ecx");		# merge AMD XOP flag

	# The jb below still consumes the flags of this cmp: the mov in
	# between does not touch them.
	&cmp	("edi",7);
	&mov	("edi",&wparam(0));
	&jb	(&label("no_extended_info"));
	&mov	("eax",7);
	&xor	("ecx","ecx");
	&cpuid	();
	&mov	(&DWP(8,"edi"),"ebx");	# save cpuid(EAX=0x7, ECX=0x0).EBX to OPENSSL_ia32cap_P[2]
	&mov	(&DWP(12,"edi"),"ecx");	# save cpuid(EAX=0x7, ECX=0x0).ECX to OPENSSL_ia32cap_P[3]
	&mov	(&DWP(16,"edi"),"edx");	# save cpuid(EAX=0x7, ECX=0x0).EDX to OPENSSL_ia32cap_P[4]
	&cmp	("eax",1);		# Do we have cpuid(EAX=0x7, ECX=0x1)?
	&jb	(&label("no_extended_info"));
	&mov	("eax",7);
	&mov	("ecx",1);
	&cpuid	();			# cpuid(EAX=0x7, ECX=0x1)
	&mov	(&DWP(20,"edi"),"eax");	# save cpuid(EAX=0x7, ECX=0x1).EAX to OPENSSL_ia32cap_P[5]
	&mov	(&DWP(24,"edi"),"edx");	# save cpuid(EAX=0x7, ECX=0x1).EDX to OPENSSL_ia32cap_P[6]
	&mov	(&DWP(28,"edi"),"ebx");	# save cpuid(EAX=0x7, ECX=0x1).EBX to OPENSSL_ia32cap_P[7]
	&mov	(&DWP(32,"edi"),"ecx");	# save cpuid(EAX=0x7, ECX=0x1).ECX to OPENSSL_ia32cap_P[8]

	&and	("edx",0x80000);	# Mask cpuid(EAX=0x7, ECX=0x1).EDX bit 19 to detect AVX10 support
	&cmp	("edx",0x0);
	&je	(&label("no_extended_info"));

	&mov	("eax",0x24);		# Have AVX10 Support, query for details
	&mov	("ecx",0x0);
	&cpuid	();			# cpuid(EAX=0x24, ECX=0x0) AVX10 Leaf
	&mov	(&DWP(36,"edi"),"ebx");	# save cpuid(EAX=0x24, ECX=0x0).EBX to OPENSSL_ia32cap_P[9]

&set_label("no_extended_info");

	&bt	("ebp",27);		# check OSXSAVE bit
	&jnc	(&label("clear_avx"));
	&xor	("ecx","ecx");
	&data_byte(0x0f,0x01,0xd0);	# xgetbv
	&and	("eax",6);
	&cmp	("eax",6);
	&je	(&label("done"));
	&cmp	("eax",2);
	&je	(&label("clear_avx"));
&set_label("clear_xmm");
	&and	("ebp",0xfdfffffd);	# clear AESNI and PCLMULQDQ bits
	&and	("esi",0xfeffffff);	# clear FXSR
&set_label("clear_avx");
	&and	("ebp",0xefffe7ff);	# clear AVX, FMA and AMD XOP bits
	&and	(&DWP(20,"edi"),0xff7fffff);	# ~(1<<23) clear AVXIFMA,
						# which is VEX-encoded
						# and requires YMM state support
	&and	(&DWP(8,"edi"),0xffffffdf);	# clear AVX2
&set_label("done");
	&mov	("eax","esi");
	&mov	("edx","ebp");
&set_label("nocpuid");
&function_end("OPENSSL_ia32_cpuid");

&external_label("OPENSSL_ia32cap_P");

# OPENSSL_rdtsc()
#
# Returns the rdtsc reading in %edx:%eax when bit 4 of the first
# OPENSSL_ia32cap_P word is set, and zero otherwise.
&function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"ecx"),4);
	&jnc	(&label("notsc"));
	&rdtsc	();
&set_label("notsc");
	&ret	();
&function_end_B("OPENSSL_rdtsc");

# This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host],
# but it's safe to call it on any [supported] 32-bit platform...
# Just check for [non-]zero return value...
#
# On success the result in %edx:%eax is the difference between the rdtsc
# readings taken after and before the halt; every "nohalt" exit returns 0.
&function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"ecx"),4);
	&jnc	(&label("nohalt"));	# no TSC

	&data_word(0x9058900e);		# push %cs; pop %eax
	&and	("eax",3);
	&jnz	(&label("nohalt"));	# not enough privileges

	&pushf	();
	&pop	("eax");
	&bt	("eax",9);
	&jnc	(&label("nohalt"));	# interrupts are disabled

	&rdtsc	();
	&push	("edx");
	&push	("eax");
	&halt	();
	&rdtsc	();

	&sub	("eax",&DWP(0,"esp"));
	&sbb	("edx",&DWP(4,"esp"));
	&add	("esp",8);
	&ret	();

&set_label("nohalt");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_instrument_halt");

# Essentially there is only one use for this function. Under DJGPP:
#
#	#include <go32.h>
#	...
#	i=OPENSSL_far_spin(_dos_ds,0x46c);
#	...
# to obtain the number of spins till closest timer interrupt.
#
# Returns 0 without spinning when interrupts are disabled.

&function_begin_B("OPENSSL_far_spin");
	&pushf	();
	&pop	("eax");
	&bt	("eax",9);
	&jnc	(&label("nospin"));	# interrupts are disabled

	&mov	("eax",&DWP(4,"esp"));
	&mov	("ecx",&DWP(8,"esp"));
	&data_word (0x90d88e1e);	# push %ds, mov %eax,%ds
	&xor	("eax","eax");
	&mov	("edx",&DWP(0,"ecx"));
	&jmp	(&label("spin"));

	&align	(16);
&set_label("spin");
	&inc	("eax");
	&cmp	("edx",&DWP(0,"ecx"));
	&je	(&label("spin"));

	&data_word (0x1f909090);	# pop %ds
	&ret	();

&set_label("nospin");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_far_spin");

# OPENSSL_wipe_cpu()
#
# Zeroes %eax/%edx, optionally %xmm0-%xmm7 (SSE2 builds with the SSE2 and
# FXSR capability bits set) and the x87 register bank, then returns the
# address %esp+4 in %eax.
&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&mov	("ecx",&DWP(0,"ecx"));
	# NOTE(review): %ecx now holds the capability word itself, yet the bt
	# below uses it as a memory address.  A register-form test of %ecx
	# may have been intended - confirm before changing the emitted code.
	&bt	(&DWP(0,"ecx"),1);
	&jnc	(&label("no_x87"));
	if ($sse2) {
		&and	("ecx",1<<26|1<<24);	# check SSE2 and FXSR bits
		&cmp	("ecx",1<<26|1<<24);
		&jne	(&label("no_sse2"));
		&pxor	("xmm0","xmm0");
		&pxor	("xmm1","xmm1");
		&pxor	("xmm2","xmm2");
		&pxor	("xmm3","xmm3");
		&pxor	("xmm4","xmm4");
		&pxor	("xmm5","xmm5");
		&pxor	("xmm6","xmm6");
		&pxor	("xmm7","xmm7");
	&set_label("no_sse2");
	}
	# just a bunch of fldz to zap the fp/mm bank followed by finit...
	&data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b);
&set_label("no_x87");
	&lea	("eax",&DWP(4,"esp"));
	&ret	();
&function_end_B("OPENSSL_wipe_cpu");

# OPENSSL_atomic_add(ptr, increment)
#
# Adds the increment to *ptr with a lock-cmpxchg retry loop and returns
# the new value in %eax.
&function_begin_B("OPENSSL_atomic_add");
	&mov	("edx",&DWP(4,"esp"));	# fetch the pointer, 1st arg
	&mov	("ecx",&DWP(8,"esp"));	# fetch the increment, 2nd arg
	&push	("ebx");
	&nop	();
	&mov	("eax",&DWP(0,"edx"));
&set_label("spin");
	&lea	("ebx",&DWP(0,"eax","ecx"));
	&nop	();
	&data_word(0x1ab10ff0);	# lock;	cmpxchg	%ebx,(%edx)	# %eax is involved and is always reloaded
	&jne	(&label("spin"));
	&mov	("eax","ebx");	# OpenSSL expects the new value
	&pop	("ebx");
	&ret	();
&function_end_B("OPENSSL_atomic_add");

# OPENSSL_cleanse(ptr, len)
#
# Stores zero bytes over len bytes starting at ptr.  Lengths below 7 are
# cleared byte by byte; longer buffers are first advanced bytewise to a
# 4-byte boundary, then cleared a dword at a time, and any remainder is
# finished by the bytewise loop.
&function_begin_B("OPENSSL_cleanse");
	&mov	("edx",&wparam(0));
	&mov	("ecx",&wparam(1));
	&xor	("eax","eax");
	&cmp	("ecx",7);
	&jae	(&label("lot"));
	&cmp	("ecx",0);
	&je	(&label("ret"));
&set_label("little");
	&mov	(&BP(0,"edx"),"al");
	&sub	("ecx",1);
	&lea	("edx",&DWP(1,"edx"));	# lea keeps the flags of the sub intact
	&jnz	(&label("little"));
&set_label("ret");
	&ret	();

&set_label("lot",16);
	&test	("edx",3);
	&jz	(&label("aligned"));
	&mov	(&BP(0,"edx"),"al");
	&lea	("ecx",&DWP(-1,"ecx"));
	&lea	("edx",&DWP(1,"edx"));
	&jmp	(&label("lot"));
&set_label("aligned");
	&mov	(&DWP(0,"edx"),"eax");
	&lea	("ecx",&DWP(-4,"ecx"));
	&test	("ecx",-4);
	&lea	("edx",&DWP(4,"edx"));	# lea keeps the flags of the test intact
	&jnz	(&label("aligned"));
	&cmp	("ecx",0);
	&jne	(&label("little"));
	&ret	();
&function_end_B("OPENSSL_cleanse");

# CRYPTO_memcmp(a, b, len)
#
# XORs every byte pair and ORs the results together, without leaving the
# loop early.  Returns 0 in %eax when all bytes match (or len is 0) and 1
# otherwise.
&function_begin_B("CRYPTO_memcmp");
	&push	("esi");
	&push	("edi");
	&mov	("esi",&wparam(0));
	&mov	("edi",&wparam(1));
	&mov	("ecx",&wparam(2));
	&xor	("eax","eax");
	&xor	("edx","edx");
	&cmp	("ecx",0);
	&je	(&label("no_data"));
&set_label("loop");
	&mov	("dl",&BP(0,"esi"));
	&lea	("esi",&DWP(1,"esi"));
	&xor	("dl",&BP(0,"edi"));
	&lea	("edi",&DWP(1,"edi"));
	&or	("al","dl");
	&dec	("ecx");
	&jnz	(&label("loop"));
	&neg	("eax");		# non-zero accumulator -> sign bit set
	&shr	("eax",31);		# reduce to 0 or 1
&set_label("no_data");
	&pop	("edi");
	&pop	("esi");
	&ret	();
&function_end_B("CRYPTO_memcmp");
{
my $lasttick = "esi";
my $lastdiff = "ebx";
my $out = "edi";
my $cnt = "ecx";
my $max = "ebp";

# OPENSSL_instrument_bus(out, cnt)
#
# Returns 0 unless this is an SSE2 build and capability bits 4 and 19 are
# set.  Otherwise it accumulates the difference between successive rdtsc
# readings into consecutive dwords of out, flushing each with clflush
# first, and returns the second argument.
&function_begin("OPENSSL_instrument_bus");
    &mov	("eax",0);
    if ($sse2) {
	&picmeup("edx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"edx"),4);
	&jnc	(&label("nogo"));	# no TSC
	&bt	(&DWP(0,"edx"),19);
	&jnc	(&label("nogo"));	# no CLFLUSH

	&mov	($out,&wparam(0));	# load arguments
	&mov	($cnt,&wparam(1));

	# collect 1st tick
	&rdtsc	();
	&mov	($lasttick,"eax");	# lasttick = tick
	&mov	($lastdiff,0);		# lastdiff = 0
	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),$lastdiff);
	&jmp	(&label("loop"));

&set_label("loop",16);
	&rdtsc	();
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
	&sub	("eax",$lasttick);	# diff
	&mov	($lasttick,"edx");	# lasttick = tick
	&mov	($lastdiff,"eax");	# lastdiff = diff
	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),"eax");	# accumulate diff
	&lea	($out,&DWP(4,$out));	# ++$out
	&sub	($cnt,1);		# --$cnt
	&jnz	(&label("loop"));

	&mov	("eax",&wparam(1));
&set_label("nogo");
    }
&function_end("OPENSSL_instrument_bus");

# OPENSSL_instrument_bus2(out, cnt, max)
#
# Same capability gating as OPENSSL_instrument_bus.  The output pointer
# and the remaining count only move when the current tick difference is
# not equal to the previous one; the loop stops when either the count or
# max reaches zero.  Returns the second argument minus the remaining
# count.
&function_begin("OPENSSL_instrument_bus2");
    &mov	("eax",0);
    if ($sse2) {
	&picmeup("edx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"edx"),4);
	&jnc	(&label("nogo"));	# no TSC
	&bt	(&DWP(0,"edx"),19);
	&jnc	(&label("nogo"));	# no CLFLUSH

	&mov	($out,&wparam(0));	# load arguments
	&mov	($cnt,&wparam(1));
	&mov	($max,&wparam(2));

	&rdtsc	();			# collect 1st tick
	&mov	($lasttick,"eax");	# lasttick = tick
	&mov	($lastdiff,0);		# lastdiff = 0

	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),$lastdiff);

	&rdtsc	();			# collect 1st diff
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
	&sub	("eax",$lasttick);	# diff
	&mov	($lasttick,"edx");	# lasttick = tick
	&mov	($lastdiff,"eax");	# lastdiff = diff
	&jmp	(&label("loop2"));

&set_label("loop2",16);
	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),"eax");	# accumulate diff

	&sub	($max,1);
	&jz	(&label("done2"));

	&rdtsc	();
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
	&sub	("eax",$lasttick);	# diff
	&mov	($lasttick,"edx");	# lasttick = tick
	&cmp	("eax",$lastdiff);
	&mov	($lastdiff,"eax");	# lastdiff = diff
	&mov	("edx",0);
	&setne	("dl");
	&sub	($cnt,"edx");		# conditional --$cnt
	&lea	($out,&DWP(0,$out,"edx",4));	# conditional ++$out
	&jnz	(&label("loop2"));

&set_label("done2");
	&mov	("eax",&wparam(1));
	&sub	("eax",$cnt);
&set_label("nogo");
    }
&function_end("OPENSSL_instrument_bus2");
}

# gen_random($rdop)
#
# Emits OPENSSL_ia32_${rdop}_bytes(buf, len), where $rdop is the name of
# the instruction wrapper to call ("rdrand" or "rdseed" below).  The
# generated routine retries the instruction up to 8 times per value,
# stores the values four bytes at a time, finishes a shorter remainder
# bytewise, and returns in %eax the number of bytes written.
sub gen_random {
my $rdop = shift;
&function_begin_B("OPENSSL_ia32_${rdop}_bytes");
	&push	("edi");
	&push	("ebx");
	&xor	("eax","eax");		# return value
	&mov	("edi",&wparam(0));
	&mov	("ebx",&wparam(1));

	&cmp	("ebx",0);
	&je	(&label("done"));

	&mov	("ecx",8);
&set_label("loop");
	&${rdop}("edx");
	&jc	(&label("break"));
	&loop	(&label("loop"));
	&jmp	(&label("done"));

&set_label("break",16);
	&cmp	("ebx",4);
	&jb	(&label("tail"));
	&mov	(&DWP(0,"edi"),"edx");
	&lea	("edi",&DWP(4,"edi"));
	&add	("eax",4);
	&sub	("ebx",4);
	&jz	(&label("done"));
	&mov	("ecx",8);
	&jmp	(&label("loop"));

&set_label("tail",16);
	&mov	(&BP(0,"edi"),"dl");
	&lea	("edi",&DWP(1,"edi"));
	&inc	("eax");
	&shr	("edx",8);
	&dec	("ebx");
	&jnz	(&label("tail"));

&set_label("done");
	&xor	("edx","edx");		# Clear random value from registers
	&pop	("ebx");
	&pop	("edi");
	&ret	();
&function_end_B("OPENSSL_ia32_${rdop}_bytes");
}
&gen_random("rdrand");
&gen_random("rdseed");

&initseg("OPENSSL_cpuid_setup");

&hidden("OPENSSL_cpuid_setup");
&hidden("OPENSSL_ia32cap_P");

&asm_finish();

close STDOUT or die "error closing STDOUT: $!";