#! /usr/bin/env perl
# Copyright 2004-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

# Add the perlasm directory that sits next to this script (and a plain
# "perlasm" fallback) to the module search path, then load the x86
# assembler front end.  The &mnemonic()/&label()/&DWP()... helpers called
# throughout this file are not defined here and are expected to come from
# x86asm.pl.  They are called with a leading '&' on purpose: names such as
# xor, and, or, push and pop are Perl built-ins and would otherwise be
# parsed as operators.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC, "${dir}perlasm", "perlasm");
require "x86asm.pl";

# The last command-line argument, when present, names the output file and
# STDOUT is redirected to it.  Three-argument open keeps odd characters in
# the file name from being interpreted as an open mode, and a failed open
# is fatal rather than silently leaving STDOUT where it was.
$output = pop @ARGV;
if ($output) {
    open STDOUT, '>', $output
        or die "can't open $output for writing: $!";
}

&asm_init($ARGV[0]);

# Remember whether the caller asked for SSE2 code paths.
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }

# OPENSSL_ia32_cpuid(ptr)
#
# Probes the processor through CPUID and returns an adjusted copy of the
# leaf-1 feature words: the %edx-derived word in %eax and the %ecx-derived
# word in %edx.  The dword at 8(ptr) is first cleared and later receives
# %ebx of leaf 7 when that leaf is available.  If the ID flag (EFLAGS bit
# 21) cannot be toggled, CPUID is considered absent and the function
# leaves through "nocpuid" with %eax and %edx both zero.
&function_begin("OPENSSL_ia32_cpuid");
    &xor    ("edx","edx");
    # Try to flip EFLAGS bit 21; %ecx ends up holding the bits that changed.
    &pushf  ();
    &pop    ("eax");
    &mov    ("ecx","eax");
    &xor    ("eax",1<<21);
    &push   ("eax");
    &popf   ();
    &pushf  ();
    &pop    ("eax");
    &xor    ("ecx","eax");
    &xor    ("eax","eax");
    &mov    ("esi",&wparam(0));
    &mov    (&DWP(8,"esi"),"eax");      # clear extended feature flags
    &bt     ("ecx",21);
    &jnc    (&label("nocpuid"));
    &cpuid  ();                         # leaf 0: vendor string and max leaf
    &mov    ("edi","eax");              # max value for standard query level

    # Compare the vendor string with "GenuineIntel"; %ebp accumulates the
    # mismatches, so it stays zero only for an exact match.
    &xor    ("eax","eax");
    &cmp    ("ebx",0x756e6547);         # "Genu"
    &setne  (&LB("eax"));
    &mov    ("ebp","eax");
    &cmp    ("edx",0x49656e69);         # "ineI"
    &setne  (&LB("eax"));
    &or     ("ebp","eax");
    &cmp    ("ecx",0x6c65746e);         # "ntel"
    &setne  (&LB("eax"));
    &or     ("ebp","eax");              # 0 indicates Intel CPU
    &jz     (&label("intel"));

    # Same scheme for "AuthenticAMD", accumulated in %esi.
    &cmp    ("ebx",0x68747541);         # "Auth"
    &setne  (&LB("eax"));
    &mov    ("esi","eax");
    &cmp    ("edx",0x69746E65);         # "enti"
    &setne  (&LB("eax"));
    &or     ("esi","eax");
    &cmp    ("ecx",0x444D4163);         # "cAMD"
    &setne  (&LB("eax"));
    &or     ("esi","eax");              # 0 indicates AMD CPU
    &jnz    (&label("intel"));          # neither vendor: take the common path

    # AMD specific
    &mov    ("eax",0x80000000);
    &cpuid  ();                         # %eax = max extended leaf
    &cmp    ("eax",0x80000001);
    &jb     (&label("intel"));
    &mov    ("esi","eax");              # keep max extended leaf
    &mov    ("eax",0x80000001);
    &cpuid  ();
    &or     ("ebp","ecx");
    &and    ("ebp",1<<11|1);            # isolate XOP bit
    &cmp    ("esi",0x80000008);
    &jb     (&label("intel"));

    &mov    ("eax",0x80000008);
    &cpuid  ();
    &movz   ("esi",&LB("ecx"));         # number of cores - 1
    &inc    ("esi");                    # number of cores

    &mov    ("eax",1);
    &xor    ("ecx","ecx");
    &cpuid  ();
    &bt     ("edx",28);
    &jnc    (&label("generic"));
    &shr    ("ebx",16);
    &and    ("ebx",0xff);               # leaf-1 %ebx bits 16..23
    &cmp    ("ebx","esi");
    &ja     (&label("generic"));        # more than the core count: keep bit 28
    &and    ("edx",0xefffffff);         # clear hyper-threading bit
    &jmp    (&label("generic"));

&set_label("intel");
    &cmp    ("edi",4);
    &mov    ("esi",-1);                 # mov leaves the flags from cmp intact
    &jb     (&label("nocacheinfo"));

    &mov    ("eax",4);
    &mov    ("ecx",0);                  # query L1D
    &cpuid  ();
    &mov    ("esi","eax");
    &shr    ("esi",14);
    &and    ("esi",0xfff);              # number of cores -1 per L1D

&set_label("nocacheinfo");
    &mov    ("eax",1);
    &xor    ("ecx","ecx");
    &cpuid  ();
    &and    ("edx",0xbfefffff);         # force reserved bits #20, #30 to 0
    &cmp    ("ebp",0);
    &jne    (&label("notintel"));
    &or     ("edx",1<<30);              # set reserved bit#30 on Intel CPUs
    &and    (&HB("eax"),15);            # family ID
    &cmp    (&HB("eax"),15);            # P4?
    &jne    (&label("notintel"));
    &or     ("edx",1<<20);              # set reserved bit#20 to engage RC4_CHAR
&set_label("notintel");
    &bt     ("edx",28);                 # test hyper-threading bit
    &jnc    (&label("generic"));
    &and    ("edx",0xefffffff);
    &cmp    ("esi",0);
    &je     (&label("generic"));

    &or     ("edx",0x10000000);
    &shr    ("ebx",16);
    &cmp    (&LB("ebx"),1);
    &ja     (&label("generic"));
    &and    ("edx",0xefffffff);         # clear hyper-threading bit if not

&set_label("generic");
    &and    ("ebp",1<<11);              # isolate AMD XOP flag
    &and    ("ecx",0xfffff7ff);         # force 11th bit to 0
    &mov    ("esi","edx");              # %ebp:%esi is copy of %ecx:%edx
    &or     ("ebp","ecx");              # merge AMD XOP flag

    &cmp    ("edi",7);
    &mov    ("edi",&wparam(0));         # reload ptr; flags from cmp survive
    &jb     (&label("no_extended_info"));
    &mov    ("eax",7);
    &xor    ("ecx","ecx");
    &cpuid  ();
    &mov    (&DWP(8,"edi"),"ebx");      # save extended feature flag
&set_label("no_extended_info");

    &bt     ("ebp",27);                 # check OSXSAVE bit
    &jnc    (&label("clear_avx"));
    &xor    ("ecx","ecx");
    &data_byte(0x0f,0x01,0xd0);         # xgetbv
    &and    ("eax",6);
    &cmp    ("eax",6);
    &je     (&label("done"));           # both state bits set: keep everything
    &cmp    ("eax",2);
    &je     (&label("clear_avx"));      # only bit 1 set: drop AVX-class bits
&set_label("clear_xmm");
    &and    ("ebp",0xfdfffffd);         # clear AESNI and PCLMULQDQ bits
    &and    ("esi",0xfeffffff);         # clear FXSR
&set_label("clear_avx");
    &and    ("ebp",0xefffe7ff);         # clear AVX, FMA and AMD XOP bits
    &and    (&DWP(8,"edi"),0xffffffdf); # clear AVX2
&set_label("done");
    &mov    ("eax","esi");
    &mov    ("edx","ebp");
&set_label("nocpuid");
&function_end("OPENSSL_ia32_cpuid");

&external_label("OPENSSL_ia32cap_P");

# OPENSSL_rdtsc()
#
# Returns the time-stamp counter in %edx:%eax, or zero in both registers
# when bit 4 of the first OPENSSL_ia32cap_P word is clear.
&function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
    &xor    ("eax","eax");
    &xor    ("edx","edx");
    &picmeup("ecx","OPENSSL_ia32cap_P");
    &bt     (&DWP(0,"ecx"),4);
    &jnc    (&label("notsc"));
    &rdtsc  ();
&set_label("notsc");
    &ret    ();
&function_end_B("OPENSSL_rdtsc");

# This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host],
# but it's safe to call it on any [supported] 32-bit platform...
# Just check for [non-]zero return value...
#
# The value returned in %edx:%eax is the time-stamp counter difference
# taken across a single hlt instruction; every bail-out path returns zero.
&function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
    &picmeup("ecx","OPENSSL_ia32cap_P");
    &bt     (&DWP(0,"ecx"),4);
    &jnc    (&label("nohalt"));         # no TSC

    &data_word(0x9058900e);             # push %cs; pop %eax
    &and    ("eax",3);                  # low two selector bits: privilege level
    &jnz    (&label("nohalt"));         # not enough privileges

    &pushf  ();
    &pop    ("eax");
    &bt     ("eax",9);
    &jnc    (&label("nohalt"));         # interrupts are disabled

    &rdtsc  ();
    &push   ("edx");                    # save the starting tick on the stack
    &push   ("eax");
    &halt   ();
    &rdtsc  ();

    &sub    ("eax",&DWP(0,"esp"));      # 64-bit subtract of the saved tick
    &sbb    ("edx",&DWP(4,"esp"));
    &add    ("esp",8);
    &ret    ();

&set_label("nohalt");
    &xor    ("eax","eax");
    &xor    ("edx","edx");
    &ret    ();
&function_end_B("OPENSSL_instrument_halt");

# Essentially there is only one use for this function. Under DJGPP:
#
#       #include <go32.h>
#       ...
#       i=OPENSSL_far_spin(_dos_ds,0x46c);
#       ...
# to obtain the number of spins till closest timer interrupt.

# OPENSSL_far_spin(selector, offset)
#
# Loads the first argument into %ds, then counts in %eax how many times the
# dword at offset (second argument) can be re-read before it changes.
# Returns zero in %eax/%edx without touching %ds when EFLAGS bit 9 shows
# that interrupts are disabled.
&function_begin_B("OPENSSL_far_spin");
    &pushf  ();
    &pop    ("eax");
    &bt     ("eax",9);
    &jnc    (&label("nospin"));         # interrupts are disabled

    &mov    ("eax",&DWP(4,"esp"));
    &mov    ("ecx",&DWP(8,"esp"));
    &data_word (0x90d88e1e);            # push %ds, mov %eax,%ds
    &xor    ("eax","eax");
    &mov    ("edx",&DWP(0,"ecx"));      # reference value to compare against
    &jmp    (&label("spin"));

    &align  (16);
&set_label("spin");
    &inc    ("eax");
    &cmp    ("edx",&DWP(0,"ecx"));
    &je     (&label("spin"));

    &data_word (0x1f909090);            # pop %ds
    &ret    ();

&set_label("nospin");
    &xor    ("eax","eax");
    &xor    ("edx","edx");
    &ret    ();
&function_end_B("OPENSSL_far_spin");

# OPENSSL_wipe_cpu()
#
# Zeroes %eax and %edx, clears %xmm0-%xmm7 when built with SSE2 support and
# the capability word reports both SSE2 and FXSR, and resets the x87/MMX
# register bank.  Returns the address 4(%esp) in %eax.
&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
    &xor    ("eax","eax");
    &xor    ("edx","edx");
    &picmeup("ecx","OPENSSL_ia32cap_P");
    &mov    ("ecx",&DWP(0,"ecx"));      # %ecx = first capability word
    # Test the capability word held in %ecx itself.  The previous form,
    # &bt(&DWP(0,"ecx"),1), dereferenced that value as if it were a pointer.
    # NOTE(review): CPUID.1:EDX documents bit 0 as the x87 FPU flag; the
    # bit index 1 is kept exactly as it was -- confirm which one is intended.
    &bt     ("ecx",1);
    &jnc    (&label("no_x87"));
    if ($sse2) {
        &and    ("ecx",1<<26|1<<24);    # check SSE2 and FXSR bits
        &cmp    ("ecx",1<<26|1<<24);
        &jne    (&label("no_sse2"));
        &pxor   ("xmm0","xmm0");
        &pxor   ("xmm1","xmm1");
        &pxor   ("xmm2","xmm2");
        &pxor   ("xmm3","xmm3");
        &pxor   ("xmm4","xmm4");
        &pxor   ("xmm5","xmm5");
        &pxor   ("xmm6","xmm6");
        &pxor   ("xmm7","xmm7");
    &set_label("no_sse2");
    }
    # just a bunch of fldz to zap the fp/mm bank followed by finit...
    &data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b);
&set_label("no_x87");
    &lea    ("eax",&DWP(4,"esp"));
    &ret    ();
&function_end_B("OPENSSL_wipe_cpu");

# OPENSSL_atomic_add(ptr, increment)
#
# Adds increment to the dword at ptr with a lock cmpxchg retry loop and
# returns the new value in %eax.
&function_begin_B("OPENSSL_atomic_add");
    &mov    ("edx",&DWP(4,"esp"));      # fetch the pointer, 1st arg
    &mov    ("ecx",&DWP(8,"esp"));      # fetch the increment, 2nd arg
    &push   ("ebx");
    &nop    ();
    &mov    ("eax",&DWP(0,"edx"));      # current value, the cmpxchg comparand
&set_label("spin");
    &lea    ("ebx",&DWP(0,"eax","ecx"));        # candidate = current + increment
    &nop    ();
    &data_word(0x1ab10ff0);     # lock; cmpxchg %ebx,(%edx)     # %eax is involved and is always reloaded
    &jne    (&label("spin"));
    &mov    ("eax","ebx");      # OpenSSL expects the new value
    &pop    ("ebx");
    &ret    ();
&function_end_B("OPENSSL_atomic_add");

# OPENSSL_cleanse(ptr, len)
#
# Stores zero bytes over len bytes starting at ptr.  Lengths below 7 are
# written byte by byte; longer buffers are first brought to a 4-byte
# boundary, then written a dword at a time, with the remainder handled by
# the byte loop.
&function_begin_B("OPENSSL_cleanse");
    &mov    ("edx",&wparam(0));
    &mov    ("ecx",&wparam(1));
    &xor    ("eax","eax");
    &cmp    ("ecx",7);
    &jae    (&label("lot"));
    &cmp    ("ecx",0);
    &je     (&label("ret"));
&set_label("little");
    &mov    (&BP(0,"edx"),"al");
    &sub    ("ecx",1);
    &lea    ("edx",&DWP(1,"edx"));      # lea keeps the flags set by sub
    &jnz    (&label("little"));
&set_label("ret");
    &ret    ();

&set_label("lot",16);
    &test   ("edx",3);
    &jz     (&label("aligned"));
    &mov    (&BP(0,"edx"),"al");
    &lea    ("ecx",&DWP(-1,"ecx"));
    &lea    ("edx",&DWP(1,"edx"));
    &jmp    (&label("lot"));
&set_label("aligned");
    &mov    (&DWP(0,"edx"),"eax");
    &lea    ("ecx",&DWP(-4,"ecx"));
    &test   ("ecx",-4);                 # at least 4 bytes left?
    &lea    ("edx",&DWP(4,"edx"));      # lea keeps the flags set by test
    &jnz    (&label("aligned"));
    &cmp    ("ecx",0);
    &jne    (&label("little"));
    &ret    ();
&function_end_B("OPENSSL_cleanse");

# CRYPTO_memcmp(a, b, len)
#
# Compares len bytes of a and b without an early exit: the XOR of every
# byte pair is OR-ed into %al.  Returns 0 in %eax when all bytes match (or
# len is zero) and 1 otherwise.
&function_begin_B("CRYPTO_memcmp");
    &push   ("esi");
    &push   ("edi");
    &mov    ("esi",&wparam(0));
    &mov    ("edi",&wparam(1));
    &mov    ("ecx",&wparam(2));
    &xor    ("eax","eax");
    &xor    ("edx","edx");
    &cmp    ("ecx",0);
    &je     (&label("no_data"));
&set_label("loop");
    &mov    ("dl",&BP(0,"esi"));
    &lea    ("esi",&DWP(1,"esi"));
    &xor    ("dl",&BP(0,"edi"));
    &lea    ("edi",&DWP(1,"edi"));
    &or     ("al","dl");
    &dec    ("ecx");
    &jnz    (&label("loop"));
    &neg    ("eax");                    # non-zero accumulator becomes negative
    &shr    ("eax",31);                 # sign bit -> 0 or 1
&set_label("no_data");
    &pop    ("edi");
    &pop    ("esi");
    &ret    ();
&function_end_B("CRYPTO_memcmp");
{
my $lasttick = "esi";
my $lastdiff = "ebx";
my $out = "edi";
my $cnt = "ecx";
my $max = "ebp";

# OPENSSL_instrument_bus(out, cnt)
#
# For each of cnt dwords at out: flushes the cache line, then lock-adds the
# low 32 bits of the time-stamp counter delta since the previous sample.
# Returns cnt, or 0 when built without SSE2 support or when the capability
# word lacks TSC (bit 4) or CLFLUSH (bit 19).
&function_begin("OPENSSL_instrument_bus");
    &mov    ("eax",0);
    if ($sse2) {
    &picmeup("edx","OPENSSL_ia32cap_P");
    &bt     (&DWP(0,"edx"),4);
    &jnc    (&label("nogo"));           # no TSC
    &bt     (&DWP(0,"edx"),19);
    &jnc    (&label("nogo"));           # no CLFLUSH

    &mov    ($out,&wparam(0));          # load arguments
    &mov    ($cnt,&wparam(1));

    # collect 1st tick
    &rdtsc  ();
    &mov    ($lasttick,"eax");          # lasttick = tick
    &mov    ($lastdiff,0);              # lastdiff = 0
    &clflush(&DWP(0,$out));
    &data_byte(0xf0);                   # lock
    &add    (&DWP(0,$out),$lastdiff);
    &jmp    (&label("loop"));

&set_label("loop",16);
    &rdtsc  ();
    &mov    ("edx","eax");              # put aside tick (yes, I neglect edx)
    &sub    ("eax",$lasttick);          # diff
    &mov    ($lasttick,"edx");          # lasttick = tick
    &mov    ($lastdiff,"eax");          # lastdiff = diff
    &clflush(&DWP(0,$out));
    &data_byte(0xf0);                   # lock
    &add    (&DWP(0,$out),"eax");       # accumulate diff
    &lea    ($out,&DWP(4,$out));        # ++$out
    &sub    ($cnt,1);                   # --$cnt
    &jnz    (&label("loop"));

    &mov    ("eax",&wparam(1));
&set_label("nogo");
    }
&function_end("OPENSSL_instrument_bus");

# OPENSSL_instrument_bus2(out, cnt, max)
#
# Variant of OPENSSL_instrument_bus that advances to the next output dword
# only when the new delta differs from the previous one, and that stops
# after max accumulation steps.  Returns cnt minus the count still
# outstanding, or 0 under the same conditions as OPENSSL_instrument_bus.
&function_begin("OPENSSL_instrument_bus2");
    &mov    ("eax",0);
    if ($sse2) {
    &picmeup("edx","OPENSSL_ia32cap_P");
    &bt     (&DWP(0,"edx"),4);
    &jnc    (&label("nogo"));           # no TSC
    &bt     (&DWP(0,"edx"),19);
    &jnc    (&label("nogo"));           # no CLFLUSH

    &mov    ($out,&wparam(0));          # load arguments
    &mov    ($cnt,&wparam(1));
    &mov    ($max,&wparam(2));

    &rdtsc  ();                         # collect 1st tick
    &mov    ($lasttick,"eax");          # lasttick = tick
    &mov    ($lastdiff,0);              # lastdiff = 0

    &clflush(&DWP(0,$out));
    &data_byte(0xf0);                   # lock
    &add    (&DWP(0,$out),$lastdiff);

    &rdtsc  ();                         # collect 1st diff
    &mov    ("edx","eax");              # put aside tick (yes, I neglect edx)
    &sub    ("eax",$lasttick);          # diff
    &mov    ($lasttick,"edx");          # lasttick = tick
    &mov    ($lastdiff,"eax");          # lastdiff = diff
    &jmp    (&label("loop2"));

&set_label("loop2",16);
    &clflush(&DWP(0,$out));
    &data_byte(0xf0);                   # lock
    &add    (&DWP(0,$out),"eax");       # accumulate diff

    &sub    ($max,1);
    &jz     (&label("done2"));

    &rdtsc  ();
    &mov    ("edx","eax");              # put aside tick (yes, I neglect edx)
    &sub    ("eax",$lasttick);          # diff
    &mov    ($lasttick,"edx");          # lasttick = tick
    &cmp    ("eax",$lastdiff);
    &mov    ($lastdiff,"eax");          # lastdiff = diff
    &mov    ("edx",0);                  # mov (not xor) keeps the cmp flags
    &setne  ("dl");
    &sub    ($cnt,"edx");               # conditional --$cnt
    &lea    ($out,&DWP(0,$out,"edx",4));        # conditional ++$out
    &jnz    (&label("loop2"));          # flags still come from the sub above

&set_label("done2");
    &mov    ("eax",&wparam(1));
    &sub    ("eax",$cnt);
&set_label("nogo");
    }
&function_end("OPENSSL_instrument_bus2");
}

# gen_random($rdop)
#
# Emits OPENSSL_ia32_${rdop}_bytes(buf, len), where $rdop is the mnemonic
# of the instruction to use ("rdrand" or "rdseed" below).  The generated
# routine fills buf four bytes at a time (byte by byte for a tail shorter
# than four), giving the instruction up to 8 attempts per value, and
# returns the number of bytes actually written in %eax.
sub gen_random {
my $rdop = shift;
&function_begin_B("OPENSSL_ia32_${rdop}_bytes");
    &push   ("edi");
    &push   ("ebx");
    &xor    ("eax","eax");              # return value
    &mov    ("edi",&wparam(0));
    &mov    ("ebx",&wparam(1));

    &cmp    ("ebx",0);
    &je     (&label("done"));

    &mov    ("ecx",8);                  # attempts per value
&set_label("loop");
    &${rdop}("edx");
    &jc     (&label("break"));          # carry set: %edx holds a valid value
    &loop   (&label("loop"));
    &jmp    (&label("done"));           # attempts exhausted: give up

&set_label("break",16);
    &cmp    ("ebx",4);
    &jb     (&label("tail"));
    &mov    (&DWP(0,"edi"),"edx");
    &lea    ("edi",&DWP(4,"edi"));
    &add    ("eax",4);
    &sub    ("ebx",4);
    &jz     (&label("done"));
    &mov    ("ecx",8);
    &jmp    (&label("loop"));

&set_label("tail",16);
    &mov    (&BP(0,"edi"),"dl");
    &lea    ("edi",&DWP(1,"edi"));
    &inc    ("eax");
    &shr    ("edx",8);
    &dec    ("ebx");
    &jnz    (&label("tail"));

&set_label("done");
    &xor    ("edx","edx");              # Clear random value from registers
    &pop    ("ebx");
    &pop    ("edi");
    &ret    ();
&function_end_B("OPENSSL_ia32_${rdop}_bytes");
}
&gen_random("rdrand");
&gen_random("rdseed");

&initseg("OPENSSL_cpuid_setup");

&hidden("OPENSSL_cpuid_setup");
&hidden("OPENSSL_ia32cap_P");

&asm_finish();

close STDOUT or die "error closing STDOUT: $!";