#! /usr/bin/env perl
# Copyright 1995-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

# Generator for the 32-bit x86 bignum word primitives.  Running this
# script emits assembly (through the helpers loaded from x86asm.pl) for:
#
#	bn_mul_add_words, bn_mul_words, bn_sqr_words, bn_div_words,
#	bn_add_words, bn_sub_words, bn_sub_part_words
#
# Command line: the LAST argument is taken as the output file name, the
# first remaining argument is handed to asm_init(), and the presence of
# -DOPENSSL_IA32_SSE2 anywhere in the remaining arguments enables the
# run-time selected MMX/SSE2 code paths.
#
# Every opcode-like call below (&mov, &add, &movd, ...) is resolved by
# x86asm.pl and appends one instruction to the output, so the ORDER of
# these calls is the order of the generated instructions.  Do not
# reorder them casually: several sequences depend on flags surviving
# across intervening instructions that do not modify them.

# Locate the directory this script lives in so x86asm.pl can be found.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";

# The last argument is the output file; redirect STDOUT to it.  When no
# (or an empty) argument is given the output stays on the inherited
# STDOUT, exactly as before; a failed open is now reported immediately.
$output = pop;
if ($output) {
	open STDOUT, '>', $output or die "can't open $output: $!";
}

&asm_init($ARGV[0]);

$sse2=0;
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }

&external_label("OPENSSL_ia32cap_P") if ($sse2);

&bn_mul_add_words("bn_mul_add_words");
&bn_mul_words("bn_mul_words");
&bn_sqr_words("bn_sqr_words");
&bn_div_words("bn_div_words");
&bn_add_words("bn_add_words");
&bn_sub_words("bn_sub_words");
&bn_sub_part_words("bn_sub_part_words");

&asm_finish();

close STDOUT or die "error closing STDOUT: $!";

# Emit bn_mul_add_words(r, a, num, w).
#
# Stack arguments (by wparam index): 0 = r, 1 = a, 2 = num, 3 = w.
# For each of the num words the generated code computes
# r[i] = low32(r[i] + a[i]*w + carry) and keeps the high half as the
# next carry; the final carry is returned in eax.
#
# When $sse2 is set, the function first tests bit 26 of the first
# OPENSSL_ia32cap_P word (presumably the CPUID SSE2 feature flag) and
# uses an MMX-register path if it is set; otherwise it falls through to
# the integer path.  function_begin_B is used so that the SSE2 path can
# return without saving any callee-saved registers; the integer path
# pushes them by hand and leaves through function_end.
sub bn_mul_add_words {
	my ($name) = @_;

	&function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");

	$r="eax";
	$a="edx";
	$c="ecx";

	if ($sse2) {
		&picmeup("eax","OPENSSL_ia32cap_P");
		&bt(&DWP(0,"eax"),26);
		&jnc(&label("maw_non_sse2"));

		&mov($r,&wparam(0));
		&mov($a,&wparam(1));
		&mov($c,&wparam(2));
		&movd("mm0",&wparam(3));	# mm0 = w
		&pxor("mm1","mm1");		# mm1 = carry_in
		&jmp(&label("maw_sse2_entry"));

		# Eight words per iteration; the multiplies for later words
		# are interleaved with the carry propagation of earlier ones.
	&set_label("maw_sse2_unrolled",16);
		&movd("mm3",&DWP(0,$r,"",0));	# mm3 = r[0]
		&paddq("mm1","mm3");		# mm1 = carry_in + r[0]
		&movd("mm2",&DWP(0,$a,"",0));	# mm2 = a[0]
		&pmuludq("mm2","mm0");		# mm2 = w*a[0]
		&movd("mm4",&DWP(4,$a,"",0));	# mm4 = a[1]
		&pmuludq("mm4","mm0");		# mm4 = w*a[1]
		&movd("mm6",&DWP(8,$a,"",0));	# mm6 = a[2]
		&pmuludq("mm6","mm0");		# mm6 = w*a[2]
		&movd("mm7",&DWP(12,$a,"",0));	# mm7 = a[3]
		&pmuludq("mm7","mm0");		# mm7 = w*a[3]
		&paddq("mm1","mm2");		# mm1 = carry_in + r[0] + w*a[0]
		&movd("mm3",&DWP(4,$r,"",0));	# mm3 = r[1]
		&paddq("mm3","mm4");		# mm3 = r[1] + w*a[1]
		&movd("mm5",&DWP(8,$r,"",0));	# mm5 = r[2]
		&paddq("mm5","mm6");		# mm5 = r[2] + w*a[2]
		&movd("mm4",&DWP(12,$r,"",0));	# mm4 = r[3]
		&paddq("mm7","mm4");		# mm7 = r[3] + w*a[3]
		&movd(&DWP(0,$r,"",0),"mm1");
		&movd("mm2",&DWP(16,$a,"",0));	# mm2 = a[4]
		&pmuludq("mm2","mm0");		# mm2 = w*a[4]
		&psrlq("mm1",32);		# mm1 = carry0
		&movd("mm4",&DWP(20,$a,"",0));	# mm4 = a[5]
		&pmuludq("mm4","mm0");		# mm4 = w*a[5]
		&paddq("mm1","mm3");		# mm1 = carry0 + r[1] + w*a[1]
		&movd("mm6",&DWP(24,$a,"",0));	# mm6 = a[6]
		&pmuludq("mm6","mm0");		# mm6 = w*a[6]
		&movd(&DWP(4,$r,"",0),"mm1");
		&psrlq("mm1",32);		# mm1 = carry1
		&movd("mm3",&DWP(28,$a,"",0));	# mm3 = a[7]
		&add($a,32);
		&pmuludq("mm3","mm0");		# mm3 = w*a[7]
		&paddq("mm1","mm5");		# mm1 = carry1 + r[2] + w*a[2]
		&movd("mm5",&DWP(16,$r,"",0));	# mm5 = r[4]
		&paddq("mm2","mm5");		# mm2 = r[4] + w*a[4]
		&movd(&DWP(8,$r,"",0),"mm1");
		&psrlq("mm1",32);		# mm1 = carry2
		&paddq("mm1","mm7");		# mm1 = carry2 + r[3] + w*a[3]
		&movd("mm5",&DWP(20,$r,"",0));	# mm5 = r[5]
		&paddq("mm4","mm5");		# mm4 = r[5] + w*a[5]
		&movd(&DWP(12,$r,"",0),"mm1");
		&psrlq("mm1",32);		# mm1 = carry3
		&paddq("mm1","mm2");		# mm1 = carry3 + r[4] + w*a[4]
		&movd("mm5",&DWP(24,$r,"",0));	# mm5 = r[6]
		&paddq("mm6","mm5");		# mm6 = r[6] + w*a[6]
		&movd(&DWP(16,$r,"",0),"mm1");
		&psrlq("mm1",32);		# mm1 = carry4
		&paddq("mm1","mm4");		# mm1 = carry4 + r[5] + w*a[5]
		&movd("mm5",&DWP(28,$r,"",0));	# mm5 = r[7]
		&paddq("mm3","mm5");		# mm3 = r[7] + w*a[7]
		&movd(&DWP(20,$r,"",0),"mm1");
		&psrlq("mm1",32);		# mm1 = carry5
		&paddq("mm1","mm6");		# mm1 = carry5 + r[6] + w*a[6]
		&movd(&DWP(24,$r,"",0),"mm1");
		&psrlq("mm1",32);		# mm1 = carry6
		&paddq("mm1","mm3");		# mm1 = carry6 + r[7] + w*a[7]
		&movd(&DWP(28,$r,"",0),"mm1");
		&lea($r,&DWP(32,$r));
		&psrlq("mm1",32);		# mm1 = carry_out

		&sub($c,8);
		&jz(&label("maw_sse2_exit"));
	&set_label("maw_sse2_entry");
		&test($c,0xfffffff8);		# at least 8 words left?
		&jnz(&label("maw_sse2_unrolled"));

		# One word per iteration for the remaining (fewer than 8) words.
	&set_label("maw_sse2_loop",4);
		&movd("mm2",&DWP(0,$a));	# mm2 = a[i]
		&movd("mm3",&DWP(0,$r));	# mm3 = r[i]
		&pmuludq("mm2","mm0");		# a[i] *= w
		&lea($a,&DWP(4,$a));
		&paddq("mm1","mm3");		# carry += r[i]
		&paddq("mm1","mm2");		# carry += a[i]*w
		&movd(&DWP(0,$r),"mm1");	# r[i] = carry_low
		&sub($c,1);
		&psrlq("mm1",32);		# carry = carry_high
		&lea($r,&DWP(4,$r));		# lea leaves the flags from sub intact
		&jnz(&label("maw_sse2_loop"));
	&set_label("maw_sse2_exit");
		&movd("eax","mm1");		# c = carry_out
		&emms();
		&ret();

	&set_label("maw_non_sse2",16);
	}

	# function_begin prologue
	&push("ebp");
	&push("ebx");
	&push("esi");
	&push("edi");

	&comment("");
	$a="ebx";
	$w="ebp";
	$r="edi";
	$c="esi";

	&xor($c,$c);			# clear carry
	&mov($r,&wparam(0));		# r

	&mov("ecx",&wparam(2));		# num
	&mov($a,&wparam(1));		# a

	&and("ecx",0xfffffff8);		# num rounded down to a multiple of 8
	&mov($w,&wparam(3));		# w

	&push("ecx");			# Up the stack for a tmp variable

	&jz(&label("maw_finish"));	# flags still from the and above

	&set_label("maw_loop",16);

	for ($i=0; $i<32; $i+=4) {
		&comment("Round $i");

		&mov("eax",&DWP($i,$a));	# *a
		&mul($w);			# *a * w
		&add("eax",$c);			# L(t)+= c
		&adc("edx",0);			# H(t)+=carry
		&add("eax",&DWP($i,$r));	# L(t)+= *r
		&adc("edx",0);			# H(t)+=carry
		&mov(&DWP($i,$r),"eax");	# *r= L(t);
		&mov($c,"edx");			# c= H(t);
	}

	&comment("");
	&sub("ecx",8);
	&lea($a,&DWP(32,$a));		# lea keeps the flags from sub
	&lea($r,&DWP(32,$r));
	&jnz(&label("maw_loop"));

	&set_label("maw_finish",0);
	&mov("ecx",&wparam(2));		# get num
	&and("ecx",7);			# words left over after the 8-word groups
	&jnz(&label("maw_finish2"));	# helps branch prediction
	&jmp(&label("maw_end"));

	&set_label("maw_finish2",1);
	for ($i=0; $i<7; $i++) {
		&comment("Tail Round $i");
		&mov("eax",&DWP($i*4,$a));	# *a
		&mul($w);			# *a * w
		&add("eax",$c);			# L(t)+=c
		&adc("edx",0);			# H(t)+=carry
		&add("eax",&DWP($i*4,$r));	# L(t)+= *r
		&adc("edx",0);			# H(t)+=carry
		&dec("ecx") if ($i != 7-1);
		&mov(&DWP($i*4,$r),"eax");	# *r= L(t);
		&mov($c,"edx");			# c= H(t);
		&jz(&label("maw_end")) if ($i != 7-1);	# tests the dec above
	}
	&set_label("maw_end",0);
	&mov("eax",$c);			# return the carry

	&pop("ecx");			# drop the tmp slot pushed above

	&function_end($name);
}

# Emit bn_mul_words(r, a, num, w).
#
# Stack arguments (by wparam index): 0 = r, 1 = a, 2 = num, 3 = w.
# For each word the generated code stores r[i] = low32(a[i]*w + carry)
# and carries the high half forward; the final carry is returned in eax.
# The SSE2 path is selected at run time in the same way as in
# bn_mul_add_words.
#
# NOTE(review): the SSE2 loop decrements before testing, so it appears
# to assume num > 0 - confirm against the callers.
sub bn_mul_words {
	my ($name) = @_;

	&function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");

	$r="eax";
	$a="edx";
	$c="ecx";

	if ($sse2) {
		&picmeup("eax","OPENSSL_ia32cap_P");
		&bt(&DWP(0,"eax"),26);
		&jnc(&label("mw_non_sse2"));

		&mov($r,&wparam(0));
		&mov($a,&wparam(1));
		&mov($c,&wparam(2));
		&movd("mm0",&wparam(3));	# mm0 = w
		&pxor("mm1","mm1");		# mm1 = carry = 0

	&set_label("mw_sse2_loop",16);
		&movd("mm2",&DWP(0,$a));	# mm2 = a[i]
		&pmuludq("mm2","mm0");		# a[i] *= w
		&lea($a,&DWP(4,$a));
		&paddq("mm1","mm2");		# carry += a[i]*w
		&movd(&DWP(0,$r),"mm1");	# r[i] = carry_low
		&sub($c,1);
		&psrlq("mm1",32);		# carry = carry_high
		&lea($r,&DWP(4,$r));
		&jnz(&label("mw_sse2_loop"));

		&movd("eax","mm1");		# return carry
		&emms();
		&ret();
	&set_label("mw_non_sse2",16);
	}

	# function_begin prologue
	&push("ebp");
	&push("ebx");
	&push("esi");
	&push("edi");

	&comment("");
	$a="ebx";
	$w="ecx";
	$r="edi";
	$c="esi";
	$num="ebp";

	&xor($c,$c);			# clear carry
	&mov($r,&wparam(0));		# r
	&mov($a,&wparam(1));		# a
	&mov($num,&wparam(2));		# num
	&mov($w,&wparam(3));		# w

	&and($num,0xfffffff8);		# num rounded down to a multiple of 8
	&jz(&label("mw_finish"));

	&set_label("mw_loop",0);
	for ($i=0; $i<32; $i+=4) {
		&comment("Round $i");

		&mov("eax",&DWP($i,$a,"",0));	# *a
		&mul($w);			# *a * w
		&add("eax",$c);			# L(t)+=c
		&adc("edx",0);			# H(t)+=carry
		&mov(&DWP($i,$r,"",0),"eax");	# *r= L(t);
		&mov($c,"edx");			# c= H(t);
	}

	&comment("");
	&add($a,32);
	&add($r,32);
	&sub($num,8);
	&jz(&label("mw_finish"));
	&jmp(&label("mw_loop"));

	&set_label("mw_finish",0);
	&mov($num,&wparam(2));		# get num
	&and($num,7);			# words left over after the 8-word groups
	&jnz(&label("mw_finish2"));
	&jmp(&label("mw_end"));

	&set_label("mw_finish2",1);
	for ($i=0; $i<7; $i++) {
		&comment("Tail Round $i");
		&mov("eax",&DWP($i*4,$a,"",0));	# *a
		&mul($w);			# *a * w
		&add("eax",$c);			# L(t)+=c
		&adc("edx",0);			# H(t)+=carry
		&mov(&DWP($i*4,$r,"",0),"eax");	# *r= L(t);
		&mov($c,"edx");			# c= H(t);
		&dec($num) if ($i != 7-1);
		&jz(&label("mw_end")) if ($i != 7-1);
	}
	&set_label("mw_end",0);
	&mov("eax",$c);			# return the carry

	&function_end($name);
}

# Emit bn_sqr_words(r, a, num).
#
# Stack arguments (by wparam index): 0 = r, 1 = a, 2 = num.
# Each input word a[i] is squared and the 64-bit product is stored as
# two consecutive words r[2*i] (low) and r[2*i+1] (high).  No value is
# loaded into eax for return.  The SSE2 path is selected at run time in
# the same way as in bn_mul_add_words.
#
# NOTE(review): the SSE2 loop decrements before testing, so it appears
# to assume num > 0 - confirm against the callers.
sub bn_sqr_words {
	my ($name) = @_;

	&function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");

	$r="eax";
	$a="edx";
	$c="ecx";

	if ($sse2) {
		&picmeup("eax","OPENSSL_ia32cap_P");
		&bt(&DWP(0,"eax"),26);
		&jnc(&label("sqr_non_sse2"));

		&mov($r,&wparam(0));
		&mov($a,&wparam(1));
		&mov($c,&wparam(2));

	&set_label("sqr_sse2_loop",16);
		&movd("mm0",&DWP(0,$a));	# mm0 = a[i]
		&pmuludq("mm0","mm0");		# a[i] *= a[i]
		&lea($a,&DWP(4,$a));		# a++
		&movq(&QWP(0,$r),"mm0");	# r[i] = a[i]*a[i]
		&sub($c,1);
		&lea($r,&DWP(8,$r));		# r += 2
		&jnz(&label("sqr_sse2_loop"));

		&emms();
		&ret();
	&set_label("sqr_non_sse2",16);
	}

	# function_begin prologue
	&push("ebp");
	&push("ebx");
	&push("esi");
	&push("edi");

	&comment("");
	$r="esi";
	$a="edi";
	$num="ebx";

	&mov($r,&wparam(0));		# r
	&mov($a,&wparam(1));		# a
	&mov($num,&wparam(2));		# num

	&and($num,0xfffffff8);		# num rounded down to a multiple of 8
	&jz(&label("sw_finish"));

	&set_label("sw_loop",0);
	for ($i=0; $i<32; $i+=4) {
		&comment("Round $i");
		&mov("eax",&DWP($i,$a,"",0));		# *a
		&mul("eax");				# *a * *a
		&mov(&DWP($i*2,$r,"",0),"eax");		# low word of the square
		&mov(&DWP($i*2+4,$r,"",0),"edx");	# high word of the square
	}

	&comment("");
	&add($a,32);
	&add($r,64);
	&sub($num,8);
	&jnz(&label("sw_loop"));

	&set_label("sw_finish",0);
	&mov($num,&wparam(2));		# get num
	&and($num,7);			# words left over after the 8-word groups
	&jz(&label("sw_end"));

	for ($i=0; $i<7; $i++) {
		&comment("Tail Round $i");
		&mov("eax",&DWP($i*4,$a,"",0));		# *a
		&mul("eax");				# *a * *a
		&mov(&DWP($i*8,$r,"",0),"eax");		# low word of the square
		&dec($num) if ($i != 7-1);
		&mov(&DWP($i*8+4,$r,"",0),"edx");	# high word of the square
		&jz(&label("sw_end")) if ($i != 7-1);	# tests the dec above
	}
	&set_label("sw_end",0);

	&function_end($name);
}

# Emit bn_div_words: load the three stack arguments into edx, eax and
# ecx (wparam 0, 1 and 2 respectively) and execute "div ecx", i.e. the
# 64-bit value edx:eax is divided by ecx.  The function returns right
# after the div, so the caller sees whatever div left in eax.
sub bn_div_words {
	my ($name) = @_;

	&function_begin_B($name,"");
	&mov("edx",&wparam(0));		# high word of the dividend
	&mov("eax",&wparam(1));		# low word of the dividend
	&mov("ecx",&wparam(2));		# divisor
	&div("ecx");
	&ret();
	&function_end_B($name);
}

# Emit bn_add_words(r, a, b, num).
#
# Stack arguments (by wparam index): 0 = r, 1 = a, 2 = b, 3 = num.
# For each word: r[i] = a[i] + b[i] + carry.  The carry is kept as 0/1
# in eax between words (rebuilt with mov/adc after each of the two
# adds), so eax holds the final carry on return.
sub bn_add_words {
	my ($name) = @_;

	&function_begin($name,"");

	&comment("");
	$a="esi";
	$b="edi";
	$c="eax";
	$r="ebx";
	$tmp1="ecx";
	$tmp2="edx";
	$num="ebp";

	&mov($r,&wparam(0));		# get r
	&mov($a,&wparam(1));		# get a
	&mov($b,&wparam(2));		# get b
	&mov($num,&wparam(3));		# get num
	&xor($c,$c);			# clear carry
	&and($num,0xfffffff8);		# num rounded down to a multiple of 8

	&jz(&label("aw_finish"));

	&set_label("aw_loop",0);
	for ($i=0; $i<8; $i++) {
		&comment("Round $i");

		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov($tmp2,&DWP($i*4,$b,"",0));	# *b
		&add($tmp1,$c);			# add incoming carry
		&mov($c,0);			# mov does not touch CF
		&adc($c,$c);			# c = carry out of the first add
		&add($tmp1,$tmp2);
		&adc($c,0);			# plus carry out of the second add
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
	}

	&comment("");
	&add($a,32);
	&add($b,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($num,&wparam(3));		# get num
	&and($num,7);			# words left over after the 8-word groups
	&jz(&label("aw_end"));

	for ($i=0; $i<7; $i++) {
		&comment("Tail Round $i");
		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov($tmp2,&DWP($i*4,$b,"",0));	# *b
		&add($tmp1,$c);
		&mov($c,0);
		&adc($c,$c);
		&add($tmp1,$tmp2);
		&adc($c,0);
		&dec($num) if ($i != 6);
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		&jz(&label("aw_end")) if ($i != 6);	# tests the dec above
	}
	&set_label("aw_end",0);

	# No explicit move of the carry is needed: $c is "eax".

	&function_end($name);
}

# Emit bn_sub_words(r, a, b, num).
#
# Stack arguments (by wparam index): 0 = r, 1 = a, 2 = b, 3 = num.
# For each word: r[i] = a[i] - b[i] - borrow.  The borrow is kept as
# 0/1 in eax between words, so eax holds the final borrow on return.
# The label names deliberately repeat the "aw_" names used by
# bn_add_words; they are kept exactly as they were.
sub bn_sub_words {
	my ($name) = @_;

	&function_begin($name,"");

	&comment("");
	$a="esi";
	$b="edi";
	$c="eax";
	$r="ebx";
	$tmp1="ecx";
	$tmp2="edx";
	$num="ebp";

	&mov($r,&wparam(0));		# get r
	&mov($a,&wparam(1));		# get a
	&mov($b,&wparam(2));		# get b
	&mov($num,&wparam(3));		# get num
	&xor($c,$c);			# clear carry
	&and($num,0xfffffff8);		# num rounded down to a multiple of 8

	&jz(&label("aw_finish"));

	&set_label("aw_loop",0);
	for ($i=0; $i<8; $i++) {
		&comment("Round $i");

		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov($tmp2,&DWP($i*4,$b,"",0));	# *b
		&sub($tmp1,$c);			# subtract incoming borrow
		&mov($c,0);			# mov does not touch CF
		&adc($c,$c);			# c = borrow out of the first sub
		&sub($tmp1,$tmp2);
		&adc($c,0);			# plus borrow out of the second sub
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
	}

	&comment("");
	&add($a,32);
	&add($b,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($num,&wparam(3));		# get num
	&and($num,7);			# words left over after the 8-word groups
	&jz(&label("aw_end"));

	for ($i=0; $i<7; $i++) {
		&comment("Tail Round $i");
		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov($tmp2,&DWP($i*4,$b,"",0));	# *b
		&sub($tmp1,$c);
		&mov($c,0);
		&adc($c,$c);
		&sub($tmp1,$tmp2);
		&adc($c,0);
		&dec($num) if ($i != 6);
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		&jz(&label("aw_end")) if ($i != 6);	# tests the dec above
	}
	&set_label("aw_end",0);

	# No explicit move of the borrow is needed: $c is "eax".

	&function_end($name);
}

# Emit bn_sub_part_words(r, a, b, num, dl).
#
# Stack arguments (by wparam index): 0 = r, 1 = a, 2 = b, 3 = num,
# 4 = dl.
#
# Phase 1 is the same subtraction as bn_sub_words over num words,
# except that the tail advances a, b and r word by word so that all
# three pointers end up just past the words processed.
#
# Phase 2 depends on the sign of dl:
#   dl == 0: nothing more to do.
#   dl <  0: for -dl further words, r[i] = 0 - b[i] - borrow.
#   dl >  0: for dl further words, r[i] = a[i] - borrow.  As soon as a
#            word produces no borrow the code jumps into a plain copy
#            sequence (the pw_nc* labels) for the remaining words and
#            finishes with borrow 0; if every word borrows, the result
#            borrow is 1.
#
# The borrow is kept in eax throughout and is the return value.
sub bn_sub_part_words {
	my ($name) = @_;

	&function_begin($name,"");

	&comment("");
	$a="esi";
	$b="edi";
	$c="eax";
	$r="ebx";
	$tmp1="ecx";
	$tmp2="edx";
	$num="ebp";

	&mov($r,&wparam(0));		# get r
	&mov($a,&wparam(1));		# get a
	&mov($b,&wparam(2));		# get b
	&mov($num,&wparam(3));		# get num
	&xor($c,$c);			# clear carry
	&and($num,0xfffffff8);		# num rounded down to a multiple of 8

	&jz(&label("aw_finish"));

	&set_label("aw_loop",0);
	for ($i=0; $i<8; $i++) {
		&comment("Round $i");

		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov($tmp2,&DWP($i*4,$b,"",0));	# *b
		&sub($tmp1,$c);
		&mov($c,0);
		&adc($c,$c);
		&sub($tmp1,$tmp2);
		&adc($c,0);
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
	}

	&comment("");
	&add($a,32);
	&add($b,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($num,&wparam(3));		# get num
	&and($num,7);			# words left over after the 8-word groups
	&jz(&label("aw_end"));

	# Unlike bn_sub_words, each tail round bumps the pointers itself so
	# that phase 2 can continue from where phase 1 stopped.
	for ($i=0; $i<7; $i++) {
		&comment("Tail Round $i");
		&mov($tmp1,&DWP(0,$a,"",0));	# *a
		&mov($tmp2,&DWP(0,$b,"",0));	# *b
		&sub($tmp1,$c);
		&mov($c,0);
		&adc($c,$c);
		&sub($tmp1,$tmp2);
		&adc($c,0);
		&mov(&DWP(0,$r,"",0),$tmp1);	# *r
		&add($a, 4);
		&add($b, 4);
		&add($r, 4);
		&dec($num) if ($i != 6);
		&jz(&label("aw_end")) if ($i != 6);
	}
	&set_label("aw_end",0);

	&cmp(&wparam(4),0);
	&je(&label("pw_end"));

	&mov($num,&wparam(4));		# get dl
	&cmp($num,0);
	&je(&label("pw_end"));
	&jge(&label("pw_pos"));

	# dl < 0: work on -dl words of b.
	&comment("pw_neg");
	&mov($tmp2,0);
	&sub($tmp2,$num);
	&mov($num,$tmp2);		# num = -dl
	&and($num,0xfffffff8);		# -dl rounded down to a multiple of 8
	&jz(&label("pw_neg_finish"));

	&set_label("pw_neg_loop",0);
	for ($i=0; $i<8; $i++) {
		&comment("dl<0 Round $i");

		&mov($tmp1,0);
		&mov($tmp2,&DWP($i*4,$b,"",0));	# *b
		&sub($tmp1,$c);
		&mov($c,0);
		&adc($c,$c);
		&sub($tmp1,$tmp2);
		&adc($c,0);
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
	}

	&comment("");
	&add($b,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("pw_neg_loop"));

	&set_label("pw_neg_finish",0);
	&mov($tmp2,&wparam(4));		# get dl
	&mov($num,0);
	&sub($num,$tmp2);		# num = -dl
	&and($num,7);			# words left over after the 8-word groups
	&jz(&label("pw_end"));

	for ($i=0; $i<7; $i++) {
		&comment("dl<0 Tail Round $i");
		&mov($tmp1,0);
		&mov($tmp2,&DWP($i*4,$b,"",0));	# *b
		&sub($tmp1,$c);
		&mov($c,0);
		&adc($c,$c);
		&sub($tmp1,$tmp2);
		&adc($c,0);
		&dec($num) if ($i != 6);
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		&jz(&label("pw_end")) if ($i != 6);	# tests the dec above
	}

	&jmp(&label("pw_end"));

	# dl > 0: propagate the borrow through dl words of a.
	&set_label("pw_pos",0);

	&and($num,0xfffffff8);		# dl rounded down to a multiple of 8
	&jz(&label("pw_pos_finish"));

	&set_label("pw_pos_loop",0);

	for ($i=0; $i<8; $i++) {
		&comment("dl>0 Round $i");

		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&sub($tmp1,$c);
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		&jnc(&label("pw_nc".$i));	# no borrow: just copy the rest
	}

	&comment("");
	&add($a,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("pw_pos_loop"));

	&set_label("pw_pos_finish",0);
	&mov($num,&wparam(4));		# get dl
	&and($num,7);			# words left over after the 8-word groups
	&jz(&label("pw_end"));

	for ($i=0; $i<7; $i++) {
		&comment("dl>0 Tail Round $i");
		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&sub($tmp1,$c);
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		&jnc(&label("pw_tail_nc".$i));	# no borrow: just copy the rest
		&dec($num) if ($i != 6);
		&jz(&label("pw_end")) if ($i != 6);
	}
	&mov($c,1);			# every word borrowed
	&jmp(&label("pw_end"));

	# Copy-only continuation, entered at pw_nc<i> once the borrow has
	# cleared; each label sits AFTER the copy of word i because that
	# word has already been stored by the borrowing loop.
	&set_label("pw_nc_loop",0);
	for ($i=0; $i<8; $i++) {
		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		&set_label("pw_nc".$i,0);
	}

	&comment("");
	&add($a,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("pw_nc_loop"));

	&mov($num,&wparam(4));		# get dl
	&and($num,7);			# words left over after the 8-word groups
	&jz(&label("pw_nc_end"));

	for ($i=0; $i<7; $i++) {
		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		&set_label("pw_tail_nc".$i,0);
		&dec($num) if ($i != 6);
		&jz(&label("pw_nc_end")) if ($i != 6);
	}

	&set_label("pw_nc_end",0);
	&mov($c,0);			# borrow was absorbed

	&set_label("pw_end",0);

	# No explicit move of the borrow is needed: $c is "eax".

	&function_end($name);
}