#! /usr/bin/env perl
# Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# April 2010
#
# The module implements "4-bit" GCM GHASH function and underlying
# single multiplication operation in GF(2^128). "4-bit" means that it
# uses 256 bytes per-key table [+128 bytes shared table]. On PA-7100LC
# it processes one byte in 19.6 cycles, which is more than twice as
# fast as code generated by gcc 3.2. PA-RISC 2.0 loop is scheduled for
# 8 cycles, but measured performance on PA-8600 system is ~9 cycles per
# processed byte. This is ~2.2x faster than 64-bit code generated by
# vendor compiler (which used to be very hard to beat:-).
#
# Special thanks to polarhome.com for providing HP-UX account.

# Usage: ghash-parisc.pl [flavour] [output.ext]
#
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Redirect everything printed later to the requested file.  Fail loudly if
# the file cannot be created instead of silently writing the assembly to
# whatever STDOUT happened to be.
if ($output) {
	open STDOUT, '>', $output or die "can't open $output: $!";
}

# ABI-dependent parameters.  A flavour containing "64" selects the 2.0W
# settings; anything else (including no flavour at all) selects the 32-bit
# ones.
if (defined($flavour) && $flavour =~ /64/) {
	$LEVEL		="2.0W";
	$SIZE_T		=8;
	$FRAME_MARKER	=80;
	$SAVED_RP	=16;
	$PUSH		="std";
	$PUSHMA		="std,ma";
	$POP		="ldd";
	$POPMB		="ldd,mb";
	$NREGS		=6;
} else {
	$LEVEL		="1.0";	#"\n\t.ALLOW\t2.0";
	$SIZE_T		=4;
	$FRAME_MARKER	=48;
	$SAVED_RP	=20;
	$PUSH		="stw";
	$PUSHMA		="stwm";
	$POP		="ldw";
	$POPMB		="ldwm";
	$NREGS		=11;
}

$FRAME=10*$SIZE_T+$FRAME_MARKER;# NREGS saved regs + frame marker
				#                 [+ argument transfer]

################# volatile registers
$Xi="%r26";	# argument block
$Htbl="%r25";
$inp="%r24";
$len="%r23";
$Hhh=$Htbl;	# variables
$Hll="%r22";
$Zhh="%r21";
$Zll="%r20";
$cnt="%r19";
$rem_4bit="%r28";
$rem="%r29";
$mask0xf0="%r31";

################# preserved registers
$Thh="%r1";
$Tll="%r2";
$nlo="%r3";
$nhi="%r4";
$byte="%r5";
if ($SIZE_T==4) {
	$Zhl="%r6";
	$Zlh="%r7";
	$Hhl="%r8";
	$Hlh="%r9";
	$Thl="%r10";
	$Tlh="%r11";
}
$rem2="%r6";	# used in PA-RISC 2.0 code

# ---------------------------------------------------------------------
# gcm_gmult_4bit: prologue.  %r2 and %r3-%r6 are always saved; the
# 32-bit build additionally saves %r7-%r11 (see the conditional chunk).
# Expressions in `backticks` are evaluated by the output loop at the end
# of the script, not here.
# ---------------------------------------------------------------------
$code.=<<___;
	.LEVEL	$LEVEL
	.SPACE	\$TEXT\$
	.SUBSPA	\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY

	.EXPORT	gcm_gmult_4bit,ENTRY,ARGW0=GR,ARGW1=GR
	.ALIGN	64
gcm_gmult_4bit
	.PROC
	.CALLINFO	FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
___
$code.=<<___ if ($SIZE_T==4);
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
___
# Position-independent computation of the address of the shared
# L$rem_4bit table, and the 0xf0 nibble mask.
$code.=<<___;
	blr	%r0,$rem_4bit
	ldi	3,$rem
L\$pic_gmult
	andcm	$rem_4bit,$rem,$rem_4bit
	addl	$inp,$len,$len
	ldo	L\$rem_4bit-L\$pic_gmult($rem_4bit),$rem_4bit
	ldi	0xf0,$mask0xf0
___
# 32-bit build only: run-time selection between the PA-RISC 2.0 code that
# follows and the PA-RISC 1.0 code at L$parisc1_gmult.
# NOTE(review): presumably the extrd,u,*= nullifies the branch on 2.0
# processors - confirm against the architecture manual.
$code.=<<___ if ($SIZE_T==4);
	ldi	31,$rem
	mtctl	$rem,%cr11
	extrd,u,*=	$rem,%sar,1,$rem	; executes on PA-RISC 1.0
	b	L\$parisc1_gmult
	nop
___

# gcm_gmult_4bit, PA-RISC 2.0 path: 64-bit loads/stores on Xi and Htable.
$code.=<<___;
	ldb	15($Xi),$nlo
	ldo	8($Htbl),$Hll

	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo

	ldd	$nlo($Hll),$Zll
	ldd	$nlo($Hhh),$Zhh

	depd,z	$Zll,60,4,$rem
	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldb	14($Xi),$nlo

	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh
	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem
	b	L\$oop_gmult_pa2
	ldi	13,$cnt

	.ALIGN	8
L\$oop_gmult_pa2
	xor	$rem,$Zhh,$Zhh		; moved here to work around gas bug
	depd,z	$Zll,60,4,$rem

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nlo($Hll),$Tll
	ldd	$nlo($Hhh),$Thh

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem

	xor	$rem,$Zhh,$Zhh
	depd,z	$Zll,60,4,$rem
	ldbx	$cnt($Xi),$nlo

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh

	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo
	ldd	$rem($rem_4bit),$rem

	xor	$Tll,$Zll,$Zll
	addib,uv	-1,$cnt,L\$oop_gmult_pa2
	xor	$Thh,$Zhh,$Zhh

	xor	$rem,$Zhh,$Zhh
	depd,z	$Zll,60,4,$rem

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nlo($Hll),$Tll
	ldd	$nlo($Hhh),$Thh

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem

	xor	$rem,$Zhh,$Zhh
	depd,z	$Zll,60,4,$rem

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem

	xor	$rem,$Zhh,$Zhh
	std	$Zll,8($Xi)
	std	$Zhh,0($Xi)
___

# gcm_gmult_4bit, PA-RISC 1.0 path (32-bit build only): the 128-bit value
# is kept in four 32-bit registers (Zhh:Zhl:Zlh:Zll).
$code.=<<___ if ($SIZE_T==4);
	b	L\$done_gmult
	nop

L\$parisc1_gmult
	ldb	15($Xi),$nlo
	ldo	12($Htbl),$Hll
	ldo	8($Htbl),$Hlh
	ldo	4($Htbl),$Hhl

	and	$mask0xf0,$nlo,$nhi
	zdep	$nlo,27,4,$nlo

	ldwx	$nlo($Hll),$Zll
	ldwx	$nlo($Hlh),$Zlh
	ldwx	$nlo($Hhl),$Zhl
	ldwx	$nlo($Hhh),$Zhh
	zdep	$Zll,28,4,$rem
	ldb	14($Xi),$nlo
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhl,$Zlh,4,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	shrpw	$Zhh,$Zhl,4,$Zhl
	ldwx	$nhi($Hhl),$Thl
	extru	$Zhh,27,28,$Zhh
	ldwx	$nhi($Hhh),$Thh
	xor	$rem,$Zhh,$Zhh
	and	$mask0xf0,$nlo,$nhi
	zdep	$nlo,27,4,$nlo

	xor	$Tll,$Zll,$Zll
	ldwx	$nlo($Hll),$Tll
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nlo($Hlh),$Tlh
	xor	$Thl,$Zhl,$Zhl
	b	L\$oop_gmult_pa1
	ldi	13,$cnt

	.ALIGN	8
L\$oop_gmult_pa1
	zdep	$Zll,28,4,$rem
	ldwx	$nlo($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nlo($Hhh),$Thh
	shrpw	$Zhl,$Zlh,4,$Zlh
	ldbx	$cnt($Xi),$nlo
	xor	$Tll,$Zll,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhh,$Zhl,4,$Zhl
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	extru	$Zhh,27,28,$Zhh
	xor	$Thl,$Zhl,$Zhl
	ldwx	$nhi($Hhl),$Thl
	xor	$rem,$Zhh,$Zhh
	zdep	$Zll,28,4,$rem
	xor	$Thh,$Zhh,$Zhh
	ldwx	$nhi($Hhh),$Thh
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zhl,$Zlh,4,$Zlh
	shrpw	$Zhh,$Zhl,4,$Zhl
	and	$mask0xf0,$nlo,$nhi
	extru	$Zhh,27,28,$Zhh
	zdep	$nlo,27,4,$nlo
	xor	$Tll,$Zll,$Zll
	ldwx	$nlo($Hll),$Tll
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nlo($Hlh),$Tlh
	xor	$rem,$Zhh,$Zhh
	addib,uv	-1,$cnt,L\$oop_gmult_pa1
	xor	$Thl,$Zhl,$Zhl

	zdep	$Zll,28,4,$rem
	ldwx	$nlo($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nlo($Hhh),$Thh
	shrpw	$Zhl,$Zlh,4,$Zlh
	xor	$Tll,$Zll,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhh,$Zhl,4,$Zhl
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	extru	$Zhh,27,28,$Zhh
	xor	$rem,$Zhh,$Zhh
	xor	$Thl,$Zhl,$Zhl
	ldwx	$nhi($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$nhi($Hhh),$Thh
	zdep	$Zll,28,4,$rem
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	shrpw	$Zhl,$Zlh,4,$Zlh
	shrpw	$Zhh,$Zhl,4,$Zhl
	extru	$Zhh,27,28,$Zhh
	xor	$Tll,$Zll,$Zll
	xor	$Tlh,$Zlh,$Zlh
	xor	$rem,$Zhh,$Zhh
	stw	$Zll,12($Xi)
	xor	$Thl,$Zhl,$Zhl
	stw	$Zlh,8($Xi)
	xor	$Thh,$Zhh,$Zhh
	stw	$Zhl,4($Xi)
	stw	$Zhh,0($Xi)
___
# gcm_gmult_4bit: epilogue, mirrors the prologue.
$code.=<<___;
L\$done_gmult
	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2		; standard epilogue
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
___
$code.=<<___ if ($SIZE_T==4);
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
___
# ---------------------------------------------------------------------
# gcm_ghash_4bit: prologue.  The set of saved registers is the same as in
# gcm_gmult_4bit, so ENTRY_GR is $NREGS here as well (it used to be
# hard-coded to 11, which does not match the 64-bit build that saves
# %r3-%r6 only).
# ---------------------------------------------------------------------
$code.=<<___;
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND

	.EXPORT	gcm_ghash_4bit,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
	.ALIGN	64
gcm_ghash_4bit
	.PROC
	.CALLINFO	FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
___
$code.=<<___ if ($SIZE_T==4);
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
___
# Table address and nibble mask as in gcm_gmult_4bit; $len is turned
# into the end-of-input pointer ($inp+$len) that terminates the outer loop.
$code.=<<___;
	blr	%r0,$rem_4bit
	ldi	3,$rem
L\$pic_ghash
	andcm	$rem_4bit,$rem,$rem_4bit
	addl	$inp,$len,$len
	ldo	L\$rem_4bit-L\$pic_ghash($rem_4bit),$rem_4bit
	ldi	0xf0,$mask0xf0
___
$code.=<<___ if ($SIZE_T==4);
	ldi	31,$rem
	mtctl	$rem,%cr11
	extrd,u,*=	$rem,%sar,1,$rem	; executes on PA-RISC 1.0
	b	L\$parisc1_ghash
	nop
___

# gcm_ghash_4bit, PA-RISC 2.0 path: the outer loop consumes 16 input
# bytes per iteration, xoring each input byte into the matching Xi byte.
$code.=<<___;
	ldb	15($Xi),$nlo
	ldo	8($Htbl),$Hll

L\$outer_ghash_pa2
	ldb	15($inp),$nhi
	xor	$nhi,$nlo,$nlo
	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo

	ldd	$nlo($Hll),$Zll
	ldd	$nlo($Hhh),$Zhh

	depd,z	$Zll,60,4,$rem
	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldb	14($Xi),$nlo
	ldb	14($inp),$byte

	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh
	xor	$byte,$nlo,$nlo
	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem
	b	L\$oop_ghash_pa2
	ldi	13,$cnt

	.ALIGN	8
L\$oop_ghash_pa2
	xor	$rem,$Zhh,$Zhh		; moved here to work around gas bug
	depd,z	$Zll,60,4,$rem2

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nlo($Hll),$Tll
	ldd	$nlo($Hhh),$Thh

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldbx	$cnt($Xi),$nlo
	ldbx	$cnt($inp),$byte

	depd,z	$Zll,60,4,$rem
	shrpd	$Zhh,$Zll,4,$Zll
	ldd	$rem2($rem_4bit),$rem2

	xor	$rem2,$Zhh,$Zhh
	xor	$byte,$nlo,$nlo
	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh

	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo

	extrd,u	$Zhh,59,60,$Zhh
	xor	$Tll,$Zll,$Zll

	ldd	$rem($rem_4bit),$rem
	addib,uv	-1,$cnt,L\$oop_ghash_pa2
	xor	$Thh,$Zhh,$Zhh

	xor	$rem,$Zhh,$Zhh
	depd,z	$Zll,60,4,$rem2

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nlo($Hll),$Tll
	ldd	$nlo($Hhh),$Thh

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh

	depd,z	$Zll,60,4,$rem
	shrpd	$Zhh,$Zll,4,$Zll
	ldd	$rem2($rem_4bit),$rem2

	xor	$rem2,$Zhh,$Zhh
	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh

	extrd,u	$Zhh,59,60,$Zhh
	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem

	xor	$rem,$Zhh,$Zhh
	std	$Zll,8($Xi)
	ldo	16($inp),$inp
	std	$Zhh,0($Xi)
	cmpb,*<>	$inp,$len,L\$outer_ghash_pa2
	copy	$Zll,$nlo
___

# gcm_ghash_4bit, PA-RISC 1.0 path (32-bit build only).
$code.=<<___ if ($SIZE_T==4);
	b	L\$done_ghash
	nop

L\$parisc1_ghash
	ldb	15($Xi),$nlo
	ldo	12($Htbl),$Hll
	ldo	8($Htbl),$Hlh
	ldo	4($Htbl),$Hhl

L\$outer_ghash_pa1
	ldb	15($inp),$byte
	xor	$byte,$nlo,$nlo
	and	$mask0xf0,$nlo,$nhi
	zdep	$nlo,27,4,$nlo

	ldwx	$nlo($Hll),$Zll
	ldwx	$nlo($Hlh),$Zlh
	ldwx	$nlo($Hhl),$Zhl
	ldwx	$nlo($Hhh),$Zhh
	zdep	$Zll,28,4,$rem
	ldb	14($Xi),$nlo
	ldb	14($inp),$byte
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhl,$Zlh,4,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	shrpw	$Zhh,$Zhl,4,$Zhl
	ldwx	$nhi($Hhl),$Thl
	extru	$Zhh,27,28,$Zhh
	ldwx	$nhi($Hhh),$Thh
	xor	$byte,$nlo,$nlo
	xor	$rem,$Zhh,$Zhh
	and	$mask0xf0,$nlo,$nhi
	zdep	$nlo,27,4,$nlo

	xor	$Tll,$Zll,$Zll
	ldwx	$nlo($Hll),$Tll
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nlo($Hlh),$Tlh
	xor	$Thl,$Zhl,$Zhl
	b	L\$oop_ghash_pa1
	ldi	13,$cnt

	.ALIGN	8
L\$oop_ghash_pa1
	zdep	$Zll,28,4,$rem
	ldwx	$nlo($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nlo($Hhh),$Thh
	shrpw	$Zhl,$Zlh,4,$Zlh
	ldbx	$cnt($Xi),$nlo
	xor	$Tll,$Zll,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhh,$Zhl,4,$Zhl
	ldbx	$cnt($inp),$byte
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	extru	$Zhh,27,28,$Zhh
	xor	$Thl,$Zhl,$Zhl
	ldwx	$nhi($Hhl),$Thl
	xor	$rem,$Zhh,$Zhh
	zdep	$Zll,28,4,$rem
	xor	$Thh,$Zhh,$Zhh
	ldwx	$nhi($Hhh),$Thh
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zhl,$Zlh,4,$Zlh
	xor	$byte,$nlo,$nlo
	shrpw	$Zhh,$Zhl,4,$Zhl
	and	$mask0xf0,$nlo,$nhi
	extru	$Zhh,27,28,$Zhh
	zdep	$nlo,27,4,$nlo
	xor	$Tll,$Zll,$Zll
	ldwx	$nlo($Hll),$Tll
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nlo($Hlh),$Tlh
	xor	$rem,$Zhh,$Zhh
	addib,uv	-1,$cnt,L\$oop_ghash_pa1
	xor	$Thl,$Zhl,$Zhl

	zdep	$Zll,28,4,$rem
	ldwx	$nlo($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nlo($Hhh),$Thh
	shrpw	$Zhl,$Zlh,4,$Zlh
	xor	$Tll,$Zll,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhh,$Zhl,4,$Zhl
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	extru	$Zhh,27,28,$Zhh
	xor	$rem,$Zhh,$Zhh
	xor	$Thl,$Zhl,$Zhl
	ldwx	$nhi($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$nhi($Hhh),$Thh
	zdep	$Zll,28,4,$rem
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	shrpw	$Zhl,$Zlh,4,$Zlh
	shrpw	$Zhh,$Zhl,4,$Zhl
	extru	$Zhh,27,28,$Zhh
	xor	$Tll,$Zll,$Zll
	xor	$Tlh,$Zlh,$Zlh
	xor	$rem,$Zhh,$Zhh
	stw	$Zll,12($Xi)
	xor	$Thl,$Zhl,$Zhl
	stw	$Zlh,8($Xi)
	xor	$Thh,$Zhh,$Zhh
	stw	$Zhl,4($Xi)
	ldo	16($inp),$inp
	stw	$Zhh,0($Xi)
	comb,<>	$inp,$len,L\$outer_ghash_pa1
	copy	$Zll,$nlo
___
# gcm_ghash_4bit: epilogue.
$code.=<<___;
L\$done_ghash
	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2		; standard epilogue
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
___
$code.=<<___ if ($SIZE_T==4);
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
___
# Return sequence plus the shared 16-entry L$rem_4bit table.  Each entry
# is emitted as two .WORDs: a 16-bit constant shifted into the upper half
# of the first word, followed by a zero word.
$code.=<<___;
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND

	.ALIGN	64
L\$rem_4bit
	.WORD	`0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0
	.WORD	`0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0
	.WORD	`0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0
	.WORD	`0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0
	.STRINGZ	"GHASH for PA-RISC, GRYPTOGAMS by <appro\@openssl.org>"
	.ALIGN	64
___

# Explicitly encode PA-RISC 2.0 instructions used in this module, so
# that it can be compiled with .LEVEL 1.0.
# The encoders below hand-assemble the PA-RISC 2.0 instructions used by
# this module into .WORD directives.  It should be noted that this
# wouldn't be necessary if GNU assembler understood the .ALLOW 2.0
# directive...
#
# Every encoder takes ($mod,$args): $mod is the completer string that
# followed the mnemonic (e.g. ",u" or "", including the leading comma) and
# $args is the operand string.  It returns one line of assembler text:
# "\t.WORD\t0x<opcode>\t; <original instruction>" when the operands match
# a supported format, or the original instruction unchanged otherwise.

# ldd: indexed form "%rX(%rB),%rT" (format 4) and short-displacement form
# "disp(%rB),%rT" (format 5).  The indexed pattern must be tried first,
# because the displacement pattern is unanchored and would also match the
# digits of the index register.
my $ldd = sub {
    my ($mod,$args) = @_;
    my $orig = "ldd$mod\t$args";

    if (my ($index,$base,$target) =
            $args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {	# format 4
        my $opcode=(0x03<<26)|($base<<21)|($index<<16)|(3<<6)|$target;
        return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }
    if (my ($disp,$base,$target) =
            $args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {	# format 5
        my $opcode=(0x03<<26)|($base<<21)|(1<<12)|(3<<6)|$target;
        $opcode|=(($disp&0xF)<<17)|(($disp&0x10)<<12);		# encode offset
        $opcode|=(1<<5)  if ($mod =~ /^,m/);
        $opcode|=(1<<13) if ($mod =~ /^,mb/);
        return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }
    return "\t".$orig;
};

# std: "%rS,disp(%rB)".
my $std = sub {
    my ($mod,$args) = @_;
    my $orig = "std$mod\t$args";

    if (my ($source,$disp,$base) =
            $args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) {	# format 3 suffices
        my $opcode=(0x1c<<26)|($base<<21)|($source<<16)
                  |(($disp&0x1FF8)<<1)|(($disp>>13)&1);
        return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }
    return "\t".$orig;
};

# extrd: fixed-position form "%rS,pos,len,%rT" (format 15) and the
# variable form "%rS,%sar,len,%rT" (format 12).
my $extrd = sub {
    my ($mod,$args) = @_;
    my $orig = "extrd$mod\t$args";

    # I only have ",u" completer, it's implicitly encoded...
    if (my ($source,$pos,$width,$target) =
            $args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {	# format 15
        my $opcode=(0x36<<26)|($source<<21)|($target<<16);
        my $len=32-$width;
        $opcode |= (($pos&0x20)<<6)|(($pos&0x1f)<<5);	# encode pos
        $opcode |= (($len&0x20)<<7)|($len&0x1f);	# encode len
        return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }
    if (my ($source,$width,$target) =
            $args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) {	# format 12
        my $opcode=(0x34<<26)|($source<<21)|($target<<16)|(2<<11)|(1<<9);
        my $len=32-$width;
        $opcode |= (($len&0x20)<<3)|($len&0x1f);	# encode len
        $opcode |= (1<<13) if ($mod =~ /,\**=/);	# "=" / "*=" condition
        return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }
    return "\t".$orig;
};

# shrpd: fixed shift amount "%rA,%rB,sa,%rT" (format 14) and shift amount
# taken from %sar "%rA,%rB,%sar,%rT" (format 11).
my $shrpd = sub {
    my ($mod,$args) = @_;
    my $orig = "shrpd$mod\t$args";

    if (my ($src1,$src2,$shift,$target) =
            $args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) {	# format 14
        my $opcode=(0x34<<26)|($src2<<21)|($src1<<16)|(1<<10)|$target;
        my $cpos=63-$shift;
        $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);	# encode sa
        return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }
    if (my ($src1,$src2,$target) =
            $args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) {	# format 11
        return sprintf "\t.WORD\t0x%08x\t; %s",
               (0x34<<26)|($src2<<21)|($src1<<16)|(1<<9)|$target,$orig;
    }
    return "\t".$orig;
};

# depd: "%rS,pos,len,%rT".
my $depd = sub {
    my ($mod,$args) = @_;
    my $orig = "depd$mod\t$args";

    # I only have ",z" completer, it's implicitly encoded...
    if (my ($source,$pos,$width,$target) =
            $args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {	# format 16
        my $opcode=(0x3c<<26)|($target<<21)|($source<<16);
        my $cpos=63-$pos;
        my $len=32-$width;
        $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);	# encode pos
        $opcode |= (($len&0x20)<<7)|($len&0x1f);	# encode len
        return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }
    return "\t".$orig;
};

# Mnemonic => encoder.  An explicit table replaces the former string-eval
# lookup of a variable named after the mnemonic.
my %pa20_encoder = (
    ldd   => $ldd,
    std   => $std,
    extrd => $extrd,
    shrpd => $shrpd,
    depd  => $depd,
);

# assemble($mnemonic,$mod,$args)
#
# Returns the line of assembler text for one instruction: the .WORD
# encoding if $mnemonic has an encoder above, otherwise the instruction
# re-joined as "\t$mnemonic$mod\t$args".
sub assemble {
    my ($mnemonic,$mod,$args)=@_;
    my $encoder = $pa20_encoder{$mnemonic};

    return defined($encoder) ? $encoder->($mod,$args)
                             : "\t$mnemonic$mod\t$args";
}

# Probe the assembler behind $CC.  Without $CC there is nothing to probe,
# so no shell command is spawned and GNU as is not assumed.
if (defined($ENV{CC}) && $ENV{CC} ne ''
    && `$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
       =~ /GNU assembler/) {
    $gnuas = 1;
}

# Post-process the accumulated template one line at a time and print it.
foreach (split("\n",$code)) {
    # Evaluate `expression` placeholders, e.g. frame offsets.
    s/\`([^\`]*)\`/eval $1/ge;

    if ($SIZE_T==4) {
        # 32-bit build: hand-encode 2.0 instructions, then turn "cmpb,*"
        # into "comb," and drop the "*" from any other ",*" completer.
        s/^\s+([a-z]+)([\S]*)\s+([\S]*)/assemble($1,$2,$3)/e;
        s/cmpb,\*/comb,/;
        s/,\*/,/;
    }

    if ($SIZE_T==8) {
        if ($gnuas) {
            # Directive spellings for GNU as.
            s/(\.LEVEL\s+2\.0)W/$1w/;
            s/\.SPACE\s+\$TEXT\$/.text/;
            s/\.SUBSPA.*//;
        }
        s/\bbv\b/bve/;
    }

    print $_,"\n";
}

close STDOUT or die "error closing STDOUT: $!";