#! /usr/bin/env perl
# Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA256/512 block procedure for PA-RISC.

# June 2009.
#
# SHA256 performance is >75% better than gcc 3.2 generated code on
# PA-7100LC. Compared to code generated by vendor compiler this
# implementation is almost 70% faster in 64-bit build, but delivers
# virtually same performance in 32-bit build on PA-8600.
#
# SHA512 performance is >2.9x better than gcc 3.2 generated code on
# PA-7100LC, PA-RISC 1.1 processor. Then implementation detects if the
# code is executed on PA-RISC 2.0 processor and switches to 64-bit
# code path delivering adequate performance even in "blended" 32-bit
# build. Though 64-bit code is not any faster than code generated by
# vendor compiler on PA-8600...
#
# Special thanks to polarhome.com for providing HP-UX account.

# How this script works: assembly text is accumulated in $code (with
# made-up mnemonics such as _ror/_shr/_align/_shl and `...` arithmetic
# left in place), then the loop at the bottom of the file post-processes
# it line by line and prints the result to STDOUT.

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Send the generated assembly to $output when one was given.  Fail loudly
# if the file cannot be opened instead of silently writing to the
# original STDOUT.
if ($output) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}

# ABI-dependent parameters: a flavour containing "64" selects the 64-bit
# settings, anything else the 32-bit ones.
if ($flavour =~ /64/) {
    $LEVEL        ="2.0W";
    $SIZE_T       =8;
    $FRAME_MARKER =80;
    $SAVED_RP     =16;
    $PUSH         ="std";
    $PUSHMA       ="std,ma";
    $POP          ="ldd";
    $POPMB        ="ldd,mb";
} else {
    $LEVEL        ="1.0";
    $SIZE_T       =4;
    $FRAME_MARKER =48;
    $SAVED_RP     =20;
    $PUSH         ="stw";
    $PUSHMA       ="stwm";
    $POP          ="ldw";
    $POPMB        ="ldwm";
}

# Algorithm-dependent parameters: an output name containing "512" selects
# SHA-512, anything else SHA-256.  $LAST10BITS equals the low 10 bits of
# the last 32-bit word of the corresponding constant table and is what
# the round code compares against to detect the end of the table.
if ($output =~ /512/) {
    $func="sha512_block_data_order";
    $SZ=8;
    @Sigma0=(28,34,39);
    @Sigma1=(14,18,41);
    @sigma0=(1,  8, 7);
    @sigma1=(19,61, 6);
    $rounds=80;
    $LAST10BITS=0x017;
    $LD="ldd";
    $LDM="ldd,ma";
    $ST="std";
} else {
    $func="sha256_block_data_order";
    $SZ=4;
    @Sigma0=( 2,13,22);
    @Sigma1=( 6,11,25);
    @sigma0=( 7,18, 3);
    @sigma1=(17,19,10);
    $rounds=64;
    $LAST10BITS=0x0f2;
    $LD="ldw";
    $LDM="ldwm";
    $ST="stw";
}

$FRAME=16*$SIZE_T+$FRAME_MARKER;# 16 saved regs + frame marker
                                #                 [+ argument transfer]
$XOFF=16*$SZ+32;                # local variables
$FRAME+=$XOFF;
$XOFF+=$FRAME_MARKER;           # distance between %sp and local variables

$ctx="%r26";    # zapped by $a0
$inp="%r25";    # zapped by $a1
$num="%r24";    # zapped by $t0

$a0 ="%r26";
$a1 ="%r25";
$t0 ="%r24";
$t1 ="%r29";
$Tbl="%r31";

@V=($A,$B,$C,$D,$E,$F,$G,$H)=("%r17","%r18","%r19","%r20","%r21","%r22","%r23","%r28");

@X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
    "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$inp);

# Append one round to $code.
#   $i      - round index; selects the message register @X[$i%16] and
#             suppresses the load of the next table word when $i>=15
#   $a..$h  - names of the registers currently holding the eight
#             working variables
# On entry $t1 is expected to hold the current table word (it is added
# to $h first); for $i<15 the next one is loaded into $t1 on the way out.
sub ROUND_00_15 {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
$code.=<<___;
	_ror	$e,$Sigma1[0],$a0
	and	$f,$e,$t0
	_ror	$e,$Sigma1[1],$a1
	addl	$t1,$h,$h
	andcm	$g,$e,$t1
	xor	$a1,$a0,$a0
	_ror	$a1,`$Sigma1[2]-$Sigma1[1]`,$a1
	or	$t0,$t1,$t1		; Ch(e,f,g)
	addl	@X[$i%16],$h,$h
	xor	$a0,$a1,$a1		; Sigma1(e)
	addl	$t1,$h,$h
	_ror	$a,$Sigma0[0],$a0
	addl	$a1,$h,$h

	_ror	$a,$Sigma0[1],$a1
	and	$a,$b,$t0
	and	$a,$c,$t1
	xor	$a1,$a0,$a0
	_ror	$a1,`$Sigma0[2]-$Sigma0[1]`,$a1
	xor	$t1,$t0,$t0
	and	$b,$c,$t1
	xor	$a0,$a1,$a1		; Sigma0(a)
	addl	$h,$d,$d
	xor	$t1,$t0,$t0		; Maj(a,b,c)
	`"$LDM	$SZ($Tbl),$t1" if ($i<15)`
	addl	$a1,$h,$h
	addl	$t0,$h,$h

___
}

# Append the message-schedule update for round $i (16 is subtracted from
# $i first) followed by a regular round via ROUND_00_15.  For the last
# round of each group of 16 it also emits the check of the just-loaded
# table word against $LAST10BITS, bumping $Tbl by one to signal that the
# end of the table was reached.
sub ROUND_16_xx {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
$i-=16;
$code.=<<___;
	_ror	@X[($i+1)%16],$sigma0[0],$a0
	_ror	@X[($i+1)%16],$sigma0[1],$a1
	addl	@X[($i+9)%16],@X[$i],@X[$i]
	_ror	@X[($i+14)%16],$sigma1[0],$t0
	_ror	@X[($i+14)%16],$sigma1[1],$t1
	xor	$a1,$a0,$a0
	_shr	@X[($i+1)%16],$sigma0[2],$a1
	xor	$t1,$t0,$t0
	_shr	@X[($i+14)%16],$sigma1[2],$t1
	xor	$a1,$a0,$a0		; sigma0(X[(i+1)&0x0f])
	xor	$t1,$t0,$t0		; sigma1(X[(i+14)&0x0f])
	$LDM	$SZ($Tbl),$t1
	addl	$a0,@X[$i],@X[$i]
	addl	$t0,@X[$i],@X[$i]
___
$code.=<<___ if ($i==15);
	extru	$t1,31,10,$a1
	comiclr,<> $LAST10BITS,$a1,%r0
	ldo	1($Tbl),$Tbl		; signal end of $Tbl
___
&ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h);
}

# Section header and the round-constant table; which table is emitted
# depends on $SZ.
$code=<<___;
	.LEVEL	$LEVEL
	.SPACE	\$TEXT\$
	.SUBSPA	\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY

	.ALIGN	64
L\$table
___
$code.=<<___ if ($SZ==8);
	.WORD	0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
	.WORD	0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc
	.WORD	0x3956c25b,0xf348b538,0x59f111f1,0xb605d019
	.WORD	0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118
	.WORD	0xd807aa98,0xa3030242,0x12835b01,0x45706fbe
	.WORD	0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2
	.WORD	0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1
	.WORD	0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694
	.WORD	0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3
	.WORD	0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65
	.WORD	0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483
	.WORD	0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5
	.WORD	0x983e5152,0xee66dfab,0xa831c66d,0x2db43210
	.WORD	0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4
	.WORD	0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725
	.WORD	0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70
	.WORD	0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926
	.WORD	0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df
	.WORD	0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8
	.WORD	0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b
	.WORD	0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001
	.WORD	0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30
	.WORD	0xd192e819,0xd6ef5218,0xd6990624,0x5565a910
	.WORD	0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8
	.WORD	0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53
	.WORD	0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8
	.WORD	0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb
	.WORD	0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3
	.WORD	0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60
	.WORD	0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec
	.WORD	0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9
	.WORD	0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b
	.WORD	0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207
	.WORD	0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178
	.WORD	0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6
	.WORD	0x113f9804,0xbef90dae,0x1b710b35,0x131c471b
	.WORD	0x28db77f5,0x23047d84,0x32caab7b,0x40c72493
	.WORD	0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c
	.WORD	0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a
	.WORD	0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817
___
$code.=<<___ if ($SZ==4);
	.WORD	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
	.WORD	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
	.WORD	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
	.WORD	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
	.WORD	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
	.WORD	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
	.WORD	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
	.WORD	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
	.WORD	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
	.WORD	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
	.WORD	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
	.WORD	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
	.WORD	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
	.WORD	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
	.WORD	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	.WORD	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
___
# Function entry: prologue, argument save, table address computation.
$code.=<<___;

	.EXPORT	$func,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
	.ALIGN	64
$func
	.PROC
	.CALLINFO	FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
	$PUSH	%r12,`-$FRAME+9*$SIZE_T`(%sp)
	$PUSH	%r13,`-$FRAME+10*$SIZE_T`(%sp)
	$PUSH	%r14,`-$FRAME+11*$SIZE_T`(%sp)
	$PUSH	%r15,`-$FRAME+12*$SIZE_T`(%sp)
	$PUSH	%r16,`-$FRAME+13*$SIZE_T`(%sp)
	$PUSH	%r17,`-$FRAME+14*$SIZE_T`(%sp)
	$PUSH	%r18,`-$FRAME+15*$SIZE_T`(%sp)

	_shl	$num,`log(16*$SZ)/log(2)`,$num
	addl	$inp,$num,$num		; $num to point at the end of $inp

	$PUSH	$num,`-$FRAME_MARKER-4*$SIZE_T`(%sp)	; save arguments
	$PUSH	$inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)
	$PUSH	$ctx,`-$FRAME_MARKER-2*$SIZE_T`(%sp)

	blr	%r0,$Tbl
	ldi	3,$t1
L\$pic
	andcm	$Tbl,$t1,$Tbl		; wipe privilege level
	ldo	L\$table-L\$pic($Tbl),$Tbl
___
# SHA512 in a 32-bit build only: run-time dispatch to the L$parisc1 code
# path generated further down.
$code.=<<___ if ($SZ==8 && $SIZE_T==4);
	ldi	31,$t1
	mtctl	$t1,%cr11
	extrd,u,*= $t1,%sar,1,$t1	; executes on PA-RISC 1.0
	b	L\$parisc1
	nop
___
$code.=<<___;
	$LD	`0*$SZ`($ctx),$A	; load context
	$LD	`1*$SZ`($ctx),$B
	$LD	`2*$SZ`($ctx),$C
	$LD	`3*$SZ`($ctx),$D
	$LD	`4*$SZ`($ctx),$E
	$LD	`5*$SZ`($ctx),$F
	$LD	`6*$SZ`($ctx),$G
	$LD	`7*$SZ`($ctx),$H

	extru	$inp,31,`log($SZ)/log(2)`,$t0
	sh3addl	$t0,%r0,$t0
	subi	`8*$SZ`,$t0,$t0
	mtctl	$t0,%cr11		; load %sar with align factor

L\$oop
	ldi	`$SZ-1`,$t0
	$LDM	$SZ($Tbl),$t1
	andcm	$inp,$t0,$t0		; align $inp
___
	for ($i=0;$i<15;$i++) {		# load input block
	$code.="\t$LD\t`$SZ*$i`($t0),@X[$i]\n";		}
$code.=<<___;
	cmpb,*=	$inp,$t0,L\$aligned
	$LD	`$SZ*15`($t0),@X[15]
	$LD	`$SZ*16`($t0),@X[16]
___
	for ($i=0;$i<16;$i++) {		# align data
	$code.="\t_align\t@X[$i],@X[$i+1],@X[$i]\n";	}
$code.=<<___;
L\$aligned
	nop	; otherwise /usr/ccs/bin/as is confused by below .WORD
___

# Rounds 0..15 are emitted once; rounds 16..31 form the body of the
# L$rounds loop, which repeats until the end of the table is signalled.
# @V is rotated after every round so that register roles shift by one.
for($i=0;$i<16;$i++)	{ &ROUND_00_15($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
L\$rounds
	nop	; otherwise /usr/ccs/bin/as is confused by below .WORD
___
for(;$i<32;$i++)	{ &ROUND_16_xx($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	bb,>=	$Tbl,31,L\$rounds	; end of $Tbl signalled?
	nop

	$POP	`-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx	; restore arguments
	$POP	`-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
	$POP	`-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
	ldo	`-$rounds*$SZ-1`($Tbl),$Tbl		; rewind $Tbl

	$LD	`0*$SZ`($ctx),@X[0]	; load context
	$LD	`1*$SZ`($ctx),@X[1]
	$LD	`2*$SZ`($ctx),@X[2]
	$LD	`3*$SZ`($ctx),@X[3]
	$LD	`4*$SZ`($ctx),@X[4]
	$LD	`5*$SZ`($ctx),@X[5]
	addl	@X[0],$A,$A
	$LD	`6*$SZ`($ctx),@X[6]
	addl	@X[1],$B,$B
	$LD	`7*$SZ`($ctx),@X[7]
	ldo	`16*$SZ`($inp),$inp	; advance $inp

	$ST	$A,`0*$SZ`($ctx)	; save context
	addl	@X[2],$C,$C
	$ST	$B,`1*$SZ`($ctx)
	addl	@X[3],$D,$D
	$ST	$C,`2*$SZ`($ctx)
	addl	@X[4],$E,$E
	$ST	$D,`3*$SZ`($ctx)
	addl	@X[5],$F,$F
	$ST	$E,`4*$SZ`($ctx)
	addl	@X[6],$G,$G
	$ST	$F,`5*$SZ`($ctx)
	addl	@X[7],$H,$H
	$ST	$G,`6*$SZ`($ctx)
	$ST	$H,`7*$SZ`($ctx)

	cmpb,*<>,n $inp,$num,L\$oop
	$PUSH	$inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)	; save $inp
___
if ($SZ==8 && $SIZE_T==4)	# SHA512 for 32-bit PA-RISC 1.0
{{
$code.=<<___;
	b	L\$done
	nop

	.ALIGN	64
L\$parisc1
___

# From here on the register names are re-assigned for the 32-bit code
# path: every 64-bit working variable lives in a hi/lo register pair.
# The text generated above is unaffected, as it was already interpolated.
@V=(  $Ahi,  $Alo,  $Bhi,  $Blo,  $Chi,  $Clo,  $Dhi,  $Dlo,
      $Ehi,  $Elo,  $Fhi,  $Flo,  $Ghi,  $Glo,  $Hhi,  $Hlo) =
   ( "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
     "%r9","%r10","%r11","%r12","%r13","%r14","%r15","%r16");
$a0 ="%r17";
$a1 ="%r18";
$a2 ="%r19";
$a3 ="%r20";
$t0 ="%r21";
$t1 ="%r22";
$t2 ="%r28";
$t3 ="%r29";
$Tbl="%r31";

@X=("%r23","%r24","%r25","%r26");	# zaps $num,$inp,$ctx

# Append one round of the 32-bit SHA512 path to $code.
#   $i              - round index
#   $ahi,$alo..$hlo - hi/lo register pairs of the eight working variables
#   $flag           - true when called from ROUND_16_xx_pa1, which has
#                     already loaded X[i+1]; the load is then skipped
# @X is rotated by two entries on exit, so the "next" pair becomes the
# "current" pair for the following round.
sub ROUND_00_15_pa1 {
my ($i,$ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo,
       $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo,$flag)=@_;
my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;

$code.=<<___ if (!$flag);
	ldw	`-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
	ldw	`-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo	; load X[i+1]
___
$code.=<<___;
	shd	$ehi,$elo,$Sigma1[0],$t0
	add	$Xlo,$hlo,$hlo
	shd	$elo,$ehi,$Sigma1[0],$t1
	addc	$Xhi,$hhi,$hhi		; h += X[i]
	shd	$ehi,$elo,$Sigma1[1],$t2
	ldwm	8($Tbl),$Xhi
	shd	$elo,$ehi,$Sigma1[1],$t3
	ldw	-4($Tbl),$Xlo		; load K[i]
	xor	$t2,$t0,$t0
	xor	$t3,$t1,$t1
	and	$flo,$elo,$a0
	and	$fhi,$ehi,$a1
	shd	$ehi,$elo,$Sigma1[2],$t2
	andcm	$glo,$elo,$a2
	shd	$elo,$ehi,$Sigma1[2],$t3
	andcm	$ghi,$ehi,$a3
	xor	$t2,$t0,$t0
	xor	$t3,$t1,$t1		; Sigma1(e)
	add	$Xlo,$hlo,$hlo
	xor	$a2,$a0,$a0
	addc	$Xhi,$hhi,$hhi		; h += K[i]
	xor	$a3,$a1,$a1		; Ch(e,f,g)

	add	$t0,$hlo,$hlo
	shd	$ahi,$alo,$Sigma0[0],$t0
	addc	$t1,$hhi,$hhi		; h += Sigma1(e)
	shd	$alo,$ahi,$Sigma0[0],$t1
	add	$a0,$hlo,$hlo
	shd	$ahi,$alo,$Sigma0[1],$t2
	addc	$a1,$hhi,$hhi		; h += Ch(e,f,g)
	shd	$alo,$ahi,$Sigma0[1],$t3

	xor	$t2,$t0,$t0
	xor	$t3,$t1,$t1
	shd	$ahi,$alo,$Sigma0[2],$t2
	and	$alo,$blo,$a0
	shd	$alo,$ahi,$Sigma0[2],$t3
	and	$ahi,$bhi,$a1
	xor	$t2,$t0,$t0
	xor	$t3,$t1,$t1		; Sigma0(a)

	and	$alo,$clo,$a2
	and	$ahi,$chi,$a3
	xor	$a2,$a0,$a0
	add	$hlo,$dlo,$dlo
	xor	$a3,$a1,$a1
	addc	$hhi,$dhi,$dhi		; d += h
	and	$blo,$clo,$a2
	add	$t0,$hlo,$hlo
	and	$bhi,$chi,$a3
	addc	$t1,$hhi,$hhi		; h += Sigma0(a)
	xor	$a2,$a0,$a0
	add	$a0,$hlo,$hlo
	xor	$a3,$a1,$a1		; Maj(a,b,c)
	addc	$a1,$hhi,$hhi		; h += Maj(a,b,c)

___
$code.=<<___ if ($i==15 && $flag);
	extru	$Xlo,31,10,$Xlo
	comiclr,= $LAST10BITS,$Xlo,%r0
	b	L\$rounds_pa1
	nop
___
push(@X,shift(@X)); push(@X,shift(@X));
}

# Append the message-schedule update for round $i of the 32-bit SHA512
# path (16 is subtracted from $i first), store the new X[i] back to the
# stack, then emit the round itself via ROUND_00_15_pa1 with $flag set.
# The remaining arguments are the sixteen working-variable registers.
sub ROUND_16_xx_pa1 {
my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
my ($i)=shift;
$i-=16;
$code.=<<___;
	ldw	`-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
	ldw	`-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo	; load X[i+1]
	ldw	`-$XOFF+8*(($i+9)%16)`(%sp),$a1
	ldw	`-$XOFF+8*(($i+9)%16)+4`(%sp),$a0	; load X[i+9]
	ldw	`-$XOFF+8*(($i+14)%16)`(%sp),$a3
	ldw	`-$XOFF+8*(($i+14)%16)+4`(%sp),$a2	; load X[i+14]
	shd	$Xnhi,$Xnlo,$sigma0[0],$t0
	shd	$Xnlo,$Xnhi,$sigma0[0],$t1
	add	$a0,$Xlo,$Xlo
	shd	$Xnhi,$Xnlo,$sigma0[1],$t2
	addc	$a1,$Xhi,$Xhi
	shd	$Xnlo,$Xnhi,$sigma0[1],$t3
	xor	$t2,$t0,$t0
	shd	$Xnhi,$Xnlo,$sigma0[2],$t2
	xor	$t3,$t1,$t1
	extru	$Xnhi,`31-$sigma0[2]`,`32-$sigma0[2]`,$t3
	xor	$t2,$t0,$t0
	shd	$a3,$a2,$sigma1[0],$a0
	xor	$t3,$t1,$t1		; sigma0(X[(i+1)&0x0f])
	shd	$a2,$a3,$sigma1[0],$a1
	add	$t0,$Xlo,$Xlo
	shd	$a3,$a2,$sigma1[1],$t2
	addc	$t1,$Xhi,$Xhi
	shd	$a2,$a3,$sigma1[1],$t3
	xor	$t2,$a0,$a0
	shd	$a3,$a2,$sigma1[2],$t2
	xor	$t3,$a1,$a1
	extru	$a3,`31-$sigma1[2]`,`32-$sigma1[2]`,$t3
	xor	$t2,$a0,$a0
	xor	$t3,$a1,$a1		; sigma1(X[(i+14)&0x0f])
	add	$a0,$Xlo,$Xlo
	addc	$a1,$Xhi,$Xhi

	stw	$Xhi,`-$XOFF+8*($i%16)`(%sp)
	stw	$Xlo,`-$XOFF+8*($i%16)+4`(%sp)
___
&ROUND_00_15_pa1($i,@_,1);
}
$code.=<<___;
	ldw	`0*4`($ctx),$Ahi		; load context
	ldw	`1*4`($ctx),$Alo
	ldw	`2*4`($ctx),$Bhi
	ldw	`3*4`($ctx),$Blo
	ldw	`4*4`($ctx),$Chi
	ldw	`5*4`($ctx),$Clo
	ldw	`6*4`($ctx),$Dhi
	ldw	`7*4`($ctx),$Dlo
	ldw	`8*4`($ctx),$Ehi
	ldw	`9*4`($ctx),$Elo
	ldw	`10*4`($ctx),$Fhi
	ldw	`11*4`($ctx),$Flo
	ldw	`12*4`($ctx),$Ghi
	ldw	`13*4`($ctx),$Glo
	ldw	`14*4`($ctx),$Hhi
	ldw	`15*4`($ctx),$Hlo

	extru	$inp,31,2,$t0
	sh3addl	$t0,%r0,$t0
	subi	32,$t0,$t0
	mtctl	$t0,%cr11		; load %sar with align factor

L\$oop_pa1
	extru	$inp,31,2,$a3
	comib,=	0,$a3,L\$aligned_pa1
	sub	$inp,$a3,$inp

	ldw	`0*4`($inp),$X[0]
	ldw	`1*4`($inp),$X[1]
	ldw	`2*4`($inp),$t2
	ldw	`3*4`($inp),$t3
	ldw	`4*4`($inp),$a0
	ldw	`5*4`($inp),$a1
	ldw	`6*4`($inp),$a2
	ldw	`7*4`($inp),$a3
	vshd	$X[0],$X[1],$X[0]
	vshd	$X[1],$t2,$X[1]
	stw	$X[0],`-$XOFF+0*4`(%sp)
	ldw	`8*4`($inp),$t0
	vshd	$t2,$t3,$t2
	stw	$X[1],`-$XOFF+1*4`(%sp)
	ldw	`9*4`($inp),$t1
	vshd	$t3,$a0,$t3
___
# Unaligned input: copy the block to the stack through a rotating window
# of eight registers, shifting adjacent words together with vshd.  The
# first loop's bound is inclusive, so this path reads one word more than
# the aligned path below.
{
my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
for ($i=2;$i<=(128/4-8);$i++) {
$code.=<<___;
	stw	$t[0],`-$XOFF+$i*4`(%sp)
	ldw	`(8+$i)*4`($inp),$t[0]
	vshd	$t[1],$t[2],$t[1]
___
push(@t,shift(@t));
}
for (;$i<(128/4-1);$i++) {
$code.=<<___;
	stw	$t[0],`-$XOFF+$i*4`(%sp)
	vshd	$t[1],$t[2],$t[1]
___
push(@t,shift(@t));
}
$code.=<<___;
	b	L\$collected_pa1
	stw	$t[0],`-$XOFF+$i*4`(%sp)

___
}
$code.=<<___;
L\$aligned_pa1
	ldw	`0*4`($inp),$X[0]
	ldw	`1*4`($inp),$X[1]
	ldw	`2*4`($inp),$t2
	ldw	`3*4`($inp),$t3
	ldw	`4*4`($inp),$a0
	ldw	`5*4`($inp),$a1
	ldw	`6*4`($inp),$a2
	ldw	`7*4`($inp),$a3
	stw	$X[0],`-$XOFF+0*4`(%sp)
	ldw	`8*4`($inp),$t0
	stw	$X[1],`-$XOFF+1*4`(%sp)
	ldw	`9*4`($inp),$t1
___
# Aligned input: plain copy of the block to the stack through the same
# rotating window of registers.
{
my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
for ($i=2;$i<(128/4-8);$i++) {
$code.=<<___;
	stw	$t[0],`-$XOFF+$i*4`(%sp)
	ldw	`(8+$i)*4`($inp),$t[0]
___
push(@t,shift(@t));
}
for (;$i<128/4;$i++) {
$code.=<<___;
	stw	$t[0],`-$XOFF+$i*4`(%sp)
___
push(@t,shift(@t));
}
$code.="L\$collected_pa1\n";
}

# @V holds hi/lo pairs here, hence the rotation by two after each round.
for($i=0;$i<16;$i++)	{ &ROUND_00_15_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
$code.="L\$rounds_pa1\n";
for(;$i<32;$i++)	{ &ROUND_16_xx_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }

$code.=<<___;
	$POP	`-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx	; restore arguments
	$POP	`-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
	$POP	`-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
	ldo	`-$rounds*$SZ`($Tbl),$Tbl		; rewind $Tbl

	ldw	`0*4`($ctx),$t1		; update context
	ldw	`1*4`($ctx),$t0
	ldw	`2*4`($ctx),$t3
	ldw	`3*4`($ctx),$t2
	ldw	`4*4`($ctx),$a1
	ldw	`5*4`($ctx),$a0
	ldw	`6*4`($ctx),$a3
	add	$t0,$Alo,$Alo
	ldw	`7*4`($ctx),$a2
	addc	$t1,$Ahi,$Ahi
	ldw	`8*4`($ctx),$t1
	add	$t2,$Blo,$Blo
	ldw	`9*4`($ctx),$t0
	addc	$t3,$Bhi,$Bhi
	ldw	`10*4`($ctx),$t3
	add	$a0,$Clo,$Clo
	ldw	`11*4`($ctx),$t2
	addc	$a1,$Chi,$Chi
	ldw	`12*4`($ctx),$a1
	add	$a2,$Dlo,$Dlo
	ldw	`13*4`($ctx),$a0
	addc	$a3,$Dhi,$Dhi
	ldw	`14*4`($ctx),$a3
	add	$t0,$Elo,$Elo
	ldw	`15*4`($ctx),$a2
	addc	$t1,$Ehi,$Ehi
	stw	$Ahi,`0*4`($ctx)
	add	$t2,$Flo,$Flo
	stw	$Alo,`1*4`($ctx)
	addc	$t3,$Fhi,$Fhi
	stw	$Bhi,`2*4`($ctx)
	add	$a0,$Glo,$Glo
	stw	$Blo,`3*4`($ctx)
	addc	$a1,$Ghi,$Ghi
	stw	$Chi,`4*4`($ctx)
	add	$a2,$Hlo,$Hlo
	stw	$Clo,`5*4`($ctx)
	addc	$a3,$Hhi,$Hhi
	stw	$Dhi,`6*4`($ctx)
	ldo	`16*$SZ`($inp),$inp	; advance $inp
	stw	$Dlo,`7*4`($ctx)
	stw	$Ehi,`8*4`($ctx)
	stw	$Elo,`9*4`($ctx)
	stw	$Fhi,`10*4`($ctx)
	stw	$Flo,`11*4`($ctx)
	stw	$Ghi,`12*4`($ctx)
	stw	$Glo,`13*4`($ctx)
	stw	$Hhi,`14*4`($ctx)
	comb,=	$inp,$num,L\$done
	stw	$Hlo,`15*4`($ctx)
	b	L\$oop_pa1
	$PUSH	$inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)	; save $inp
L\$done
___
}}
$code.=<<___;
	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2		; standard epilogue
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
	$POP	`-$FRAME+9*$SIZE_T`(%sp),%r12
	$POP	`-$FRAME+10*$SIZE_T`(%sp),%r13
	$POP	`-$FRAME+11*$SIZE_T`(%sp),%r14
	$POP	`-$FRAME+12*$SIZE_T`(%sp),%r15
	$POP	`-$FRAME+13*$SIZE_T`(%sp),%r16
	$POP	`-$FRAME+14*$SIZE_T`(%sp),%r17
	$POP	`-$FRAME+15*$SIZE_T`(%sp),%r18
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND
	.STRINGZ "SHA`64*$SZ` block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
___

# Explicitly encode PA-RISC 2.0 instructions used in this module, so
# that it can be compiled with .LEVEL 1.0. It should be noted that I
# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
# directive...
#
# Each encoder below takes the completer string ($mod, e.g. ",ma") and
# the operand string ($args).  When the operands match a form it knows,
# it returns a ".WORD 0x........" line with the original instruction kept
# as a trailing comment; otherwise it returns the instruction text as is.

my $ldd = sub {
  my ($mod,$args) = @_;
  my $orig = "ldd$mod\t$args";

    if ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 3 suffices
    {	my $opcode=(0x14<<26)|($2<<21)|($3<<16)|(($1&0x1FF8)<<1)|(($1>>13)&1);
	$opcode|=(1<<3) if ($mod =~ /^,m/);
	$opcode|=(1<<2) if ($mod =~ /^,mb/);
	sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }
    else { "\t".$orig; }
};

my $std = sub {
  my ($mod,$args) = @_;
  my $orig = "std$mod\t$args";

    if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 3 suffices
    {	my $opcode=(0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1);
	sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }
    else { "\t".$orig; }
};

my $extrd = sub {
  my ($mod,$args) = @_;
  my $orig = "extrd$mod\t$args";

    # I only have ",u" completer, it's implicitly encoded...
    if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/)	# format 15
    {	my $opcode=(0x36<<26)|($1<<21)|($4<<16);
	my $len=32-$3;
	$opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5);		# encode pos
	$opcode |= (($len&0x20)<<7)|($len&0x1f);		# encode len
	sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }
    elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/)	# format 12
    {	my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9);
	my $len=32-$2;
	$opcode |= (($len&0x20)<<3)|($len&0x1f);		# encode len
	$opcode |= (1<<13) if ($mod =~ /,\**=/);
	sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }
    else { "\t".$orig; }
};

my $shrpd = sub {
  my ($mod,$args) = @_;
  my $orig = "shrpd$mod\t$args";

    if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/)	# format 14
    {	my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4;
	my $cpos=63-$3;
	$opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);		# encode sa
	sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }
    elsif ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/)	# format 11
    {	sprintf "\t.WORD\t0x%08x\t; %s",
		(0x34<<26)|($2<<21)|($1<<16)|(1<<9)|$3,$orig;
    }
    else { "\t".$orig; }
};

# Look up a variable named after the mnemonic; if it holds a code
# reference (one of the encoders above), let it produce the line,
# otherwise reassemble the instruction text unchanged.
sub assemble {
  my ($mnemonic,$mod,$args)=@_;
  my $opcode = eval("\$$mnemonic");

    ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args";
}

# Ask the compiler named by $CC which assembler it drives; the command
# goes through the shell with stderr folded into the captured output.
if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
	=~ /GNU assembler/) {
    $gnuas = 1;
}

# Post-process the accumulated text line by line: evaluate `...`
# arithmetic, translate the made-up instructions, hand-encode 2.0
# instructions for 32-bit builds, apply assembler-specific fix-ups and
# print.
foreach (split("\n",$code)) {
	s/\`([^\`]*)\`/eval $1/ge;

	s/shd\s+(%r[0-9]+),(%r[0-9]+),([0-9]+)/
		$3>31 ? sprintf("shd\t%$2,%$1,%d",$3-32)	# rotation for >=32
		:       sprintf("shd\t%$1,%$2,%d",$3)/e			or
	# translate made up instructions: _ror, _shr, _align, _shl
	s/_ror(\s+)(%r[0-9]+),/
		($SZ==4 ? "shd" : "shrpd")."$1$2,$2,"/e			or

	s/_shr(\s+%r[0-9]+),([0-9]+),/
		$SZ==4 ? sprintf("extru%s,%d,%d,",$1,31-$2,32-$2)
		:        sprintf("extrd,u%s,%d,%d,",$1,63-$2,64-$2)/e	or

	s/_align(\s+%r[0-9]+,%r[0-9]+),/
		($SZ==4 ? "vshd$1," : "shrpd$1,%sar,")/e		or

	s/_shl(\s+%r[0-9]+),([0-9]+),/
		$SIZE_T==4 ? sprintf("zdep%s,%d,%d,",$1,31-$2,32-$2)
		:            sprintf("depd,z%s,%d,%d,",$1,63-$2,64-$2)/e;

	s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($SIZE_T==4);

	s/(\.LEVEL\s+2\.0)W/$1w/	if ($gnuas && $SIZE_T==8);
	s/\.SPACE\s+\$TEXT\$/.text/	if ($gnuas && $SIZE_T==8);
	s/\.SUBSPA.*//			if ($gnuas && $SIZE_T==8);
	s/cmpb,\*/comb,/		if ($SIZE_T==4);
	s/\bbv\b/bve/			if ($SIZE_T==8);

	print $_,"\n";
}

close STDOUT or die "error closing STDOUT: $!";