#! /usr/bin/env perl
# Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# RC4 for PA-RISC.

# June 2009.
#
# Performance is 33% better than gcc 3.2 generated code on PA-7100LC.
# For reference, [4x] unrolled loop is >40% faster than folded one.
# It's possible to unroll loop 8 times on PA-RISC 2.0, but improvement
# is believed to be not sufficient to justify the effort...
#
# Special thanks to polarhome.com for providing HP-UX account.

# How this generator works: the assembly text is accumulated in $code from
# interpolating heredocs, so every $variable below is substituted while the
# heredoc is built.  Anything between back-ticks inside that text is a Perl
# expression that is evaluated in the final output pass at the bottom of
# this file, which also applies a few assembler-dialect fix-ups.

use strict;
use warnings;

# Directory part of the script path, including the trailing separator;
# empty when the script was invoked without any directory component.
my $dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output  = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Redirect everything we print into the requested file.  Fail right here
# with the real reason instead of discovering the problem at close time.
if ($output) {
    open STDOUT, '>', $output
        or die "can't open $output for writing: $!";
}

# ABI dependent parameters: a flavour containing "64" selects the 2.0W
# (64-bit) settings, anything else (including no flavour) the 1.0 ones.
my ($LEVEL, $SIZE_T, $FRAME_MARKER, $SAVED_RP);
my ($PUSH, $PUSHMA, $POP, $POPMB);

if (defined $flavour && $flavour =~ /64/) {
    $LEVEL        = "2.0W";
    $SIZE_T       = 8;
    $FRAME_MARKER = 80;
    $SAVED_RP     = 16;
    $PUSH         = "std";
    $PUSHMA       = "std,ma";
    $POP          = "ldd";
    $POPMB        = "ldd,mb";
}
else {
    $LEVEL        = "1.0";
    $SIZE_T       = 4;
    $FRAME_MARKER = 48;
    $SAVED_RP     = 20;
    $PUSH         = "stw";
    $PUSHMA       = "stwm";
    $POP          = "ldw";
    $POPMB        = "ldwm";
}

my $FRAME = 4*$SIZE_T + $FRAME_MARKER;  # 4 saved regs + frame marker
                                        #                [+ argument transfer]

# Size in bytes of one key-schedule element.  It follows the RC4_INT
# definition found in opensslconf.h; when that header cannot be read, or
# carries no such definition, the RC4_CHAR default is kept.
my $SZ = 1;                             # defaults to RC4_CHAR
if (open my $conf, '<', "${dir}../../opensslconf.h") {
    while (my $line = <$conf>) {
        if ($line =~ m/#\s*define\s+RC4_INT\s+(.*)/) {
            $SZ = ($1 =~ /char$/) ? 1 : 4;
            last;
        }
    }
    close $conf;
}

# Load/store/index-scaling mnemonics matching the element size.
my ($LD, $LDX, $MKX, $ST);
if ($SZ == 1) {         # RC4_CHAR
    $LD  = "ldb";
    $LDX = "ldbx";
    $MKX = "addl";
    $ST  = "stb";
}
else {                  # RC4_INT (~5% faster than RC4_CHAR on PA-7100LC)
    $LD  = "ldw";
    $LDX = "ldwx,s";
    $MKX = "sh2addl";
    $ST  = "stw";
}

# Register allocation used by the generated code.
my $key = "%r26";
my $len = "%r25";
my $inp = "%r24";
my $out = "%r23";

# Two-entry register pairs that unrolledloopbody() rotates after every
# unrolled iteration.
my @XX = ("%r19", "%r20");
my @TX = ("%r21", "%r22");
my $YY = "%r28";
my $TY = "%r29";

my $acc  = "%r1";
my $ix   = "%r2";       # %r2 is also stored by the prologue and reloaded
                        # at L$done, see the RC4 body below
my $iy   = "%r3";
my $dat0 = "%r4";
my $dat1 = "%r5";
my $rem  = "%r6";
my $mask = "%r31";

# Accumulated assembly text; filled in by the heredocs and the two
# generator subroutines below.
my $code = "";

# Append four copies of the unrolled loop body to $code.
#
# The two sprintf lines are emitted only for copies 1..3 ($i is interpolated
# into the text, so the "if" is decided by the back-tick evaluation in the
# output pass); copy 1 uses "zdep", copies 2 and 3 use "dep".  @TX and @XX
# are rotated after each copy, so after the four copies made by one call
# both arrays are back in their initial order.
sub unrolledloopbody {
    for my $i (0 .. 3) {
        $code .= <<___;
	ldo	1($XX[0]),$XX[1]
	`sprintf("$LDX	%$TY(%$key),%$dat1") if ($i>0)`
	and	$mask,$XX[1],$XX[1]
	$LDX	$YY($key),$TY
	$MKX	$YY,$key,$ix
	$LDX	$XX[1]($key),$TX[1]
	$MKX	$XX[0],$key,$iy
	$ST	$TX[0],0($ix)
	comclr,<> $XX[1],$YY,%r0	; conditional
	copy	$TX[0],$TX[1]		; move
	`sprintf("%sdep	%$dat1,%d,8,%$acc",$i==1?"z":"",8*($i-1)+7) if ($i>0)`
	$ST	$TY,0($iy)
	addl	$TX[0],$TY,$TY
	addl	$TX[1],$YY,$YY
	and	$mask,$TY,$TY
	and	$mask,$YY,$YY
___
        # "rotate" registers
        push(@TX, shift(@TX));
        push(@XX, shift(@XX));
    }
    return;
}

# Append the one-byte-per-iteration loop to $code.
#
#   $label - label placed at the top of the loop; also the branch target
#   $count - register that the closing addib counts down
sub foldedloop {
    my ($label, $count) = @_;

    $code .= <<___;
$label
	$MKX	$YY,$key,$iy
	$LDX	$YY($key),$TY
	$MKX	$XX[0],$key,$ix
	$ST	$TX[0],0($iy)
	ldo	1($XX[0]),$XX[0]
	$ST	$TY,0($ix)
	addl	$TX[0],$TY,$TY
	ldbx	$inp($out),$dat1
	and	$mask,$TY,$TY
	and	$mask,$XX[0],$XX[0]
	$LDX	$TY($key),$acc
	$LDX	$XX[0]($key),$TX[0]
	ldo	1($out),$out
	xor	$dat1,$acc,$acc
	addl	$TX[0],$YY,$YY
	stb	$acc,-1($out)
	addib,<> -1,$count,$label	; $count is always small
	and	$mask,$YY,$YY
___
    return;
}

# Evaluate one back-ticked Perl expression taken from $code and return its
# value as text.  An expression that yields nothing expands to the empty
# string; one that fails to evaluate aborts the run, because silently
# dropping it would emit malformed assembly.
sub expand_expr {
    my ($expr) = @_;

    my $value = eval $expr;
    die "cannot evaluate `$expr`: $@" if $@;
    return defined $value ? $value : "";
}

# RC4 entry point: prologue, warm-up and the decision whether the input is
# long enough for the unrolled loops.
$code = <<___;
	.LEVEL	$LEVEL
	.SPACE	\$TEXT\$
	.SUBSPA	\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY

	.EXPORT	RC4,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
RC4
	.PROC
	.CALLINFO	FRAME=`$FRAME-4*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=6
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)

	cmpib,*=	0,$len,L\$abort
	sub		$inp,$out,$inp		; distance between $inp and $out

	$LD		`0*$SZ`($key),$XX[0]
	$LD		`1*$SZ`($key),$YY
	ldo		`2*$SZ`($key),$key

	ldi		0xff,$mask
	ldi		3,$dat0

	ldo		1($XX[0]),$XX[0]	; warm up loop
	and		$mask,$XX[0],$XX[0]
	$LDX		$XX[0]($key),$TX[0]
	addl		$TX[0],$YY,$YY
	cmpib,*>>=	6,$len,L\$oop1		; is $len large enough to bother?
	and		$mask,$YY,$YY

	and,<>		$out,$dat0,$rem		; is $out aligned?
	b		L\$alignedout
	subi		4,$rem,$rem
	sub		$len,$rem,$len
___
foldedloop("L\$alignout", $rem);        # process till $out is aligned

$code .= <<___;
L\$alignedout				; $len is at least 4 here
	and,<>		$inp,$dat0,$acc		; is $inp aligned?
	b		L\$oop4
	sub		$inp,$acc,$rem		; align $inp

	sh3addl		$acc,%r0,$acc
	subi		32,$acc,$acc
	mtctl		$acc,%cr11		; load %sar with vshd align factor
	ldwx		$rem($out),$dat0
	ldo		4($rem),$rem
L\$oop4misalignedinp
___
unrolledloopbody();
$code .= <<___;
	$LDX		$TY($key),$ix
	ldwx		$rem($out),$dat1
	ldo		-4($len),$len
	or		$ix,$acc,$acc		; last piece, no need to dep
	vshd		$dat0,$dat1,$iy		; align data
	copy		$dat1,$dat0
	xor		$iy,$acc,$acc
	stw		$acc,0($out)
	cmpib,*<<	3,$len,L\$oop4misalignedinp
	ldo		4($out),$out
	cmpib,*=	0,$len,L\$done
	nop
	b		L\$oop1
	nop

	.ALIGN	8
L\$oop4
___
unrolledloopbody();
$code .= <<___;
	$LDX		$TY($key),$ix
	ldwx		$inp($out),$dat0
	ldo		-4($len),$len
	or		$ix,$acc,$acc		; last piece, no need to dep
	xor		$dat0,$acc,$acc
	stw		$acc,0($out)
	cmpib,*<<	3,$len,L\$oop4
	ldo		4($out),$out
	cmpib,*=	0,$len,L\$done
	nop
___
foldedloop("L\$oop1", $len);
$code .= <<___;
L\$done
	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2
	ldo	-1($XX[0]),$XX[0]	; chill out loop
	sub	$YY,$TX[0],$YY
	and	$mask,$XX[0],$XX[0]
	and	$mask,$YY,$YY
	$ST	$XX[0],`-2*$SZ`($key)
	$ST	$YY,`-1*$SZ`($key)
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
L\$abort
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND
___

# RC4_set_key and RC4_options.  Both calls to unrolledloopbody() above
# rotate the register pairs back to their starting order, so $XX[0]/$XX[1]
# and $TX[0]/$TX[1] name the same registers here as at the top of the file.
$code .= <<___;

	.EXPORT	RC4_set_key,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
	.ALIGN	8
RC4_set_key
	.PROC
	.CALLINFO	NO_CALLS
	.ENTRY
	$ST	%r0,`0*$SZ`($key)
	$ST	%r0,`1*$SZ`($key)
	ldo	`2*$SZ`($key),$key
	copy	%r0,$XX[0]
L\$1st
	$ST	$XX[0],0($key)
	ldo	1($XX[0]),$XX[0]
	bb,>=	$XX[0],`31-8`,L\$1st	; $XX[0]<256
	ldo	$SZ($key),$key

	ldo	`-256*$SZ`($key),$key	; rewind $key
	addl	$len,$inp,$inp		; $inp to point at the end
	sub	%r0,$len,%r23		; inverse index
	copy	%r0,$XX[0]
	copy	%r0,$XX[1]
	ldi	0xff,$mask

L\$2nd
	$LDX	$XX[0]($key),$TX[0]
	ldbx	%r23($inp),$TX[1]
	addi,nuv	1,%r23,%r23	; increment and conditional
	sub	%r0,$len,%r23		; inverse index
	addl	$TX[0],$XX[1],$XX[1]
	addl	$TX[1],$XX[1],$XX[1]
	and	$mask,$XX[1],$XX[1]
	$MKX	$XX[0],$key,$TY
	$LDX	$XX[1]($key),$TX[1]
	$MKX	$XX[1],$key,$YY
	ldo	1($XX[0]),$XX[0]
	$ST	$TX[0],0($YY)
	bb,>=	$XX[0],`31-8`,L\$2nd	; $XX[0]<256
	$ST	$TX[1],0($TY)

	bv,n	(%r2)
	.EXIT
	nop
	.PROCEND

	.EXPORT	RC4_options,ENTRY
	.ALIGN	8
RC4_options
	.PROC
	.CALLINFO	NO_CALLS
	.ENTRY
	blr	%r0,%r28
	ldi	3,%r1
L\$pic
	andcm	%r28,%r1,%r28
	bv	(%r2)
	.EXIT
	ldo	L\$opts-L\$pic(%r28),%r28
	.PROCEND
	.ALIGN	8
L\$opts
	.STRINGZ "rc4(4x,`$SZ==1?"char":"int"`)"
	.STRINGZ "RC4 for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
___

# Ask the configured compiler which assembler it drives.  $CC is passed to
# the shell as-is (it may legitimately carry options); when it is not set
# there is nothing to probe and the non-GNU syntax is kept.
my $gnuas = 0;
if (defined $ENV{CC}
    && `$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
       =~ /GNU assembler/) {
    $gnuas = 1;
}

# Output pass: expand the back-ticked expressions, then adapt the text to
# the assembler dialect and word size, one line at a time.
foreach my $line (split("\n", $code)) {
    $line =~ s/\`([^\`]*)\`/expand_expr($1)/ge;

    $line =~ s/(\.LEVEL\s+2\.0)W/$1w/     if ($gnuas && $SIZE_T==8);
    $line =~ s/\.SPACE\s+\$TEXT\$/.text/  if ($gnuas && $SIZE_T==8);
    $line =~ s/\.SUBSPA.*//               if ($gnuas && $SIZE_T==8);
    $line =~ s/cmpib,\*/comib,/           if ($SIZE_T==4);
    $line =~ s/\bbv\b/bve/                if ($SIZE_T==8);

    print $line, "\n";
}
close STDOUT or die "error closing STDOUT: $!";