#! /usr/bin/env perl
# Copyright 2006-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# I let hardware handle unaligned input(*), except on page boundaries
# (see below for details). Otherwise straightforward implementation
# with X vector in register bank.
#
# (*) this means that this module is inappropriate for PPC403? Does
#     anybody know if pre-POWER3 can sustain unaligned load?

# 			-m64	-m32
# ----------------------------------
# PPC970,gcc-4.0.0	+76%	+59%
# Power6,xlc-7		+68%	+33%

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Pick the pointer size and the matching compare/store-with-update/load/store
# mnemonics for the requested flavour.  $LRSAVE is the offset, relative to
# the caller's frame ($FRAME+$LRSAVE from the new stack pointer), at which
# the link register is saved in the prologue below.  A flavour that matches
# neither /64/ nor /32/ aborts the script.
if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$UCMP	="cmpld";
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$UCMP	="cmplw";
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
} else { die "nonsense $flavour"; }

# Define endianness based on flavour
# i.e.: linux64le
# The value is $SIZE_T (non-zero) for flavours ending in "le" and 0
# otherwise; the code in this file only tests it for truth.
$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

# Locate the ppc-xlate.pl translator: first next to this script, then in
# ../../perlasm relative to this script's directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed to STDOUT from here on is piped through ppc-xlate.pl,
# which is given the flavour and the output file name.
open STDOUT,"| $^X $xlate $flavour \"$output\""
    or die "can't call $xlate: $!";

# $FRAME is the size of the stack frame allocated in the prologue.
# $LOCALS is the offset within that frame of the 64-byte scratch buffer
# used to re-align an input block that crosses a page boundary.  The slot
# at $FRAME-$SIZE_T*18 (== $LOCALS+64) holds the saved input pointer, and
# the slots from $FRAME-$SIZE_T*17 upwards hold r15..r31.
$FRAME=24*$SIZE_T+64;
$LOCALS=6*$SIZE_T;

# Register allocation.  $K holds the current round constant; $ctx, $inp and
# $num are the context pointer, the input pointer and the block count as
# used by the code below; $t0/$t1 are scratch.  $toc is defined here but is
# not referenced by the code in this file.
$K  ="r0";
$sp ="r1";
$toc="r2";
$ctx="r3";
$inp="r4";
$num="r5";
$t0 ="r15";
$t1 ="r6";

# The five SHA-1 working variables plus one extra register ($T) that
# receives the newly computed value in each round.
$A  ="r7";
$B  ="r8";
$C  ="r9";
$D  ="r10";
$E  ="r11";
$T  ="r12";

# @V is rotated by one position after every round (see the round loops
# near the end of the file), so no register-to-register moves are emitted
# between rounds.  @X is the 16-word message schedule window.
@V=($A,$B,$C,$D,$E,$T);
@X=("r16","r17","r18","r19","r20","r21","r22","r23",
    "r24","r25","r26","r27","r28","r29","r30","r31");

# loadbe($dst, $src, $temp_reg)
#
# Append to $code the instructions that load the 32-bit word at $src into
# $dst in big-endian byte order.  On big-endian flavours this is a single
# lwz.  On little-endian flavours the word is loaded into $temp_reg and its
# four bytes are reversed into $dst with one rotlwi and two rlwimi, so
# $temp_reg is clobbered in that case.
sub loadbe {
my ($dst, $src, $temp_reg) = @_;
$code.=<<___ if (!$LITTLE_ENDIAN);
	lwz	$dst,$src
___
$code.=<<___ if ($LITTLE_ENDIAN);
	lwz	$temp_reg,$src
	rotlwi	$dst,$temp_reg,8
	rlwimi	$dst,$temp_reg,24,0,7
	rlwimi	$dst,$temp_reg,24,16,23
___
}

# BODY_00_19($i, $a, $b, $c, $d, $e, $f)
#
# Append the code for round $i (0..19) to $code.  $a..$e name the registers
# currently holding the working variables and $f names the register that
# receives the new value: K + e + rotl(a,5) + X[i] + ((b & c) | (~b & d)).
# $e is overwritten with rotl(a,5) once its old value has been consumed,
# and $b is rotated left by 30 in place.
#
# For $i < 15 the next input word X[$i+1] is loaded from $inp (round 0 also
# loads X[0]).  For $i >= 15 the schedule word X[($i+1)%16] is instead
# updated in place: xor-ed with the words at offsets +2, +8 and +13
# (mod 16) from it and rotated left by 1, interleaved with the round's
# arithmetic.
sub BODY_00_19 {
my ($i,$a,$b,$c,$d,$e,$f)=@_;
my $j=$i+1;

	# Since the last value of $f is discarded, we can use
	# it as a temp reg to swap byte-order when needed.
	loadbe("@X[$i]","`$i*4`($inp)",$f) if ($i==0);
	loadbe("@X[$j]","`$j*4`($inp)",$f) if ($i<15);
$code.=<<___ if ($i<15);
	add	$f,$K,$e
	rotlwi	$e,$a,5
	add	$f,$f,@X[$i]
	and	$t0,$c,$b
	add	$f,$f,$e
	andc	$t1,$d,$b
	rotlwi	$b,$b,30
	or	$t0,$t0,$t1
	add	$f,$f,$t0
___
$code.=<<___ if ($i>=15);
	add	$f,$K,$e
	rotlwi	$e,$a,5
	xor	@X[$j%16],@X[$j%16],@X[($j+2)%16]
	add	$f,$f,@X[$i%16]
	and	$t0,$c,$b
	xor	@X[$j%16],@X[$j%16],@X[($j+8)%16]
	add	$f,$f,$e
	andc	$t1,$d,$b
	rotlwi	$b,$b,30
	or	$t0,$t0,$t1
	xor	@X[$j%16],@X[$j%16],@X[($j+13)%16]
	add	$f,$f,$t0
	rotlwi	@X[$j%16],@X[$j%16],1
___
}

# BODY_20_39($i, $a, $b, $c, $d, $e, $f)
#
# Append the code for a round whose mixing function is b ^ c ^ d.  The
# driver loops below call it for rounds 20..39 and again for rounds 60..79
# (with a different constant in $K).  Register roles are the same as in
# BODY_00_19.
#
# For $i < 79 the schedule word X[($i+1)%16] is updated as in BODY_00_19.
# Round 79 has no further schedule word to prepare; in its place the five
# context words are loaded from $ctx into r16..r20 for the final addition
# emitted after the last round.
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e,$f)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
	add	$f,$K,$e
	xor	$t0,$b,$d
	rotlwi	$e,$a,5
	xor	@X[$j%16],@X[$j%16],@X[($j+2)%16]
	add	$f,$f,@X[$i%16]
	xor	$t0,$t0,$c
	xor	@X[$j%16],@X[$j%16],@X[($j+8)%16]
	add	$f,$f,$t0
	rotlwi	$b,$b,30
	xor	@X[$j%16],@X[$j%16],@X[($j+13)%16]
	add	$f,$f,$e
	rotlwi	@X[$j%16],@X[$j%16],1
___
$code.=<<___ if ($i==79);
	add	$f,$K,$e
	xor	$t0,$b,$d
	rotlwi	$e,$a,5
	lwz	r16,0($ctx)
	add	$f,$f,@X[$i%16]
	xor	$t0,$t0,$c
	lwz	r17,4($ctx)
	add	$f,$f,$t0
	rotlwi	$b,$b,30
	lwz	r18,8($ctx)
	lwz	r19,12($ctx)
	add	$f,$f,$e
	lwz	r20,16($ctx)
___
}

# BODY_40_59($i, $a, $b, $c, $d, $e, $f)
#
# Append the code for round $i (40..59) to $code.  The mixing function is
# (b & c) | ((b | c) & d); the schedule update and register roles are the
# same as in BODY_00_19 for $i >= 15.
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e,$f)=@_;
my $j=$i+1;
$code.=<<___;
	add	$f,$K,$e
	rotlwi	$e,$a,5
	xor	@X[$j%16],@X[$j%16],@X[($j+2)%16]
	add	$f,$f,@X[$i%16]
	and	$t0,$b,$c
	xor	@X[$j%16],@X[$j%16],@X[($j+8)%16]
	add	$f,$f,$e
	or	$t1,$b,$c
	rotlwi	$b,$b,30
	xor	@X[$j%16],@X[$j%16],@X[($j+13)%16]
	and	$t1,$t1,$d
	or	$t0,$t0,$t1
	rotlwi	@X[$j%16],@X[$j%16],1
	add	$f,$f,$t0
___
}

# Public entry point .sha1_block_data_order.
#
# Prologue: allocate $FRAME bytes of stack, save r15..r31 and the link
# register, and load the five state words from $ctx into $A..$E.
#
# If the low two bits of $inp are clear, all $num blocks are handed to
# Lsha1_block_private in one go (Laligned).  Otherwise (Lunaligned) the
# number of whole 64-byte blocks that fit before the next 4096-byte
# boundary is computed:
#   - if $num does not exceed it, the input is processed via Laligned;
#   - if it is non-zero, that many blocks are processed directly and $num
#     is reduced accordingly;
#   - then (Lcross_page) one block is copied byte by byte into the scratch
#     buffer at $LOCALS($sp) and hashed from there, with the real input
#     pointer saved and restored around the call; $num is decremented and
#     the Lunaligned logic repeats while it is non-zero.
#
# Epilogue (Ldone): restore the link register and r15..r31, release the
# frame and return.
$code=<<___;
.machine	"any"
.text

.globl	.sha1_block_data_order
.align	4
.sha1_block_data_order:
	$STU	$sp,-$FRAME($sp)
	mflr	r0
	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
	$PUSH	r0,`$FRAME+$LRSAVE`($sp)
	lwz	$A,0($ctx)
	lwz	$B,4($ctx)
	lwz	$C,8($ctx)
	lwz	$D,12($ctx)
	lwz	$E,16($ctx)
	andi.	r0,$inp,3
	bne	Lunaligned
Laligned:
	mtctr	$num
	bl	Lsha1_block_private
	b	Ldone

; PowerPC specification allows an implementation to be ill-behaved
; upon unaligned access which crosses page boundary. "Better safe
; than sorry" principle makes me treat it specially. But I don't
; look for particular offending word, but rather for 64-byte input
; block which crosses the boundary. Once found that block is aligned
; and hashed separately...
.align	4
Lunaligned:
	subfic	$t1,$inp,4096
	andi.	$t1,$t1,4095	; distance to closest page boundary
	srwi.	$t1,$t1,6	; t1/=64
	beq	Lcross_page
	$UCMP	$num,$t1
	ble	Laligned	; didn't cross the page boundary
	mtctr	$t1
	subfc	$num,$t1,$num
	bl	Lsha1_block_private
Lcross_page:
	li	$t1,16
	mtctr	$t1
	addi	r20,$sp,$LOCALS	; spot within the frame
Lmemcpy:
	lbz	r16,0($inp)
	lbz	r17,1($inp)
	lbz	r18,2($inp)
	lbz	r19,3($inp)
	addi	$inp,$inp,4
	stb	r16,0(r20)
	stb	r17,1(r20)
	stb	r18,2(r20)
	stb	r19,3(r20)
	addi	r20,r20,4
	bdnz	Lmemcpy

	$PUSH	$inp,`$FRAME-$SIZE_T*18`($sp)
	li	$t1,1
	addi	$inp,$sp,$LOCALS
	mtctr	$t1
	bl	Lsha1_block_private
	$POP	$inp,`$FRAME-$SIZE_T*18`($sp)
	addic.	$num,$num,-1
	bne	Lunaligned

Ldone:
	$POP	r0,`$FRAME+$LRSAVE`($sp)
	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
	mtlr	r0
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,18,3,0
	.long	0
___

# This is the private block function, which uses a tailored calling
# interface: upon entry the SHA_CTX state is pre-loaded into the given
# registers ($A..$E) and the counter register contains the number of
# 64-byte chunks to digest...
$code.=<<___;
.align	4
Lsha1_block_private:
___
# Each group of 20 rounds first loads its round constant into $K (lis sets
# the upper 16 bits, ori the lower 16), then emits the rounds.  After every
# round @V is rotated right by one: the register that just received the new
# value ($f, the last element) moves to the front and becomes $a for the
# next round, and the register used as scratch ($e) becomes the next $f.
# $i deliberately carries over from one loop to the next.
$code.=<<___;	# load K_00_19
	lis	$K,0x5a82
	ori	$K,$K,0x7999
___
for($i=0;$i<20;$i++)	{ &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;	# load K_20_39
	lis	$K,0x6ed9
	ori	$K,$K,0xeba1
___
for(;$i<40;$i++)	{ &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;	# load K_40_59
	lis	$K,0x8f1b
	ori	$K,$K,0xbcdc
___
for(;$i<60;$i++)	{ &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;	# load K_60_79
	lis	$K,0xca62
	ori	$K,$K,0xc1d6
___
for(;$i<80;$i++)	{ &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
# After 80 rounds the register roles have rotated by 80 mod 6 = 2 places,
# so the final working variables a..e live in $E,$T,$A,$B,$C.  They are
# added to the context words that round 79 loaded into r16..r20, the sums
# are stored back to $ctx and moved into $A..$E (restoring the canonical
# register assignment for the next block), $inp is advanced by 64 bytes and
# the loop repeats until the counter register reaches zero.
$code.=<<___;
	add	r16,r16,$E
	add	r17,r17,$T
	add	r18,r18,$A
	add	r19,r19,$B
	add	r20,r20,$C
	stw	r16,0($ctx)
	mr	$A,r16
	stw	r17,4($ctx)
	mr	$B,r17
	stw	r18,8($ctx)
	mr	$C,r18
	stw	r19,12($ctx)
	mr	$D,r19
	stw	r20,16($ctx)
	mr	$E,r20
	addi	$inp,$inp,`16*4`
	bdnz	Lsha1_block_private
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.size	.sha1_block_data_order,.-.sha1_block_data_order
___
$code.=<<___;
.asciz	"SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
___

# Replace every `...` span in the generated text with the result of
# evaluating it as Perl (this computes the frame offsets and input offsets
# written as expressions above), then emit the result through the xlate
# pipe.  Closing STDOUT is checked so that a failure of the pipe is
# reported as an error.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close STDOUT or die "error closing STDOUT: $!";