#! /usr/bin/env perl
# Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for MIPS.

# Performance improvement is 30% on unaligned input. The "secret" is
# to deploy lwl/lwr pair to load unaligned input. One could have
# vectorized Xupdate on MIPSIII/IV, but the goal was to code MIPS32-
# compatible subroutine. There is room for minor optimization on
# little-endian platforms...

# September 2012.
#
# Add MIPS32r2 code (>25% less instructions).

######################################################################
# There is a number of MIPS ABI in use, O32 and N32/64 are most
# widely used. Then there is a new contender: NUBI. It appears that if
# one picks the latter, it's possible to arrange code in ABI neutral
# manner. Therefore let's stick to NUBI register layout:
#
($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
#
# The return value is placed in $a0. Following coding rules facilitate
# interoperability:
#
# - never ever touch $tp, "thread pointer", former $gp;
# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
#   old code];
# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
#
# For reference here is register layout for N32/64 MIPS ABIs:
#
# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
# supported flavours are o32,n32,64,nubi32,nubi64, default is o32
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : "o32";

# Select 32- vs 64-bit pointer-manipulation mnemonics and the size of a
# saved register slot, so the same assembly template serves all ABIs.
if ($flavour =~ /64|n32/i) {
	$PTR_ADD="daddu";	# incidentally works even on n32
	$PTR_SUB="dsubu";	# incidentally works even on n32
	$REG_S="sd";
	$REG_L="ld";
	$PTR_SLL="dsll";	# incidentally works even on n32
	$SZREG=8;
} else {
	$PTR_ADD="addu";
	$PTR_SUB="subu";
	$REG_S="sw";
	$REG_L="lw";
	$PTR_SLL="sll";
	$SZREG=4;
}
#
# <appro@openssl.org>
#
######################################################################

# Probe target endianness by preprocessing "MIPSEB" with the cross
# compiler: if the token survives -E unchanged, the compiler does NOT
# predefine MIPSEB, i.e. the target is little-endian ($big_endian=1
# here actually means "needs byte swapping" is decided below via $MSB).
$big_endian=(`echo MIPSEB | $ENV{CC} -E -`=~/MIPSEB/)?0:1 if ($ENV{CC});

$output and open STDOUT,">$output";

# Fall back to the build host's endianness when $ENV{CC} is not set.
if (!defined($big_endian))
            {   $big_endian=(unpack('L',pack('N',1))==1);   }

# offsets of the Most and Least Significant Bytes
$MSB=$big_endian?0:3;
$LSB=3&~$MSB;

@X=map("\$$_",(8..23));	# a4-a7,s0-s11

# Register roles for the generated routine.  Note the aliasing: $t1
# shares a register with $num (which is offloaded to the stack during
# the rounds), $t2 lives in $fp and $K (the round constant) in $ra --
# both are saved/restored by the prologue/epilogue.
$ctx=$a0;
$inp=$a1;
$num=$a2;
$A="\$1";
$B="\$2";
$C="\$3";
$D="\$7";
$E="\$24";	@V=($A,$B,$C,$D,$E);
$t0="\$25";
$t1=$num;	# $num is offloaded to stack
$t2="\$30";	# fp
$K="\$31";	# ra

# BODY_00_14 -- emit one SHA-1 round for rounds 0..14, with
# F = Ch(b,c,d) computed as (((c^d)&b)^d) and $K = K_00_19.  On
# little-endian targets the current input word @X[$i] is byte-swapped
# first (wsbh+rotr on MIPS32r2/64r2, a shift-and-or sequence
# otherwise).  The NEXT input word @X[$j] is pre-loaded in the same
# round: plain lw on r6 (which dropped lwl/lwr but tolerates unaligned
# lw), an lwl/lwr pair elsewhere to cope with unaligned input.
sub BODY_00_14 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___	if (!$big_endian);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	wsbh	@X[$i],@X[$i]		# byte swap($i)
	rotr	@X[$i],@X[$i],16
#else
	srl	$t0,@X[$i],24		# byte swap($i)
	srl	$t1,@X[$i],8
	andi	$t2,@X[$i],0xFF00
	sll	@X[$i],@X[$i],24
	andi	$t1,0xFF00
	sll	$t2,$t2,8
	or	@X[$i],$t0
	or	$t1,$t2
	or	@X[$i],$t1
#endif
___
$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	addu	$e,$K			# $i
	xor	$t0,$c,$d
	rotr	$t1,$a,27
	and	$t0,$b
	addu	$e,$t1
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	lw	@X[$j],$j*4($inp)
#else
	lwl	@X[$j],$j*4+$MSB($inp)
	lwr	@X[$j],$j*4+$LSB($inp)
#endif
	xor	$t0,$d
	addu	$e,@X[$i]
	rotr	$b,$b,2
	addu	$e,$t0
#else
	lwl	@X[$j],$j*4+$MSB($inp)
	sll	$t0,$a,5		# $i
	addu	$e,$K
	lwr	@X[$j],$j*4+$LSB($inp)
	srl	$t1,$a,27
	addu	$e,$t0
	xor	$t0,$c,$d
	addu	$e,$t1
	sll	$t2,$b,30
	and	$t0,$b
	srl	$b,$b,2
	xor	$t0,$d
	addu	$e,@X[$i]
	or	$b,$t2
	addu	$e,$t0
#endif
___
}

# BODY_15_19 -- rounds 15..19: same round function (Ch) and constant
# (K_00_19) as BODY_00_14, but instead of loading input it performs the
# in-place message-schedule update: @X[$j%16] is XORed with the words
# at offsets ($j+2), ($j+8) and ($j+13) mod 16 and rotated left by 1
# (rotr ...,31 on r2; srl/addu/or elsewhere, where "addu x,x" doubles
# x, i.e. shifts left by one).  On the first call ($i==15) the last
# raw input word is byte-swapped for little-endian targets.
sub BODY_15_19 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;

$code.=<<___	if (!$big_endian && $i==15);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	wsbh	@X[$i],@X[$i]		# byte swap($i)
	rotr	@X[$i],@X[$i],16
#else
	srl	$t0,@X[$i],24		# byte swap($i)
	srl	$t1,@X[$i],8
	andi	$t2,@X[$i],0xFF00
	sll	@X[$i],@X[$i],24
	andi	$t1,0xFF00
	sll	$t2,$t2,8
	or	@X[$i],$t0
	or	@X[$i],$t1
	or	@X[$i],$t2
#endif
___
$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	addu	$e,$K			# $i
	xor	@X[$j%16],@X[($j+2)%16]
	xor	$t0,$c,$d
	rotr	$t1,$a,27
	xor	@X[$j%16],@X[($j+8)%16]
	and	$t0,$b
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	xor	$t0,$d
	addu	$e,@X[$i%16]
	rotr	@X[$j%16],@X[$j%16],31
	rotr	$b,$b,2
	addu	$e,$t0
#else
	xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5		# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	and	$t0,$b
	srl	$t1,@X[$j%16],31
	addu	@X[$j%16],@X[$j%16]
	srl	$b,$b,2
	xor	$t0,$d
	or	@X[$j%16],$t1
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
#endif
___
}

# BODY_20_39 -- rounds with F = Parity = b^c^d.  Used twice by the
# driver below: for rounds 20..39 (with K_20_39) and again for rounds
# 60..79 (with K_60_79).  The $i==79 tail interleaves the final round
# with reloading the five chaining values from $ctx into @X[0..4],
# ready for the "addu" update at the bottom of .Loop.
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	xor	@X[$j%16],@X[($j+2)%16]
	addu	$e,$K			# $i
	rotr	$t1,$a,27
	xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	xor	$t0,$b
	addu	$e,@X[$i%16]
	rotr	@X[$j%16],@X[$j%16],31
	rotr	$b,$b,2
	addu	$e,$t0
#else
	xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5		# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	xor	$t0,$b
	srl	$t1,@X[$j%16],31
	addu	@X[$j%16],@X[$j%16]
	srl	$b,$b,2
	addu	$e,@X[$i%16]
	or	@X[$j%16],$t1
	or	$b,$t2
	addu	$e,$t0
#endif
___
$code.=<<___ if ($i==79);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	lw	@X[0],0($ctx)
	addu	$e,$K			# $i
	lw	@X[1],4($ctx)
	rotr	$t1,$a,27
	lw	@X[2],8($ctx)
	xor	$t0,$c,$d
	addu	$e,$t1
	lw	@X[3],12($ctx)
	xor	$t0,$b
	addu	$e,@X[$i%16]
	lw	@X[4],16($ctx)
	rotr	$b,$b,2
	addu	$e,$t0
#else
	lw	@X[0],0($ctx)
	sll	$t0,$a,5		# $i
	addu	$e,$K
	lw	@X[1],4($ctx)
	srl	$t1,$a,27
	addu	$e,$t0
	lw	@X[2],8($ctx)
	xor	$t0,$c,$d
	addu	$e,$t1
	lw	@X[3],12($ctx)
	sll	$t2,$b,30
	xor	$t0,$b
	lw	@X[4],16($ctx)
	srl	$b,$b,2
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
#endif
___
}

# BODY_40_59 -- rounds 40..59, F = Maj(b,c,d), computed additively as
# (c&d) + ((c^d)&b): the two addends are bitwise disjoint, so the two
# separate "addu $e,..." accumulations are carry-free and equal Maj.
# Message-schedule update is identical to BODY_15_19/BODY_20_39.
# (The "$i<79" guard is always true here since the driver only calls
# this for $i<60; it is kept for symmetry with BODY_20_39.)
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	addu	$e,$K			# $i
	and	$t0,$c,$d
	xor	@X[$j%16],@X[($j+2)%16]
	rotr	$t1,$a,27
	addu	$e,$t0
	xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	and	$t0,$b
	addu	$e,@X[$i%16]
	rotr	@X[$j%16],@X[$j%16],31
	rotr	$b,$b,2
	addu	$e,$t0
#else
	xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5		# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	xor	@X[$j%16],@X[($j+8)%16]
	and	$t0,$c,$d
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	addu	$e,$t0
	srl	$t1,@X[$j%16],31
	xor	$t0,$c,$d
	addu	@X[$j%16],@X[$j%16]
	and	$t0,$b
	srl	$b,$b,2
	or	@X[$j%16],$t1
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
#endif
___
}

$FRAMESIZE=16;	# large enough to accommodate NUBI saved registers
# Register-save mask for .mask: NUBI additionally saves $s0-$s3 and $gp
# (extra bits in 0xc0fff008 vs 0xc0ff0000).
$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc0fff008" : "0xc0ff0000";

# Prologue of sha1_block_data_order(ctx,inp,num): allocate the stack
# frame and save the callee-saved registers clobbered by the rounds
# ($ra and $fp are used as $K and $t2 above; $s4-$s11 back half of @X).
$code=<<___;
#include "mips_arch.h"

.text

.set	noat
.set	noreorder
.align	5
.globl	sha1_block_data_order
.ent	sha1_block_data_order
sha1_block_data_order:
	.frame	$sp,$FRAMESIZE*$SZREG,$ra
	.mask	$SAVED_REGS_MASK,-$SZREG
	.set	noreorder
	$PTR_SUB $sp,$FRAMESIZE*$SZREG
	$REG_S	$ra,($FRAMESIZE-1)*$SZREG($sp)
	$REG_S	$fp,($FRAMESIZE-2)*$SZREG($sp)
	$REG_S	$s11,($FRAMESIZE-3)*$SZREG($sp)
	$REG_S	$s10,($FRAMESIZE-4)*$SZREG($sp)
	$REG_S	$s9,($FRAMESIZE-5)*$SZREG($sp)
	$REG_S	$s8,($FRAMESIZE-6)*$SZREG($sp)
	$REG_S	$s7,($FRAMESIZE-7)*$SZREG($sp)
	$REG_S	$s6,($FRAMESIZE-8)*$SZREG($sp)
	$REG_S	$s5,($FRAMESIZE-9)*$SZREG($sp)
	$REG_S	$s4,($FRAMESIZE-10)*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	$REG_S	$s3,($FRAMESIZE-11)*$SZREG($sp)
	$REG_S	$s2,($FRAMESIZE-12)*$SZREG($sp)
	$REG_S	$s1,($FRAMESIZE-13)*$SZREG($sp)
	$REG_S	$s0,($FRAMESIZE-14)*$SZREG($sp)
	$REG_S	$gp,($FRAMESIZE-15)*$SZREG($sp)
___
# Convert $num (block count) into the end-of-input pointer, park it at
# 0($sp) (freeing the register to serve as $t1), load the five chaining
# values and enter the per-block loop, which starts by loading input
# word 0 (plain lw on r6, lwl/lwr elsewhere) and K_00_19.
$code.=<<___;
	$PTR_SLL $num,6
	$PTR_ADD $num,$inp
	$REG_S	$num,0($sp)
	lw	$A,0($ctx)
	lw	$B,4($ctx)
	lw	$C,8($ctx)
	lw	$D,12($ctx)
	b	.Loop
	lw	$E,16($ctx)
.align	4
.Loop:
	.set	reorder
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	lui	$K,0x5a82
	lw	@X[0],($inp)
	ori	$K,0x7999	# K_00_19
#else
	lwl	@X[0],$MSB($inp)
	lui	$K,0x5a82
	lwr	@X[0],$LSB($inp)
	ori	$K,0x7999	# K_00_19
#endif
___
# Unroll all 80 rounds, rotating the virtual register set @V one slot
# per round (unshift/pop) instead of moving data between registers.
for ($i=0;$i<15;$i++)	{ &BODY_00_14($i,@V); unshift(@V,pop(@V)); }
for (;$i<20;$i++)	{ &BODY_15_19($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0x6ed9
	ori	$K,0xeba1	# K_20_39
___
for (;$i<40;$i++)	{ &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0x8f1b
	ori	$K,0xbcdc	# K_40_59
___
for (;$i<60;$i++)	{ &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0xca62
	ori	$K,0xc1d6	# K_60_79
___
for (;$i<80;$i++)	{ &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
# Loop bottom: advance $inp, fold the round results into the chaining
# values in $ctx (@X[0..4] were reloaded by the $i==79 round above),
# and iterate until $inp reaches the end pointer saved at 0($sp).
# Then the epilogue restores the saved registers and returns.
$code.=<<___;
	$PTR_ADD $inp,64
	$REG_L	$num,0($sp)

	addu	$A,$X[0]
	addu	$B,$X[1]
	sw	$A,0($ctx)
	addu	$C,$X[2]
	addu	$D,$X[3]
	sw	$B,4($ctx)
	addu	$E,$X[4]
	sw	$C,8($ctx)
	sw	$D,12($ctx)
	sw	$E,16($ctx)
	.set	noreorder
	bne	$inp,$num,.Loop
	nop

	.set	noreorder
	$REG_L	$ra,($FRAMESIZE-1)*$SZREG($sp)
	$REG_L	$fp,($FRAMESIZE-2)*$SZREG($sp)
	$REG_L	$s11,($FRAMESIZE-3)*$SZREG($sp)
	$REG_L	$s10,($FRAMESIZE-4)*$SZREG($sp)
	$REG_L	$s9,($FRAMESIZE-5)*$SZREG($sp)
	$REG_L	$s8,($FRAMESIZE-6)*$SZREG($sp)
	$REG_L	$s7,($FRAMESIZE-7)*$SZREG($sp)
	$REG_L	$s6,($FRAMESIZE-8)*$SZREG($sp)
	$REG_L	$s5,($FRAMESIZE-9)*$SZREG($sp)
	$REG_L	$s4,($FRAMESIZE-10)*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	$s3,($FRAMESIZE-11)*$SZREG($sp)
	$REG_L	$s2,($FRAMESIZE-12)*$SZREG($sp)
	$REG_L	$s1,($FRAMESIZE-13)*$SZREG($sp)
	$REG_L	$s0,($FRAMESIZE-14)*$SZREG($sp)
	$REG_L	$gp,($FRAMESIZE-15)*$SZREG($sp)
___
$code.=<<___;
	jr	$ra
	$PTR_ADD $sp,$FRAMESIZE*$SZREG
.end	sha1_block_data_order
.rdata
.asciiz	"SHA1 for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
___
print $code;
close STDOUT or die "error closing STDOUT: $!";