#! /usr/bin/env perl
# Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for Alpha.

# On 21264 performance is 33% better than code generated by vendor
# compiler, and 75% better than GCC [3.4], and in absolute terms is
# 8.7 cycles per processed byte. Implementation features vectorized
# byte swap, but not Xupdate.
23 24@X=( "\$0", "\$1", "\$2", "\$3", "\$4", "\$5", "\$6", "\$7", 25 "\$8", "\$9", "\$10", "\$11", "\$12", "\$13", "\$14", "\$15"); 26$ctx="a0"; # $16 27$inp="a1"; 28$num="a2"; 29$A="a3"; 30$B="a4"; # 20 31$C="a5"; 32$D="t8"; 33$E="t9"; @V=($A,$B,$C,$D,$E); 34$t0="t10"; # 24 35$t1="t11"; 36$t2="ra"; 37$t3="t12"; 38$K="AT"; # 28 39 40sub BODY_00_19 { 41my ($i,$a,$b,$c,$d,$e)=@_; 42my $j=$i+1; 43$code.=<<___ if ($i==0); 44 ldq_u @X[0],0+0($inp) 45 ldq_u @X[1],0+7($inp) 46___ 47$code.=<<___ if (!($i&1) && $i<14); 48 ldq_u @X[$i+2],($i+2)*4+0($inp) 49 ldq_u @X[$i+3],($i+2)*4+7($inp) 50___ 51$code.=<<___ if (!($i&1) && $i<15); 52 extql @X[$i],$inp,@X[$i] 53 extqh @X[$i+1],$inp,@X[$i+1] 54 55 or @X[$i+1],@X[$i],@X[$i] # pair of 32-bit values are fetched 56 57 srl @X[$i],24,$t0 # vectorized byte swap 58 srl @X[$i],8,$t2 59 60 sll @X[$i],8,$t3 61 sll @X[$i],24,@X[$i] 62 zapnot $t0,0x11,$t0 63 zapnot $t2,0x22,$t2 64 65 zapnot @X[$i],0x88,@X[$i] 66 or $t0,$t2,$t0 67 zapnot $t3,0x44,$t3 68 sll $a,5,$t1 69 70 or @X[$i],$t0,@X[$i] 71 addl $K,$e,$e 72 and $b,$c,$t2 73 zapnot $a,0xf,$a 74 75 or @X[$i],$t3,@X[$i] 76 srl $a,27,$t0 77 bic $d,$b,$t3 78 sll $b,30,$b 79 80 extll @X[$i],4,@X[$i+1] # extract upper half 81 or $t2,$t3,$t2 82 addl @X[$i],$e,$e 83 84 addl $t1,$e,$e 85 srl $b,32,$t3 86 zapnot @X[$i],0xf,@X[$i] 87 88 addl $t0,$e,$e 89 addl $t2,$e,$e 90 or $t3,$b,$b 91___ 92$code.=<<___ if (($i&1) && $i<15); 93 sll $a,5,$t1 94 addl $K,$e,$e 95 and $b,$c,$t2 96 zapnot $a,0xf,$a 97 98 srl $a,27,$t0 99 addl @X[$i%16],$e,$e 100 bic $d,$b,$t3 101 sll $b,30,$b 102 103 or $t2,$t3,$t2 104 addl $t1,$e,$e 105 srl $b,32,$t3 106 zapnot @X[$i],0xf,@X[$i] 107 108 addl $t0,$e,$e 109 addl $t2,$e,$e 110 or $t3,$b,$b 111___ 112$code.=<<___ if ($i>=15); # with forward Xupdate 113 sll $a,5,$t1 114 addl $K,$e,$e 115 and $b,$c,$t2 116 xor @X[($j+2)%16],@X[$j%16],@X[$j%16] 117 118 zapnot $a,0xf,$a 119 addl @X[$i%16],$e,$e 120 bic $d,$b,$t3 121 xor @X[($j+8)%16],@X[$j%16],@X[$j%16] 122 123 srl 
$a,27,$t0 124 addl $t1,$e,$e 125 or $t2,$t3,$t2 126 xor @X[($j+13)%16],@X[$j%16],@X[$j%16] 127 128 sll $b,30,$b 129 addl $t0,$e,$e 130 srl @X[$j%16],31,$t1 131 132 addl $t2,$e,$e 133 srl $b,32,$t3 134 addl @X[$j%16],@X[$j%16],@X[$j%16] 135 136 or $t3,$b,$b 137 zapnot @X[$i%16],0xf,@X[$i%16] 138 or $t1,@X[$j%16],@X[$j%16] 139___ 140} 141 142sub BODY_20_39 { 143my ($i,$a,$b,$c,$d,$e)=@_; 144my $j=$i+1; 145$code.=<<___ if ($i<79); # with forward Xupdate 146 sll $a,5,$t1 147 addl $K,$e,$e 148 zapnot $a,0xf,$a 149 xor @X[($j+2)%16],@X[$j%16],@X[$j%16] 150 151 sll $b,30,$t3 152 addl $t1,$e,$e 153 xor $b,$c,$t2 154 xor @X[($j+8)%16],@X[$j%16],@X[$j%16] 155 156 srl $b,2,$b 157 addl @X[$i%16],$e,$e 158 xor $d,$t2,$t2 159 xor @X[($j+13)%16],@X[$j%16],@X[$j%16] 160 161 srl @X[$j%16],31,$t1 162 addl $t2,$e,$e 163 srl $a,27,$t0 164 addl @X[$j%16],@X[$j%16],@X[$j%16] 165 166 or $t3,$b,$b 167 addl $t0,$e,$e 168 or $t1,@X[$j%16],@X[$j%16] 169___ 170$code.=<<___ if ($i<77); 171 zapnot @X[$i%16],0xf,@X[$i%16] 172___ 173$code.=<<___ if ($i==79); # with context fetch 174 sll $a,5,$t1 175 addl $K,$e,$e 176 zapnot $a,0xf,$a 177 ldl @X[0],0($ctx) 178 179 sll $b,30,$t3 180 addl $t1,$e,$e 181 xor $b,$c,$t2 182 ldl @X[1],4($ctx) 183 184 srl $b,2,$b 185 addl @X[$i%16],$e,$e 186 xor $d,$t2,$t2 187 ldl @X[2],8($ctx) 188 189 srl $a,27,$t0 190 addl $t2,$e,$e 191 ldl @X[3],12($ctx) 192 193 or $t3,$b,$b 194 addl $t0,$e,$e 195 ldl @X[4],16($ctx) 196___ 197} 198 199sub BODY_40_59 { 200my ($i,$a,$b,$c,$d,$e)=@_; 201my $j=$i+1; 202$code.=<<___; # with forward Xupdate 203 sll $a,5,$t1 204 addl $K,$e,$e 205 zapnot $a,0xf,$a 206 xor @X[($j+2)%16],@X[$j%16],@X[$j%16] 207 208 srl $a,27,$t0 209 and $b,$c,$t2 210 and $b,$d,$t3 211 xor @X[($j+8)%16],@X[$j%16],@X[$j%16] 212 213 sll $b,30,$b 214 addl $t1,$e,$e 215 xor @X[($j+13)%16],@X[$j%16],@X[$j%16] 216 217 srl @X[$j%16],31,$t1 218 addl $t0,$e,$e 219 or $t2,$t3,$t2 220 and $c,$d,$t3 221 222 or $t2,$t3,$t2 223 srl $b,32,$t3 224 addl @X[$i%16],$e,$e 225 addl 
@X[$j%16],@X[$j%16],@X[$j%16] 226 227 or $t3,$b,$b 228 addl $t2,$e,$e 229 or $t1,@X[$j%16],@X[$j%16] 230 zapnot @X[$i%16],0xf,@X[$i%16] 231___ 232} 233 234$code=<<___; 235#ifdef __linux__ 236#include <asm/regdef.h> 237#else 238#include <asm.h> 239#include <regdef.h> 240#endif 241 242.text 243 244.set noat 245.set noreorder 246.globl sha1_block_data_order 247.align 5 248.ent sha1_block_data_order 249sha1_block_data_order: 250 lda sp,-64(sp) 251 stq ra,0(sp) 252 stq s0,8(sp) 253 stq s1,16(sp) 254 stq s2,24(sp) 255 stq s3,32(sp) 256 stq s4,40(sp) 257 stq s5,48(sp) 258 stq fp,56(sp) 259 .mask 0x0400fe00,-64 260 .frame sp,64,ra 261 .prologue 0 262 263 ldl $A,0($ctx) 264 ldl $B,4($ctx) 265 sll $num,6,$num 266 ldl $C,8($ctx) 267 ldl $D,12($ctx) 268 ldl $E,16($ctx) 269 addq $inp,$num,$num 270 271.Lloop: 272 .set noreorder 273 ldah $K,23170(zero) 274 zapnot $B,0xf,$B 275 lda $K,31129($K) # K_00_19 276___ 277for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } 278 279$code.=<<___; 280 ldah $K,28378(zero) 281 lda $K,-5215($K) # K_20_39 282___ 283for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } 284 285$code.=<<___; 286 ldah $K,-28900(zero) 287 lda $K,-17188($K) # K_40_59 288___ 289for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } 290 291$code.=<<___; 292 ldah $K,-13725(zero) 293 lda $K,-15914($K) # K_60_79 294___ 295for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } 296 297$code.=<<___; 298 addl @X[0],$A,$A 299 addl @X[1],$B,$B 300 addl @X[2],$C,$C 301 addl @X[3],$D,$D 302 addl @X[4],$E,$E 303 stl $A,0($ctx) 304 stl $B,4($ctx) 305 addq $inp,64,$inp 306 stl $C,8($ctx) 307 stl $D,12($ctx) 308 stl $E,16($ctx) 309 cmpult $inp,$num,$t1 310 bne $t1,.Lloop 311 312 .set noreorder 313 ldq ra,0(sp) 314 ldq s0,8(sp) 315 ldq s1,16(sp) 316 ldq s2,24(sp) 317 ldq s3,32(sp) 318 ldq s4,40(sp) 319 ldq s5,48(sp) 320 ldq fp,56(sp) 321 lda sp,64(sp) 322 ret (ra) 323.end sha1_block_data_order 324.ascii "SHA1 block transform for Alpha, CRYPTOGAMS 
by <appro\@openssl.org>" 325.align 2 326___ 327$output=pop and open STDOUT,">$output"; 328print $code; 329close STDOUT or die "error closing STDOUT: $!"; 330