1#! /usr/bin/env perl 2# Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved. 3# 4# Licensed under the Apache License 2.0 (the "License"). You may not use 5# this file except in compliance with the License. You can obtain a copy 6# in the file LICENSE in the source distribution or at 7# https://www.openssl.org/source/license.html 8 9 10# ==================================================================== 11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL 12# project. The module is, however, dual licensed under OpenSSL and 13# CRYPTOGAMS licenses depending on where you obtain it. For further 14# details see http://www.openssl.org/~appro/cryptogams/. 15# ==================================================================== 16 17# SHA1 block procedure for s390x. 18 19# April 2007. 20# 21# Performance is >30% better than gcc 3.3 generated code. But the real 22# twist is that SHA1 hardware support is detected and utilized. In 23# which case performance can reach further >4.5x for larger chunks. 24 25# January 2009. 26# 27# Optimize Xupdate for amount of memory references and reschedule 28# instructions to favour dual-issue z10 pipeline. On z10 hardware is 29# "only" ~2.3x faster than software. 30 31# November 2010. 32# 33# Adapt for -m31 build. If kernel supports what's called "highgprs" 34# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit 35# instructions and achieve "64-bit" performance even in 31-bit legacy 36# application context. The feature is not specific to any particular 37# processor, as long as it's "z-CPU". Latter implies that the code 38# remains z/Architecture specific. On z990 it was measured to perform 39# 23% better than code generated by gcc 4.3. 40 41$kimdfunc=1; # magic function code for kimd instruction 42 43# $output is the last argument if it looks like a file (it has an extension) 44# $flavour is the first argument if it doesn't look like a file 45$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef; 46$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef; 47 48if ($flavour =~ /3[12]/) { 49 $SIZE_T=4; 50 $g=""; 51} else { 52 $SIZE_T=8; 53 $g="g"; 54} 55 56$output and open STDOUT,">$output"; 57 58$K_00_39="%r0"; $K=$K_00_39; 59$K_40_79="%r1"; 60$ctx="%r2"; $prefetch="%r2"; 61$inp="%r3"; 62$len="%r4"; 63 64$A="%r5"; 65$B="%r6"; 66$C="%r7"; 67$D="%r8"; 68$E="%r9"; @V=($A,$B,$C,$D,$E); 69$t0="%r10"; 70$t1="%r11"; 71@X=("%r12","%r13","%r14"); 72$sp="%r15"; 73 74$stdframe=16*$SIZE_T+4*8; 75$frame=$stdframe+16*4; 76 77sub Xupdate { 78my $i=shift; 79 80$code.=<<___ if ($i==15); 81 lg $prefetch,$stdframe($sp) ### Xupdate(16) warm-up 82 lr $X[0],$X[2] 83___ 84return if ($i&1); # Xupdate is vectorized and executed every 2nd cycle 85$code.=<<___ if ($i<16); 86 lg $X[0],`$i*4`($inp) ### Xload($i) 87 rllg $X[1],$X[0],32 88___ 89$code.=<<___ if ($i>=16); 90 xgr $X[0],$prefetch ### Xupdate($i) 91 lg $prefetch,`$stdframe+4*(($i+2)%16)`($sp) 92 xg $X[0],`$stdframe+4*(($i+8)%16)`($sp) 93 xgr $X[0],$prefetch 94 rll $X[0],$X[0],1 95 rllg $X[1],$X[0],32 96 rll $X[1],$X[1],1 97 rllg $X[0],$X[1],32 98 lr $X[2],$X[1] # feedback 99___ 100$code.=<<___ if ($i<=70); 101 stg $X[0],`$stdframe+4*($i%16)`($sp) 102___ 103unshift(@X,pop(@X)); 104} 105 106sub BODY_00_19 { 107my ($i,$a,$b,$c,$d,$e)=@_; 108my $xi=$X[1]; 109 110 &Xupdate($i); 111$code.=<<___; 112 alr $e,$K ### $i 113 rll $t1,$a,5 114 lr $t0,$d 115 xr $t0,$c 116 alr $e,$t1 117 nr $t0,$b 118 alr $e,$xi 119 xr $t0,$d 120 rll $b,$b,30 121 alr $e,$t0 122___ 123} 124 125sub BODY_20_39 { 126my ($i,$a,$b,$c,$d,$e)=@_; 127my $xi=$X[1]; 128 129 &Xupdate($i); 130$code.=<<___; 131 alr $e,$K ### $i 132 rll $t1,$a,5 133 lr $t0,$b 134 alr $e,$t1 135 xr $t0,$c 136 alr $e,$xi 137 xr $t0,$d 138 rll $b,$b,30 139 alr $e,$t0 140___ 141} 142 143sub BODY_40_59 { 144my ($i,$a,$b,$c,$d,$e)=@_; 145my $xi=$X[1]; 146 147 &Xupdate($i); 148$code.=<<___; 149 alr $e,$K ### $i 150 rll $t1,$a,5 151 lr $t0,$b 152 alr $e,$t1 153 or $t0,$c 154 lr $t1,$b 155 nr $t0,$d 156 nr $t1,$c 157 alr $e,$xi 158 or $t0,$t1 159 rll $b,$b,30 160 alr $e,$t0 161___ 162} 163 164$code.=<<___; 165#include "s390x_arch.h" 166 167.text 168.align 64 169.type Ktable,\@object 170Ktable: .long 0x5a827999,0x6ed9eba1,0x8f1bbcdc,0xca62c1d6 171 .skip 48 #.long 0,0,0,0,0,0,0,0,0,0,0,0 172.size Ktable,.-Ktable 173.globl sha1_block_data_order 174.type sha1_block_data_order,\@function 175sha1_block_data_order: 176___ 177$code.=<<___ if ($kimdfunc); 178 larl %r1,OPENSSL_s390xcap_P 179 lg %r0,S390X_KIMD(%r1) # check kimd capabilities 180 tmhh %r0,`0x8000>>$kimdfunc` 181 jz .Lsoftware 182 lghi %r0,$kimdfunc 183 lgr %r1,$ctx 184 lgr %r2,$inp 185 sllg %r3,$len,6 186 .long 0xb93e0002 # kimd %r0,%r2 187 brc 1,.-4 # pay attention to "partial completion" 188 br %r14 189.align 16 190.Lsoftware: 191___ 192$code.=<<___; 193 lghi %r1,-$frame 194 st${g} $ctx,`2*$SIZE_T`($sp) 195 stm${g} %r6,%r15,`6*$SIZE_T`($sp) 196 lgr %r0,$sp 197 la $sp,0(%r1,$sp) 198 st${g} %r0,0($sp) 199 200 larl $t0,Ktable 201 llgf $A,0($ctx) 202 llgf $B,4($ctx) 203 llgf $C,8($ctx) 204 llgf $D,12($ctx) 205 llgf $E,16($ctx) 206 207 lg $K_00_39,0($t0) 208 lg $K_40_79,8($t0) 209 210.Lloop: 211 rllg $K_00_39,$K_00_39,32 212___ 213for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } 214$code.=<<___; 215 rllg $K_00_39,$K_00_39,32 216___ 217for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } 218$code.=<<___; $K=$K_40_79; 219 rllg $K_40_79,$K_40_79,32 220___ 221for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } 222$code.=<<___; 223 rllg $K_40_79,$K_40_79,32 224___ 225for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } 226$code.=<<___; 227 228 l${g} $ctx,`$frame+2*$SIZE_T`($sp) 229 la $inp,64($inp) 230 al $A,0($ctx) 231 al $B,4($ctx) 232 al $C,8($ctx) 233 al $D,12($ctx) 234 al $E,16($ctx) 235 st $A,0($ctx) 236 st $B,4($ctx) 237 st $C,8($ctx) 238 st $D,12($ctx) 239 st $E,16($ctx) 240 brct${g} $len,.Lloop 241 242 lm${g} %r6,%r15,`$frame+6*$SIZE_T`($sp) 243 br %r14 244.size sha1_block_data_order,.-sha1_block_data_order 245.string "SHA1 block transform for s390x, CRYPTOGAMS by <appro\@openssl.org>" 246___ 247 248$code =~ s/\`([^\`]*)\`/eval $1/gem; 249 250print $code; 251close STDOUT or die "error closing STDOUT: $!"; 252