#! /usr/bin/env perl
# Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# February 2009
#
# Performance is 2x of gcc 3.4.6 on z10. Coding "secret" is to
# "cluster" Address Generation Interlocks, so that one pipeline stall
# resolves several dependencies.

# November 2010.
#
# Adapt for -m31 build. If kernel supports what's called "highgprs"
# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
# instructions and achieve "64-bit" performance even in 31-bit legacy
# application context. The feature is not specific to any particular
# processor, as long as it's "z-CPU". Latter implies that the code
# remains z/Architecture specific. On z990 it was measured to perform
# 50% better than code generated by gcc 4.3.

# This script generates s390x assembly for RC4(), RC4_set_key() and
# RC4_options().  The assembly is accumulated in $code and printed to
# STDOUT (or to the output file, when one is named) at the very end.

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# A flavour containing "31" or "32" selects the 4-byte size_t build;
# anything else (including no flavour at all) selects the 8-byte one.
# $g is the mnemonic suffix that turns stm/lm into stmg/lmg.
if ($flavour =~ /3[12]/) {
	$SIZE_T=4;
	$g="";
} else {
	$SIZE_T=8;
	$g="g";
}

# Redirect STDOUT to the output file when one was given.  The
# three-argument form keeps the file name from being parsed as part of
# the mode, and the result is checked so that a file which cannot be
# created aborts the run instead of silently sending the assembly to
# the inherited STDOUT.
if ($output) {
	open(STDOUT, '>', $output) or die "can't open $output: $!";
}

$rp="%r14";	# register branched to on return
$sp="%r15";	# base register of the register save area
$code=<<___;
.text

___

# void RC4(RC4_KEY *key,size_t len,const void *inp,void *out)
#
# Layout of RC4_KEY as used below: byte 0 holds x, byte 1 holds y and
# the S-box starts at offset 2, one byte per entry.
{
$acc="%r0";	# accumulates keystream bytes, then the XOR-ed output
$cnt="%r1";	# number of 8-byte groups left (len >> 3)
$key="%r2";
$len="%r3";
$inp="%r4";
$out="%r5";

# @XX holds the current and next x index, @TX the S-box bytes loaded
# for them.  Both pairs are swapped ("rotated") after every unrolled
# step so that "next" becomes "current" without moving any data.
@XX=("%r6","%r7");
@TX=("%r8","%r9");
$YY="%r10";	# y index
$TY="%r11";	# S[y], then (S[x]+S[y])&255, the keystream lookup index

# Prologue: save %r6-%r11, which are restored again at .Lexit.
$code.=<<___;
.globl	RC4
.type	RC4,\@function
.align	64
RC4:
	stm${g}	%r6,%r11,6*$SIZE_T($sp)
___
# In the 31/32-bit flavours len is extended to 64 bits first, because
# the code below operates on it with 64-bit instructions (srlg/ltgr/ngr).
$code.=<<___ if ($flavour =~ /3[12]/);
	llgfr	$len,$len
___
# Load x and y, advance x by one (mod 256), preload S[x] and compute
# the number of 8-byte groups.  Inputs shorter than 8 bytes go straight
# to the byte-at-a-time loop at .Lshort.
$code.=<<___;
	llgc	$XX[0],0($key)
	llgc	$YY,1($key)
	la	$XX[0],1($XX[0])
	nill	$XX[0],0xff
	srlg	$cnt,$len,3
	ltgr	$cnt,$cnt
	llgc	$TX[0],2($XX[0],$key)
	jz	.Lshort
	j	.Loop8

.align	64
.Loop8:
___
# Body of .Loop8, unrolled 8 times: every step performs one RC4 state
# update.  The keystream byte of step N is only fetched during step N+1
# (and the last one after the loop), shifting \$acc left by 8 bits each
# time so that 8 keystream bytes end up in one 64-bit register.
for ($i=0;$i<8;$i++) {
# y += S[x] (mod 256) and compute the next x index.
$code.=<<___;
	la	$YY,0($YY,$TX[0])	# $i
	nill	$YY,255
	la	$XX[1],1($XX[0])
	nill	$XX[1],255
___
# Fetch the keystream byte of the previous step.  Step 0 has no
# previous step; step 1 starts the accumulator with a zero-extending
# load, later steps shift the accumulator and insert the new byte.
$code.=<<___ if ($i==1);
	llgc	$acc,2($TY,$key)
___
$code.=<<___ if ($i>1);
	sllg	$acc,$acc,8
	ic	$acc,2($TY,$key)
___
# Swap S[x] and S[y] and preload S[next x].  If next x equals y, the
# value just stored at S[y] is copied from its register into the
# "next" slot.  Finally form the lookup index (S[x]+S[y])&255.
$code.=<<___;
	llgc	$TY,2($YY,$key)
	stc	$TX[0],2($YY,$key)
	llgc	$TX[1],2($XX[1],$key)
	stc	$TY,2($XX[0],$key)
	cr	$XX[1],$YY
	jne	.Lcmov$i
	la	$TX[1],0($TX[0])
.Lcmov$i:
	la	$TY,0($TY,$TX[0])
	nill	$TY,255
___
push(@TX,shift(@TX)); push(@XX,shift(@XX));	# "rotate" registers
}

# End of .Loop8: fetch the 8th keystream byte, XOR the 8 accumulated
# bytes with 8 input bytes and store the result.  .Lshort/.Loop1 then
# handle the remaining len&7 bytes one at a time.  At .Lexit x is
# decremented again (undoing the increment done on entry) before x and
# y are written back to the key structure.
$code.=<<___;
	lg	$TX[1],0($inp)
	sllg	$acc,$acc,8
	la	$inp,8($inp)
	ic	$acc,2($TY,$key)
	xgr	$acc,$TX[1]
	stg	$acc,0($out)
	la	$out,8($out)
	brctg	$cnt,.Loop8

.Lshort:
	lghi	$acc,7
	ngr	$len,$acc
	jz	.Lexit
	j	.Loop1

.align	16
.Loop1:
	la	$YY,0($YY,$TX[0])
	nill	$YY,255
	llgc	$TY,2($YY,$key)
	stc	$TX[0],2($YY,$key)
	stc	$TY,2($XX[0],$key)
	ar	$TY,$TX[0]
	ahi	$XX[0],1
	nill	$TY,255
	nill	$XX[0],255
	llgc	$acc,0($inp)
	la	$inp,1($inp)
	llgc	$TY,2($TY,$key)
	llgc	$TX[0],2($XX[0],$key)
	xr	$acc,$TY
	stc	$acc,0($out)
	la	$out,1($out)
	brct	$len,.Loop1

.Lexit:
	ahi	$XX[0],-1
	stc	$XX[0],0($key)
	stc	$YY,1($key)
	lm${g}	%r6,%r11,6*$SIZE_T($sp)
	br	$rp
.size	RC4,.-RC4
.string	"RC4 for s390x, CRYPTOGAMS by <appro\@openssl.org>"

___
}

# void RC4_set_key(RC4_KEY *key,unsigned int len,const void *inp)
#
# .L1stloop zeroes x and y (one halfword store) and fills the S-box
# with 0..255.  .L2ndloop then runs 256 swap steps: \$ikey counts from
# -256 up to 0 and the loop ends once its low 8 bits are zero, while
# \$cnt/\$iinp walk the key material and restart from its beginning
# every time len bytes have been consumed.
{
$cnt="%r0";	# loop counter: 256 in the 1st loop, key bytes left in the 2nd
$idx="%r1";	# S-box index being filled, then the running swap index
$key="%r2";
$len="%r3";
$inp="%r4";
$acc="%r5";	# S-box byte at the position addressed through \$ikey
$dat="%r6";	# key byte, then the S-box byte at \$idx
$ikey="%r7";	# S-box position biased by -256
$iinp="%r8";	# offset into the key material

$code.=<<___;
.globl	RC4_set_key
.type	RC4_set_key,\@function
.align	64
RC4_set_key:
	stm${g}	%r6,%r8,6*$SIZE_T($sp)
	lhi	$cnt,256
	la	$idx,0
	sth	$idx,0($key)
.align	4
.L1stloop:
	stc	$idx,2($idx,$key)
	la	$idx,1($idx)
	brct	$cnt,.L1stloop

	lghi	$ikey,-256
	lr	$cnt,$len
	la	$iinp,0
	la	$idx,0
.align	16
.L2ndloop:
	llgc	$acc,2+256($ikey,$key)
	llgc	$dat,0($iinp,$inp)
	la	$idx,0($idx,$acc)
	la	$ikey,1($ikey)
	la	$idx,0($idx,$dat)
	nill	$idx,255
	la	$iinp,1($iinp)
	tml	$ikey,255
	llgc	$dat,2($idx,$key)
	stc	$dat,2+256-1($ikey,$key)
	stc	$acc,2($idx,$key)
	jz	.Ldone
	brct	$cnt,.L2ndloop
	lr	$cnt,$len
	la	$iinp,0
	j	.L2ndloop
.Ldone:
	lm${g}	%r6,%r8,6*$SIZE_T($sp)
	br	$rp
.size	RC4_set_key,.-RC4_set_key

___
}

# const char *RC4_options()
#
# Returns the address of the constant string "rc4(8x,char)".
$code.=<<___;
.globl	RC4_options
.type	RC4_options,\@function
.align	16
RC4_options:
	larl	%r2,.Loptions
	br	%r14
.size	RC4_options,.-RC4_options
.section	.rodata
.Loptions:
.align	8
.string	"rc4(8x,char)"
___

print $code;
close STDOUT or die "error closing STDOUT: $!";	# force flush