#! /usr/bin/env perl
# Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# Source: /openssl/crypto/rc4/asm/rc4-s390x.pl (revision 33388b44)

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# February 2009
#
# Performance is 2x that of gcc 3.4.6 on z10. The coding "secret" is to
# "cluster" Address Generation Interlocks, so that one pipeline stall
# resolves several dependencies.

# November 2010.
#
# Adapt for -m31 build. If the kernel supports what's called the "highgprs"
# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
# instructions and achieve "64-bit" performance even in a 31-bit legacy
# application context. The feature is not specific to any particular
# processor, as long as it's a "z-CPU". The latter implies that the code
# remains z/Architecture specific. On z990 it was measured to perform
# 50% better than code generated by gcc 4.3.

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Flavours matching /3[12]/ get 4-byte register slots and the plain
# stm/lm mnemonics; every other flavour (including none) gets 8-byte
# slots and the "g"-suffixed stmg/lmg forms.
if ($flavour =~ /3[12]/) {
	$SIZE_T=4;
	$g="";
} else {
	$SIZE_T=8;
	$g="g";
}

# Redirect the generated assembly to $output when one was given. The
# three-argument open keeps characters in the file name from being read
# as part of the mode, and a failure aborts instead of silently leaving
# STDOUT pointing at its original destination.
if ($output) {
	open STDOUT,">",$output or die "can't open $output: $!";
}

# Register names shared by all generated routines: $rp is the register
# used for the return branch, $sp the base register of the save slots.
$rp="%r14";
$sp="%r15";
# $code accumulates the whole assembly output; it is printed at the end.
$code=<<___;
.text

___

# void RC4(RC4_KEY *key,size_t len,const void *inp,void *out)
#
# Emits the RC4 stream routine. The generated code reads the x and y
# indices from bytes 0 and 1 of the key structure and treats the bytes
# starting at offset 2 as the state array S (one byte per entry). Input
# is processed 8 bytes per iteration in .Loop8, the remaining len&7
# bytes one at a time in .Loop1, and the indices are written back at
# .Lexit.
{
$acc="%r0";	# keystream accumulator / current data byte
$cnt="%r1";	# number of 8-byte iterations left
$key="%r2";	# argument: key structure
$len="%r3";	# argument: byte count
$inp="%r4";	# argument: input pointer
$out="%r5";	# argument: output pointer

# x and S[x] live in register pairs whose roles are swapped after every
# unrolled round: element 0 is the current value, element 1 receives the
# value for the next round.
@XX=("%r6","%r7");
@TX=("%r8","%r9");
$YY="%r10";	# y index
$TY="%r11";	# S[y], later the index (S[x]+S[y])&255 of the output byte

# Entry: spill %r6-%r11 at 6*$SIZE_T($sp); they are reloaded at .Lexit.
$code.=<<___;
.globl	RC4
.type	RC4,\@function
.align	64
RC4:
	stm${g}	%r6,%r11,6*$SIZE_T($sp)
___
# 31/32-bit flavours: zero-extend len, because the 64-bit srlg/ltgr/ngr
# instructions below operate on the full register.
$code.=<<___ if ($flavour =~ /3[12]/);
	llgfr	$len,$len
___
# Load x and y, pre-increment x (mod 256), derive the 8-byte iteration
# count len>>3 and preload S[x]. A zero count skips straight to .Lshort.
$code.=<<___;
	llgc	$XX[0],0($key)
	llgc	$YY,1($key)
	la	$XX[0],1($XX[0])
	nill	$XX[0],0xff
	srlg	$cnt,$len,3
	ltgr	$cnt,$cnt
	llgc	$TX[0],2($XX[0],$key)
	jz	.Lshort
	j	.Loop8

.align	64
.Loop8:
___
# Eight unrolled rounds. Each round advances y by S[x], computes the next
# x, swaps S[x] and S[y] and leaves (S[x]+S[y])&255 in $TY. The keystream
# byte S[$TY] of a round is fetched at the start of the following round
# (after the loop for the last one) and shifted into $acc from the low
# end, so the first byte of the group ends up most significant.
for ($i=0;$i<8;$i++) {
$code.=<<___;
	la	$YY,0($YY,$TX[0])	# $i
	nill	$YY,255
	la	$XX[1],1($XX[0])
	nill	$XX[1],255
___
# Round 1 starts the accumulator with the byte produced by round 0 ...
$code.=<<___ if ($i==1);
	llgc	$acc,2($TY,$key)
___
# ... later rounds shift it left and insert the previous round's byte.
$code.=<<___ if ($i>1);
	sllg	$acc,$acc,8
	ic	$acc,2($TY,$key)
___
# Swap S[x] and S[y] and fetch S[next x]. If next x equals y, the register
# holding the next S[x] is overwritten with the current $TX[0], i.e. the
# value that was just stored at S[y].
$code.=<<___;
	llgc	$TY,2($YY,$key)
	stc	$TX[0],2($YY,$key)
	llgc	$TX[1],2($XX[1],$key)
	stc	$TY,2($XX[0],$key)
	cr	$XX[1],$YY
	jne	.Lcmov$i
	la	$TX[1],0($TX[0])
.Lcmov$i:
	la	$TY,0($TY,$TX[0])
	nill	$TY,255
___
push(@TX,shift(@TX)); push(@XX,shift(@XX));     # "rotate" registers
}

# After an even number of rotations the pairs are back in their initial
# order, so $TX[1] is free to hold the 8 input bytes here.
#
# .Loop8 tail: append the eighth keystream byte, XOR with the input
#              doubleword, store it, advance both pointers and repeat
#              until $cnt is exhausted.
# .Lshort:     reduce len to len&7; nothing left means exit.
# .Loop1:      the same round, one byte at a time.
# .Lexit:      undo the pre-increment of x, store x and y back into the
#              key structure, reload %r6-%r11 and return through $rp.
$code.=<<___;
	lg	$TX[1],0($inp)
	sllg	$acc,$acc,8
	la	$inp,8($inp)
	ic	$acc,2($TY,$key)
	xgr	$acc,$TX[1]
	stg	$acc,0($out)
	la	$out,8($out)
	brctg	$cnt,.Loop8

.Lshort:
	lghi	$acc,7
	ngr	$len,$acc
	jz	.Lexit
	j	.Loop1

.align	16
.Loop1:
	la	$YY,0($YY,$TX[0])
	nill	$YY,255
	llgc	$TY,2($YY,$key)
	stc	$TX[0],2($YY,$key)
	stc	$TY,2($XX[0],$key)
	ar	$TY,$TX[0]
	ahi	$XX[0],1
	nill	$TY,255
	nill	$XX[0],255
	llgc	$acc,0($inp)
	la	$inp,1($inp)
	llgc	$TY,2($TY,$key)
	llgc	$TX[0],2($XX[0],$key)
	xr	$acc,$TY
	stc	$acc,0($out)
	la	$out,1($out)
	brct	$len,.Loop1

.Lexit:
	ahi	$XX[0],-1
	stc	$XX[0],0($key)
	stc	$YY,1($key)
	lm${g}	%r6,%r11,6*$SIZE_T($sp)
	br	$rp
.size	RC4,.-RC4
.string	"RC4 for s390x, CRYPTOGAMS by <appro\@openssl.org>"

___
}

# void RC4_set_key(RC4_KEY *key,unsigned int len,const void *inp)
#
# Emits the key schedule. The generated code clears the two index bytes
# at the start of the key structure, fills the 256-byte state array at
# offset 2 with 0..255 (.L1stloop) and then mixes it with the key bytes
# (.L2ndloop), reusing the key bytes cyclically.
{
$cnt="%r0";	# loop counter: 256 in the first loop, key bytes left in the second
$idx="%r1";	# fill index in the first loop, running swap index in the second
$key="%r2";	# argument: key structure
$len="%r3";	# argument: key length in bytes
$inp="%r4";	# argument: key bytes
$acc="%r5";	# S[i]
$dat="%r6";	# current key byte, then S[idx]
$ikey="%r7";	# i biased by -256, so it reaches 0 after the last entry
$iinp="%r8";	# offset of the next key byte within inp

# .L1stloop: sth clears bytes 0 and 1 of the key structure (the x and y
#            slots), then S[idx]=idx is stored for 256 consecutive idx.
# .L2ndloop: idx=(idx+S[i]+inp[iinp])&255, then S[i] and S[idx] are
#            swapped. $ikey is incremented before the stores, hence the
#            2+256-1 displacement for S[i]. tml sets the condition code
#            that the later jz consumes: the low 8 bits of $ikey are all
#            zero only once it has reached 0. When $cnt runs out first,
#            $cnt and $iinp are reset so the key is read again from its
#            start.
# .Ldone:    reload %r6-%r8 and return through $rp.
$code.=<<___;
.globl	RC4_set_key
.type	RC4_set_key,\@function
.align	64
RC4_set_key:
	stm${g}	%r6,%r8,6*$SIZE_T($sp)
	lhi	$cnt,256
	la	$idx,0
	sth	$idx,0($key)
.align	4
.L1stloop:
	stc	$idx,2($idx,$key)
	la	$idx,1($idx)
	brct	$cnt,.L1stloop

	lghi	$ikey,-256
	lr	$cnt,$len
	la	$iinp,0
	la	$idx,0
.align	16
.L2ndloop:
	llgc	$acc,2+256($ikey,$key)
	llgc	$dat,0($iinp,$inp)
	la	$idx,0($idx,$acc)
	la	$ikey,1($ikey)
	la	$idx,0($idx,$dat)
	nill	$idx,255
	la	$iinp,1($iinp)
	tml	$ikey,255
	llgc	$dat,2($idx,$key)
	stc	$dat,2+256-1($ikey,$key)
	stc	$acc,2($idx,$key)
	jz	.Ldone
	brct	$cnt,.L2ndloop
	lr	$cnt,$len
	la	$iinp,0
	j	.L2ndloop
.Ldone:
	lm${g}	%r6,%r8,6*$SIZE_T($sp)
	br	$rp
.size	RC4_set_key,.-RC4_set_key

___
}

# const char *RC4_options()
#
# Emits a routine that loads the address of the constant string
# "rc4(8x,char)", placed in .rodata, into %r2 and returns through %r14.
$code.=<<___;
.globl	RC4_options
.type	RC4_options,\@function
.align	16
RC4_options:
	larl	%r2,.Loptions
	br	%r14
.size	RC4_options,.-RC4_options
.section	.rodata
.Loptions:
.align	8
.string	"rc4(8x,char)"
___

# Emit the accumulated assembly. The close is checked so that a failure
# while flushing the buffered output is reported instead of being lost.
print $code;
close STDOUT or die "error closing STDOUT: $!";	# force flush