xref: /openssl/crypto/sha/asm/sha1-alpha.pl (revision 33388b44)
1#! /usr/bin/env perl
2# Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the Apache License 2.0 (the "License").  You may not use
5# this file except in compliance with the License.  You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9
10# ====================================================================
11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12# project. The module is, however, dual licensed under OpenSSL and
13# CRYPTOGAMS licenses depending on where you obtain it. For further
14# details see http://www.openssl.org/~appro/cryptogams/.
15# ====================================================================
16
17# SHA1 block procedure for Alpha.
18
19# On 21264 performance is 33% better than code generated by vendor
20# compiler, and 75% better than GCC [3.4], and in absolute terms is
21# 8.7 cycles per processed byte. Implementation features vectorized
22# byte swap, but not Xupdate.
23
# Register assignments.  @X[0..15] -- the 16-word message schedule lives
# in integer registers $0..$15; escaped so "$0" survives interpolation
# into the emitted assembly.
@X=(	"\$0",	"\$1",	"\$2",	"\$3",	"\$4",	"\$5",	"\$6",	"\$7",
	"\$8",	"\$9",	"\$10",	"\$11",	"\$12",	"\$13",	"\$14",	"\$15");
# Procedure arguments (Alpha calling convention: a0..a2 = $16..$18).
$ctx="a0";	# $16, SHA_CTX pointer
$inp="a1";	# input pointer
$num="a2";	# number of 64-byte blocks
# Working state A..E of the compression function.
$A="a3";
$B="a4";	# 20
$C="a5";
$D="t8";
$E="t9";	@V=($A,$B,$C,$D,$E);	# @V is rotated once per round
# Scratch registers; note $t2=ra and $K=AT are only safe because the
# prologue saves ra and the code runs under ".set noat".
$t0="t10";	# 24
$t1="t11";
$t2="ra";
$t3="t12";
$K="AT";	# 28, holds the current round constant
# Emit one round for rounds 0..19: F = Ch(b,c,d), realized below as
# (b&c)|(d&~b) with and/bic/or.  Even-numbered rounds additionally fetch
# and byte-swap a pair of big-endian 32-bit message words packed in one
# 64-bit register; from round 15 on, the next schedule word X[j] is
# computed in-flight ("forward Xupdate").
sub BODY_00_19 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;	# index of the schedule word updated by rounds >= 15
# Round 0 primes the unaligned-load pipeline: ldq_u of the quadwords
# covering bytes 0..7 of a possibly misaligned $inp.
$code.=<<___ if ($i==0);
	ldq_u	@X[0],0+0($inp)
	ldq_u	@X[1],0+7($inp)
___
# Even rounds < 14 prefetch the next two overlapping quadwords; each
# aligned quadword will yield two 32-bit message words.
$code.=<<___ if (!($i&1) && $i<14);
	ldq_u	@X[$i+2],($i+2)*4+0($inp)
	ldq_u	@X[$i+3],($i+2)*4+7($inp)
___
# Even rounds < 15: align the fetched data (extql/extqh/or is the
# standard Alpha unaligned-load sequence), then byte-swap BOTH 32-bit
# halves at once -- the zapnot masks 0x11/0x22/0x44/0x88 pick the same
# byte lane out of each half ("vectorized byte swap").  The round body
# for X[i] is interleaved; extll splits the upper half off into X[i+1]
# for the following odd round.
$code.=<<___ if (!($i&1) && $i<15);
	extql	@X[$i],$inp,@X[$i]
	extqh	@X[$i+1],$inp,@X[$i+1]

	or	@X[$i+1],@X[$i],@X[$i]	# pair of 32-bit values are fetched

	srl	@X[$i],24,$t0		# vectorized byte swap
	srl	@X[$i],8,$t2

	sll	@X[$i],8,$t3
	sll	@X[$i],24,@X[$i]
	zapnot	$t0,0x11,$t0
	zapnot	$t2,0x22,$t2

	zapnot	@X[$i],0x88,@X[$i]
	or	$t0,$t2,$t0
	zapnot	$t3,0x44,$t3
	sll	$a,5,$t1

	or	@X[$i],$t0,@X[$i]
	addl	$K,$e,$e
	and	$b,$c,$t2
	zapnot	$a,0xf,$a

	or	@X[$i],$t3,@X[$i]
	srl	$a,27,$t0
	bic	$d,$b,$t3
	sll	$b,30,$b

	extll	@X[$i],4,@X[$i+1]	# extract upper half
	or	$t2,$t3,$t2
	addl	@X[$i],$e,$e

	addl	$t1,$e,$e
	srl	$b,32,$t3
	zapnot	@X[$i],0xf,@X[$i]

	addl	$t0,$e,$e
	addl	$t2,$e,$e
	or	$t3,$b,$b
___
# Odd rounds < 15: plain round body; the word is already in X[i]
# courtesy of the preceding even round.  e += K + rol5(a) + Ch(b,c,d)
# + X[i]; b = rol30(b), done with 64-bit shifts (sll 30 / srl 32 / or).
$code.=<<___ if (($i&1) && $i<15);
	sll	$a,5,$t1
	addl	$K,$e,$e
	and	$b,$c,$t2
	zapnot	$a,0xf,$a

	srl	$a,27,$t0
	addl	@X[$i%16],$e,$e
	bic	$d,$b,$t3
	sll	$b,30,$b

	or	$t2,$t3,$t2
	addl	$t1,$e,$e
	srl	$b,32,$t3
	zapnot	@X[$i],0xf,@X[$i]

	addl	$t0,$e,$e
	addl	$t2,$e,$e
	or	$t3,$b,$b
___
# Rounds >= 15: same round body, interleaved with the schedule update
# X[j] = rol1(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13]) (indices mod 16, i.e.
# the standard j-3/j-8/j-14/j-16 recurrence); rol1 is srl 31 + add
# (doubling) + or.
$code.=<<___ if ($i>=15);	# with forward Xupdate
	sll	$a,5,$t1
	addl	$K,$e,$e
	and	$b,$c,$t2
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]

	zapnot	$a,0xf,$a
	addl	@X[$i%16],$e,$e
	bic	$d,$b,$t3
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]

	srl	$a,27,$t0
	addl	$t1,$e,$e
	or	$t2,$t3,$t2
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]

	sll	$b,30,$b
	addl	$t0,$e,$e
	srl	@X[$j%16],31,$t1

	addl	$t2,$e,$e
	srl	$b,32,$t3
	addl	@X[$j%16],@X[$j%16],@X[$j%16]

	or	$t3,$b,$b
	zapnot	@X[$i%16],0xf,@X[$i%16]
	or	$t1,@X[$j%16],@X[$j%16]
___
}
141
# Emit one round with F = Parity(b,c,d) = b^c^d (the two xor
# instructions below).  Used for rounds 20..39 and reused for 60..79
# (same F, different $K).  Here rol30(b) is done non-destructively
# (sll into $t3, srl 2 on b, or) because b's upper-half garbage is
# tolerated until the final zapnot.
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
# General case: round body interleaved with the forward Xupdate of X[j]
# (same recurrence as in BODY_00_19).
$code.=<<___ if ($i<79);	# with forward Xupdate
	sll	$a,5,$t1
	addl	$K,$e,$e
	zapnot	$a,0xf,$a
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]

	sll	$b,30,$t3
	addl	$t1,$e,$e
	xor	$b,$c,$t2
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]

	srl	$b,2,$b
	addl	@X[$i%16],$e,$e
	xor	$d,$t2,$t2
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]

	srl	@X[$j%16],31,$t1
	addl	$t2,$e,$e
	srl	$a,27,$t0
	addl	@X[$j%16],@X[$j%16],@X[$j%16]

	or	$t3,$b,$b
	addl	$t0,$e,$e
	or	$t1,@X[$j%16],@X[$j%16]
___
# Truncate X[i] back to 32 bits -- skipped for i>=77 because those
# schedule words are never consumed again.
$code.=<<___ if ($i<77);
	zapnot	@X[$i%16],0xf,@X[$i%16]
___
# Final round (79): no Xupdate needed; instead prefetch the five
# chaining values h0..h4 from the context into @X[0..4] for the
# epilogue's additions.
$code.=<<___ if ($i==79);	# with context fetch
	sll	$a,5,$t1
	addl	$K,$e,$e
	zapnot	$a,0xf,$a
	ldl	@X[0],0($ctx)

	sll	$b,30,$t3
	addl	$t1,$e,$e
	xor	$b,$c,$t2
	ldl	@X[1],4($ctx)

	srl	$b,2,$b
	addl	@X[$i%16],$e,$e
	xor	$d,$t2,$t2
	ldl	@X[2],8($ctx)

	srl	$a,27,$t0
	addl	$t2,$e,$e
	ldl	@X[3],12($ctx)

	or	$t3,$b,$b
	addl	$t0,$e,$e
	ldl	@X[4],16($ctx)
___
}
198
# Emit one round for rounds 40..59: F = Maj(b,c,d) =
# (b&c)|(b&d)|(c&d), built from the three and's and two or's below;
# interleaved with the forward Xupdate of X[j] as in the other bodies.
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;	# schedule word updated this round
$code.=<<___;	# with forward Xupdate
	sll	$a,5,$t1
	addl	$K,$e,$e
	zapnot	$a,0xf,$a
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]

	srl	$a,27,$t0
	and	$b,$c,$t2
	and	$b,$d,$t3
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]

	sll	$b,30,$b
	addl	$t1,$e,$e
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]

	srl	@X[$j%16],31,$t1
	addl	$t0,$e,$e
	or	$t2,$t3,$t2
	and	$c,$d,$t3

	or	$t2,$t3,$t2
	srl	$b,32,$t3
	addl	@X[$i%16],$e,$e
	addl	@X[$j%16],@X[$j%16],@X[$j%16]

	or	$t3,$b,$b
	addl	$t2,$e,$e
	or	$t1,@X[$j%16],@X[$j%16]
	zapnot	@X[$i%16],0xf,@X[$i%16]
___
}
233
# Assemble the whole module.  The prologue saves the callee-saved
# registers listed in .mask (ra, s0-s5, fp), loads the five 32-bit
# chaining values, and converts $num (block count) into an end-of-input
# pointer ($num = $inp + 64*$num).
$code=<<___;
#ifdef __linux__
#include <asm/regdef.h>
#else
#include <asm.h>
#include <regdef.h>
#endif

.text

.set	noat
.set	noreorder
.globl	sha1_block_data_order
.align	5
.ent	sha1_block_data_order
sha1_block_data_order:
	lda	sp,-64(sp)
	stq	ra,0(sp)
	stq	s0,8(sp)
	stq	s1,16(sp)
	stq	s2,24(sp)
	stq	s3,32(sp)
	stq	s4,40(sp)
	stq	s5,48(sp)
	stq	fp,56(sp)
	.mask	0x0400fe00,-64
	.frame	sp,64,ra
	.prologue 0

	ldl	$A,0($ctx)
	ldl	$B,4($ctx)
	sll	$num,6,$num
	ldl	$C,8($ctx)
	ldl	$D,12($ctx)
	ldl	$E,16($ctx)
	addq	$inp,$num,$num

.Lloop:
	.set	noreorder
	ldah	$K,23170(zero)
	zapnot	$B,0xf,$B
	lda	$K,31129($K)	# K_00_19
___
# Rounds 0..19: F=Ch, K=0x5a827999 (materialized by the ldah/lda pair
# above).  unshift(@V,pop(@V)) rotates the register roles a..e so no
# data ever moves between registers across rounds.
for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }

$code.=<<___;
	ldah	$K,28378(zero)
	lda	$K,-5215($K)	# K_20_39
___
# Rounds 20..39: F=Parity, K=0x6ed9eba1.
for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }

$code.=<<___;
	ldah	$K,-28900(zero)
	lda	$K,-17188($K)	# K_40_59
___
# Rounds 40..59: F=Maj, K=0x8f1bbcdc.
for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }

$code.=<<___;
	ldah	$K,-13725(zero)
	lda	$K,-15914($K)	# K_60_79
___
# Rounds 60..79 reuse BODY_20_39 (F is Parity again), K=0xca62c1d6;
# its i==79 case also reloads the old chaining values into @X[0..4].
for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }

# Epilogue: fold the round results into the chaining values fetched by
# round 79, store the new state, advance $inp by one 64-byte block and
# loop while $inp < $num; then restore saved registers and return.
$code.=<<___;
	addl	@X[0],$A,$A
	addl	@X[1],$B,$B
	addl	@X[2],$C,$C
	addl	@X[3],$D,$D
	addl	@X[4],$E,$E
	stl	$A,0($ctx)
	stl	$B,4($ctx)
	addq	$inp,64,$inp
	stl	$C,8($ctx)
	stl	$D,12($ctx)
	stl	$E,16($ctx)
	cmpult	$inp,$num,$t1
	bne	$t1,.Lloop

	.set	noreorder
	ldq	ra,0(sp)
	ldq	s0,8(sp)
	ldq	s1,16(sp)
	ldq	s2,24(sp)
	ldq	s3,32(sp)
	ldq	s4,40(sp)
	ldq	s5,48(sp)
	ldq	fp,56(sp)
	lda	sp,64(sp)
	ret	(ra)
.end	sha1_block_data_order
.ascii	"SHA1 block transform for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
.align	2
___
# Emit the generated assembly.  If a filename was passed as the last
# command-line argument, redirect STDOUT to it; otherwise write to the
# inherited STDOUT (e.g. a pipe to the assembler).
my $output = pop;
if ($output) {
	# Three-arg open: avoids 2-arg mode injection via the filename and
	# reports failure instead of silently printing nowhere.
	open STDOUT, '>', $output or die "can't open $output: $!";
}
print $code;
# Buffered-write errors only surface at close, so check it.
close STDOUT or die "error closing STDOUT: $!";
330