# xref: /openssl/crypto/sha/asm/sha1-ppc.pl (revision 33388b44)
#! /usr/bin/env perl
# Copyright 2006-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# I let hardware handle unaligned input(*), except on page boundaries
# (see below for details). Otherwise straightforward implementation
# with X vector in register bank.
#
# (*) this means that this module is inappropriate for PPC403? Does
#     anybody know if pre-POWER3 can sustain unaligned load?

# 			-m64	-m32
# ----------------------------------
# PPC970,gcc-4.0.0	+76%	+59%
# Power6,xlc-7		+68%	+33%

# Command line: [flavour] [output]
#
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# ABI-dependent settings, selected by "64" or "32" appearing in the flavour.
# Generation is aborted for any other flavour.
#   $SIZE_T  size in bytes of one register save slot
#   $LRSAVE  offset of the link register save slot (used as $FRAME+$LRSAVE)
#   $UCMP    unsigned compare instruction
#   $STU     store-with-update instruction that allocates the stack frame
#   $POP     load instruction that restores a saved register
#   $PUSH    store instruction that saves a register
if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$UCMP	="cmpld";
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$UCMP	="cmplw";
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
} else { die "nonsense $flavour"; }

# Define endianness based on flavour
# i.e.: linux64le
# The value is non-zero ($SIZE_T) when the flavour ends in "le", 0 otherwise.
$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

# Locate ppc-xlate.pl: first next to this script, then in ../../perlasm/
# relative to it.  $dir keeps its trailing path separator and is empty
# when the script path has no directory component, so a failed match can
# never leave a stale capture in it.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# From here on everything printed to STDOUT is piped through ppc-xlate.pl,
# which is started with the flavour and the output file name as arguments.
open STDOUT,"| $^X $xlate $flavour \"$output\""
    or die "can't call $xlate: $!";

# Stack frame layout.  The entry point allocates $FRAME bytes, saves
# r15-r31 in the slots $FRAME-$SIZE_T*17 .. $FRAME-$SIZE_T*1, parks the
# input pointer at $FRAME-$SIZE_T*18 and uses the area starting at $LOCALS
# as the 64-byte bounce buffer for a block that crosses a page boundary.
$FRAME=24*$SIZE_T+64;
$LOCALS=6*$SIZE_T;

# Register names used throughout the generated code.
$K  ="r0";	# current round constant
$sp ="r1";	# stack pointer
$toc="r2";
$ctx="r3";	# pointer to the five 32-bit state words
$inp="r4";	# input pointer
$num="r5";	# number of 64-byte blocks
$t0 ="r15";	# scratch
$t1 ="r6";	# scratch

# Working variables; $T is the sixth register that receives each round's
# result, which lets the rounds rotate register roles instead of moving data.
$A  ="r7";
$B  ="r8";
$C  ="r9";
$D  ="r10";
$E  ="r11";
$T  ="r12";

# @V is rotated by one position after every round; @X is the 16-word
# message schedule window kept entirely in registers.
@V=($A,$B,$C,$D,$E,$T);
@X=("r16","r17","r18","r19","r20","r21","r22","r23",
    "r24","r25","r26","r27","r28","r29","r30","r31");

# loadbe(dst, src, temp_reg)
#
# Append to $code the instructions that load the 32-bit word at memory
# operand src into register dst in big-endian byte order.
#
#   dst       name of the destination register
#   src       memory operand, e.g. "0(r4)"
#   temp_reg  scratch register; only the little-endian sequence uses it
#
# When $LITTLE_ENDIAN is false a single lwz is emitted.  Otherwise the raw
# word is loaded into temp_reg and its bytes are reversed into dst with one
# rotlwi and two rlwimi instructions.  Dies at generation time if the
# little-endian sequence is requested without a usable scratch register.
sub loadbe {
my ($dst, $src, $temp_reg) = @_;

# rotlwi overwrites $dst while the rlwimi pair still reads the raw word
# from $temp_reg, so the two must be different registers.
die "loadbe: little-endian load into $dst needs a distinct scratch register"
	if ($LITTLE_ENDIAN && (!defined($temp_reg) || $temp_reg eq $dst));

$code.=<<___ if (!$LITTLE_ENDIAN);
	lwz	$dst,$src
___
$code.=<<___ if ($LITTLE_ENDIAN);
	lwz	$temp_reg,$src
	rotlwi	$dst,$temp_reg,8
	rlwimi	$dst,$temp_reg,24,0,7
	rlwimi	$dst,$temp_reg,24,16,23
___
}

# BODY_00_19(i, a, b, c, d, e, f)
#
# Append the code for round i (0..19) to $code.
#
#   i        round number
#   a..e     names of the registers currently holding the working variables
#   f        name of the register that receives this round's result
#
# The emitted round computes
#     f = K + e + rotl(a,5) + X[i] + ((c & b) | (d & ~b))
# and rotates b left by 30; e is overwritten with rotl(a,5) on the way.
# For i < 15 the next input word X[i+1] is loaded from the input buffer
# (round 0 also loads X[0]).  From i = 15 on, the next schedule word is
# computed in place instead:
#     X[j] = rotl(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1),  j = i+1, indices mod 16
sub BODY_00_19 {
my ($i,$a,$b,$c,$d,$e,$f)=@_;
my $j=$i+1;

	# Since the last value of $f is discarded, we can use
	# it as a temp reg to swap byte-order when needed.
	loadbe($X[$i],"`$i*4`($inp)",$f) if ($i==0);
	loadbe($X[$j],"`$j*4`($inp)",$f) if ($i<15);
$code.=<<___ if ($i<15);
	add	$f,$K,$e
	rotlwi	$e,$a,5
	add	$f,$f,$X[$i]
	and	$t0,$c,$b
	add	$f,$f,$e
	andc	$t1,$d,$b
	rotlwi	$b,$b,30
	or	$t0,$t0,$t1
	add	$f,$f,$t0
___
$code.=<<___ if ($i>=15);
	add	$f,$K,$e
	rotlwi	$e,$a,5
	xor	$X[$j%16],$X[$j%16],$X[($j+2)%16]
	add	$f,$f,$X[$i%16]
	and	$t0,$c,$b
	xor	$X[$j%16],$X[$j%16],$X[($j+8)%16]
	add	$f,$f,$e
	andc	$t1,$d,$b
	rotlwi	$b,$b,30
	or	$t0,$t0,$t1
	xor	$X[$j%16],$X[$j%16],$X[($j+13)%16]
	add	$f,$f,$t0
	rotlwi	$X[$j%16],$X[$j%16],1
___
}

# BODY_20_39(i, a, b, c, d, e, f)
#
# Append the code for round i to $code.  The main script uses this sub for
# rounds 20..39 and again for rounds 60..79.
#
#   i        round number
#   a..e     names of the registers currently holding the working variables
#   f        name of the register that receives this round's result
#
# The emitted round computes
#     f = K + e + rotl(a,5) + X[i] + (b ^ c ^ d)
# and rotates b left by 30; e is overwritten with rotl(a,5) on the way.
# For i < 79 the next schedule word is updated in place:
#     X[j] = rotl(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1),  j = i+1, indices mod 16
# Round 79 has no next schedule word; its free slots are used to load the
# five state words from the context into r16-r20 instead.
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e,$f)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
	add	$f,$K,$e
	xor	$t0,$b,$d
	rotlwi	$e,$a,5
	xor	$X[$j%16],$X[$j%16],$X[($j+2)%16]
	add	$f,$f,$X[$i%16]
	xor	$t0,$t0,$c
	xor	$X[$j%16],$X[$j%16],$X[($j+8)%16]
	add	$f,$f,$t0
	rotlwi	$b,$b,30
	xor	$X[$j%16],$X[$j%16],$X[($j+13)%16]
	add	$f,$f,$e
	rotlwi	$X[$j%16],$X[$j%16],1
___
$code.=<<___ if ($i==79);
	add	$f,$K,$e
	xor	$t0,$b,$d
	rotlwi	$e,$a,5
	lwz	r16,0($ctx)
	add	$f,$f,$X[$i%16]
	xor	$t0,$t0,$c
	lwz	r17,4($ctx)
	add	$f,$f,$t0
	rotlwi	$b,$b,30
	lwz	r18,8($ctx)
	lwz	r19,12($ctx)
	add	$f,$f,$e
	lwz	r20,16($ctx)
___
}

# BODY_40_59(i, a, b, c, d, e, f)
#
# Append the code for round i (40..59) to $code.
#
#   i        round number
#   a..e     names of the registers currently holding the working variables
#   f        name of the register that receives this round's result
#
# The emitted round computes
#     f = K + e + rotl(a,5) + X[i] + ((b & c) | ((b | c) & d))
# and rotates b left by 30; e is overwritten with rotl(a,5) on the way.
# The next schedule word is updated in place:
#     X[j] = rotl(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1),  j = i+1, indices mod 16
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e,$f)=@_;
my $j=$i+1;
$code.=<<___;
	add	$f,$K,$e
	rotlwi	$e,$a,5
	xor	$X[$j%16],$X[$j%16],$X[($j+2)%16]
	add	$f,$f,$X[$i%16]
	and	$t0,$b,$c
	xor	$X[$j%16],$X[$j%16],$X[($j+8)%16]
	add	$f,$f,$e
	or	$t1,$b,$c
	rotlwi	$b,$b,30
	xor	$X[$j%16],$X[$j%16],$X[($j+13)%16]
	and	$t1,$t1,$d
	or	$t0,$t0,$t1
	rotlwi	$X[$j%16],$X[$j%16],1
	add	$f,$f,$t0
___
}

# Public entry point .sha1_block_data_order.  On entry $ctx points at the
# five state words, $inp at the input and $num holds the number of 64-byte
# blocks.
#
# Outline of the generated code:
#   * allocate the frame, save r15-r31 and the link register, and load the
#     state words into $A..$E;
#   * if $inp is 4-byte aligned, put $num in the count register, call
#     Lsha1_block_private once and leave through Ldone;
#   * otherwise (Lunaligned) work out how many whole 64-byte blocks lie
#     before the next 4096-byte boundary.  If all remaining blocks fit,
#     they are hashed in place via Laligned; else the blocks before the
#     boundary are hashed in place and the block that straddles it
#     (Lcross_page) is copied byte by byte into the frame at $LOCALS and
#     hashed from there, after which the check is repeated for the rest.
#
# Expressions in backticks are evaluated by the final substitution pass
# of this script, not by the assembler.
$code=<<___;
.machine	"any"
.text

.globl	.sha1_block_data_order
.align	4
.sha1_block_data_order:
	$STU	$sp,-$FRAME($sp)
	mflr	r0
	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
	$PUSH	r0,`$FRAME+$LRSAVE`($sp)
	lwz	$A,0($ctx)
	lwz	$B,4($ctx)
	lwz	$C,8($ctx)
	lwz	$D,12($ctx)
	lwz	$E,16($ctx)
	andi.	r0,$inp,3
	bne	Lunaligned
Laligned:
	mtctr	$num
	bl	Lsha1_block_private
	b	Ldone

; PowerPC specification allows an implementation to be ill-behaved
; upon unaligned access which crosses page boundary. "Better safe
; than sorry" principle makes me treat it specially. But I don't
; look for particular offending word, but rather for 64-byte input
; block which crosses the boundary. Once found that block is aligned
; and hashed separately...
.align	4
Lunaligned:
	subfic	$t1,$inp,4096
	andi.	$t1,$t1,4095	; distance to closest page boundary
	srwi.	$t1,$t1,6	; t1/=64
	beq	Lcross_page
	$UCMP	$num,$t1
	ble	Laligned	; didn't cross the page boundary
	mtctr	$t1
	subfc	$num,$t1,$num
	bl	Lsha1_block_private
Lcross_page:
	li	$t1,16
	mtctr	$t1
	addi	r20,$sp,$LOCALS	; spot within the frame
Lmemcpy:
	lbz	r16,0($inp)
	lbz	r17,1($inp)
	lbz	r18,2($inp)
	lbz	r19,3($inp)
	addi	$inp,$inp,4
	stb	r16,0(r20)
	stb	r17,1(r20)
	stb	r18,2(r20)
	stb	r19,3(r20)
	addi	r20,r20,4
	bdnz	Lmemcpy

	$PUSH	$inp,`$FRAME-$SIZE_T*18`($sp)
	li	$t1,1
	addi	$inp,$sp,$LOCALS
	mtctr	$t1
	bl	Lsha1_block_private
	$POP	$inp,`$FRAME-$SIZE_T*18`($sp)
	addic.	$num,$num,-1
	bne	Lunaligned

Ldone:
	$POP	r0,`$FRAME+$LRSAVE`($sp)
	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
	mtlr	r0
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,18,3,0
	.long	0
___

# This is the private block function.  It uses a tailored calling
# interface: upon entry the SHA_CTX state is already loaded into the
# working registers and the count register holds the number of chunks
# to digest.
#
# The 80 rounds are fully unrolled.  Before each group of 20 rounds the
# round constant is loaded into $K, and after every round @V is rotated
# by one position so that the register which received the round's result
# becomes "a" for the next round.
$code.=<<___;
.align	4
Lsha1_block_private:
___
$code.=<<___;	# load K_00_19
	lis	$K,0x5a82
	ori	$K,$K,0x7999
___
for($i=0;$i<20;$i++)	{ BODY_00_19($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;	# load K_20_39
	lis	$K,0x6ed9
	ori	$K,$K,0xeba1
___
for(;$i<40;$i++)	{ BODY_20_39($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;	# load K_40_59
	lis	$K,0x8f1b
	ori	$K,$K,0xbcdc
___
for(;$i<60;$i++)	{ BODY_40_59($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;	# load K_60_79
	lis	$K,0xca62
	ori	$K,$K,0xc1d6
___
for(;$i<80;$i++)	{ BODY_20_39($i,@V); unshift(@V,pop(@V)); }

# Round 79 left the previous state words in r16-r20.  @V has been rotated
# 80 times, i.e. 80 mod 6 = 2 positions, so the five working values now
# live in $E,$T,$A,$B,$C.  Add them to the previous state, store the new
# state and move it back into $A..$E for the next block, then advance the
# input pointer by 64 bytes and loop on the count register.
$code.=<<___;
	add	r16,r16,$E
	add	r17,r17,$T
	add	r18,r18,$A
	add	r19,r19,$B
	add	r20,r20,$C
	stw	r16,0($ctx)
	mr	$A,r16
	stw	r17,4($ctx)
	mr	$B,r17
	stw	r18,8($ctx)
	mr	$C,r18
	stw	r19,12($ctx)
	mr	$D,r19
	stw	r20,16($ctx)
	mr	$E,r20
	addi	$inp,$inp,`16*4`
	bdnz	Lsha1_block_private
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.size	.sha1_block_data_order,.-.sha1_block_data_order
___
$code.=<<___;
.asciz	"SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
___

# Replace every `...` span in the generated text with the value of the
# Perl expression it contains (these are the frame-offset computations),
# then send the result down the pipe opened on STDOUT.  The close is
# checked so that a failure on that pipe aborts the script.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close STDOUT or die "error closing STDOUT: $!";
