#! /usr/bin/env perl
# xref: /openssl/crypto/des/asm/dest4-sparcv9.pl (revision 54b40531)
# Copyright 2013-2021 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by David S. Miller and Andy Polyakov.
# The module is licensed under 2-clause BSD
# license. March 2013. All rights reserved.
# ====================================================================

######################################################################
# DES for SPARC T4.
#
# As with other hardware-assisted ciphers CBC encrypt results [for
# aligned data] are virtually identical to critical path lengths:
#
#		DES		Triple-DES
# CBC encrypt	4.14/4.15(*)	11.7/11.7
# CBC decrypt	1.77/4.11(**)	6.42/7.47
#
#			 (*)	numbers after slash are for
#				misaligned data;
#			 (**)	this is result for largest
#				block size, unlike all other
#				cases smaller blocks results
#				are better[?];

# Work out the directory this script was invoked from ($0) and add it, plus
# the sibling perlasm directory, to @INC so the shared SPARC helper file can
# be loaded regardless of the current working directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "sparcv9_modes.pl";

# The last command-line argument, when present, names the output file;
# STDOUT is redirected to it.  Use the three-argument form of open so the
# file name is never interpreted as a mode, and fail loudly instead of
# silently writing the generated assembly to the original STDOUT.
$output=pop;
if ($output) {
    open STDOUT,'>',$output or die "can't open $output: $!";
}

# Start of the generated assembly: make sure __ASSEMBLER__ is defined before
# pulling in the shared SPARC header, declare %g2/%g3 as scratch registers
# for 64-bit builds, and switch to the text section.
$code.=<<___;
#ifndef __ASSEMBLER__
# define __ASSEMBLER__ 1
#endif
#include "crypto/sparc_arch.h"

#ifdef	__arch64__
.register       %g2,#scratch
.register       %g3,#scratch
#endif

.text
___

# des_t4_key_expand: arguments are inp (%o0) and out (%o1).
#
# Loads one 8-byte doubleword from inp; when inp is not 8-byte aligned a
# second doubleword is loaded and the two are merged with faligndata.  The
# value is then run through a chain of des_kexpand instructions and the 16
# resulting doublewords are stored at out+0x00 .. out+0x78.  The stores are
# interleaved with the expansion steps, so they are not issued in offset
# order.  The final store sits in the delay slot of retl.
{ my ($inp,$out)=("%o0","%o1");

$code.=<<___;
.align	32
.globl	des_t4_key_expand
.type	des_t4_key_expand,#function
des_t4_key_expand:
	andcc		$inp, 0x7, %g0
	alignaddr	$inp, %g0, $inp
	bz,pt		%icc, 1f
	ldd		[$inp + 0x00], %f0
	ldd		[$inp + 0x08], %f2
	faligndata	%f0, %f2, %f0
1:	des_kexpand	%f0, 0, %f0
	des_kexpand	%f0, 1, %f2
	std		%f0, [$out + 0x00]
	des_kexpand	%f2, 3, %f6
	std		%f2, [$out + 0x08]
	des_kexpand	%f2, 2, %f4
	des_kexpand	%f6, 3, %f10
	std		%f6, [$out + 0x18]
	des_kexpand	%f6, 2, %f8
	std		%f4, [$out + 0x10]
	des_kexpand	%f10, 3, %f14
	std		%f10, [$out + 0x28]
	des_kexpand	%f10, 2, %f12
	std		%f8, [$out + 0x20]
	des_kexpand	%f14, 1, %f16
	std		%f14, [$out + 0x38]
	des_kexpand	%f16, 3, %f20
	std		%f12, [$out + 0x30]
	des_kexpand	%f16, 2, %f18
	std		%f16, [$out + 0x40]
	des_kexpand	%f20, 3, %f24
	std		%f20, [$out + 0x50]
	des_kexpand	%f20, 2, %f22
	std		%f18, [$out + 0x48]
	des_kexpand	%f24, 3, %f28
	std		%f24, [$out + 0x60]
	des_kexpand	%f24, 2, %f26
	std		%f22, [$out + 0x58]
	des_kexpand	%f28, 1, %f30
	std		%f28, [$out + 0x70]
	std		%f26, [$out + 0x68]
	retl
	std		%f30, [$out + 0x78]
.size	des_t4_key_expand,.-des_t4_key_expand
___
}
# CBC-mode entry points.  All four routines take the same five arguments in
# %o0..%o4: inp, out, len, key, ivec.
#
# Shared conventions, as visible in the code below:
#  - len == 0 returns immediately through .Lcbc_abort (defined inside
#    des_t4_cbc_encrypt and branched to by all four routines);
#  - len is shifted right by 3, i.e. only whole 8-byte blocks are processed;
#  - $ileft = (inp & 7) * 8 and $iright = -$ileft are the shift counts used
#    to merge two aligned doublewords into one misaligned input block;
#  - $omask = 0xff >> (out & 7), forced to 0 when out is 8-byte aligned.
#    A non-zero $omask selects the "2:" path, which rotates the result with
#    faligndata and writes it with two partial stores (mask, then ~mask on
#    the next doubleword);
#  - the "2:" path reloads %g4 with the next input doubleword itself and
#    re-enters the loop at <loop>+4, skipping the loop's first instruction
#    (the ldx into %g4).
{ my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4));
  my ($ileft,$iright,$omask) = map("%g$_",(1..3));

# des_t4_cbc_encrypt: single-DES CBC encryption.  The 16 doublewords at
# key+0x00 .. key+0x78 are loaded once into %f4..%f34.  %f0/%f1 carry the
# chaining value: it is XORed with each input block before the rounds, and
# the last output block is written back to ivec on exit.
$code.=<<___;
.globl	des_t4_cbc_encrypt
.align	32
des_t4_cbc_encrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	srln		$len, 0, $len		! needed on v8+, "nop" on v9
	ld		[$ivec + 0], %f0	! load ivec
	ld		[$ivec + 4], %f1

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x00], %f4	! load key schedule
	ldd		[$key + 0x08], %f6
	ldd		[$key + 0x10], %f8
	ldd		[$key + 0x18], %f10
	ldd		[$key + 0x20], %f12
	ldd		[$key + 0x28], %f14
	ldd		[$key + 0x30], %f16
	ldd		[$key + 0x38], %f18
	ldd		[$key + 0x40], %f20
	ldd		[$key + 0x48], %f22
	ldd		[$key + 0x50], %f24
	ldd		[$key + 0x58], %f26
	ldd		[$key + 0x60], %f28
	ldd		[$key + 0x68], %f30
	ldd		[$key + 0x70], %f32
	ldd		[$key + 0x78], %f34

.Ldes_cbc_enc_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f2
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	fxor		%f2, %f0, %f0		! ^= ivec
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	des_round	%f20, %f22, %f0, %f0
	des_round	%f24, %f26, %f0, %f0
	des_round	%f28, %f30, %f0, %f0
	des_round	%f32, %f34, %f0, %f0
	des_iip		%f0, %f0

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_cbc_enc_loop
	add		$out, 8, $out

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]
.Lcbc_abort:
	retl
	nop

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~4x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f2		! handle unaligned output

	stda		%f2, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f2, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_cbc_enc_loop+4
	orn		%g0, $omask, $omask

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]
.type	des_t4_cbc_encrypt,#function
.size	des_t4_cbc_encrypt,.-des_t4_cbc_encrypt
___

# des_t4_cbc_decrypt: single-DES CBC decryption.  The same 16 doublewords are
# loaded in reverse order (key+0x78 down to key+0x00).  %f2/%f3 hold the
# chaining value: the round output is XORed with it, then %f2 is replaced by
# the input block just consumed (still in %g4).  The last input block is
# written back to ivec on exit.
$code.=<<___;

.globl	des_t4_cbc_decrypt
.align	32
des_t4_cbc_decrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	srln		$len, 0, $len		! needed on v8+, "nop" on v9
	ld		[$ivec + 0], %f2	! load ivec
	ld		[$ivec + 4], %f3

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x78], %f4	! load key schedule
	ldd		[$key + 0x70], %f6
	ldd		[$key + 0x68], %f8
	ldd		[$key + 0x60], %f10
	ldd		[$key + 0x58], %f12
	ldd		[$key + 0x50], %f14
	ldd		[$key + 0x48], %f16
	ldd		[$key + 0x40], %f18
	ldd		[$key + 0x38], %f20
	ldd		[$key + 0x30], %f22
	ldd		[$key + 0x28], %f24
	ldd		[$key + 0x20], %f26
	ldd		[$key + 0x18], %f28
	ldd		[$key + 0x10], %f30
	ldd		[$key + 0x08], %f32
	ldd		[$key + 0x00], %f34

.Ldes_cbc_dec_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f0
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	des_round	%f20, %f22, %f0, %f0
	des_round	%f24, %f26, %f0, %f0
	des_round	%f28, %f30, %f0, %f0
	des_round	%f32, %f34, %f0, %f0
	des_iip		%f0, %f0

	fxor		%f2, %f0, %f0		! ^= ivec
	movxtod		%g4, %f2

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_cbc_dec_loop
	add		$out, 8, $out

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~4x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f0		! handle unaligned output

	stda		%f0, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f0, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_cbc_dec_loop+4
	orn		%g0, $omask, $omask

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]
.type	des_t4_cbc_decrypt,#function
.size	des_t4_cbc_decrypt,.-des_t4_cbc_decrypt
___

# One might wonder why does one have back-to-back des_iip/des_ip
# pairs between EDE passes. Indeed, aren't they inverse of each other?
# They almost are. Outcome of the pair is 32-bit words being swapped
# in target register. Consider pair of des_iip/des_ip as a way to
# perform the due swap, it's actually fastest way in this case.

# des_t4_ede3_cbc_encrypt: triple-DES (EDE3) CBC encryption.  key points at
# three consecutive 128-byte schedules.  The first (key+0x00 .. 0x78) stays
# resident in %f4..%f34.  The second is read in reverse order (key+0xf8 down
# to key+0x80) and the third in forward order (key+0x100 .. 0x178).  Both
# are reloaded into %f36..%f62 on every iteration, with the loads
# interleaved between the des_round instructions of the preceding pass.
$code.=<<___;
.globl	des_t4_ede3_cbc_encrypt
.align	32
des_t4_ede3_cbc_encrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	srln		$len, 0, $len		! needed on v8+, "nop" on v9
	ld		[$ivec + 0], %f0	! load ivec
	ld		[$ivec + 4], %f1

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x00], %f4	! load key schedule
	ldd		[$key + 0x08], %f6
	ldd		[$key + 0x10], %f8
	ldd		[$key + 0x18], %f10
	ldd		[$key + 0x20], %f12
	ldd		[$key + 0x28], %f14
	ldd		[$key + 0x30], %f16
	ldd		[$key + 0x38], %f18
	ldd		[$key + 0x40], %f20
	ldd		[$key + 0x48], %f22
	ldd		[$key + 0x50], %f24
	ldd		[$key + 0x58], %f26
	ldd		[$key + 0x60], %f28
	ldd		[$key + 0x68], %f30
	ldd		[$key + 0x70], %f32
	ldd		[$key + 0x78], %f34

.Ldes_ede3_cbc_enc_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f2
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	fxor		%f2, %f0, %f0		! ^= ivec
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	ldd		[$key + 0x100-0x08], %f36
	ldd		[$key + 0x100-0x10], %f38
	des_round	%f20, %f22, %f0, %f0
	ldd		[$key + 0x100-0x18], %f40
	ldd		[$key + 0x100-0x20], %f42
	des_round	%f24, %f26, %f0, %f0
	ldd		[$key + 0x100-0x28], %f44
	ldd		[$key + 0x100-0x30], %f46
	des_round	%f28, %f30, %f0, %f0
	ldd		[$key + 0x100-0x38], %f48
	ldd		[$key + 0x100-0x40], %f50
	des_round	%f32, %f34, %f0, %f0
	ldd		[$key + 0x100-0x48], %f52
	ldd		[$key + 0x100-0x50], %f54
	des_iip		%f0, %f0

	ldd		[$key + 0x100-0x58], %f56
	ldd		[$key + 0x100-0x60], %f58
	des_ip		%f0, %f0
	ldd		[$key + 0x100-0x68], %f60
	ldd		[$key + 0x100-0x70], %f62
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x100-0x78], %f36
	ldd		[$key + 0x100-0x80], %f38
	des_round	%f40, %f42, %f0, %f0
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	ldd		[$key + 0x100+0x00], %f40
	ldd		[$key + 0x100+0x08], %f42
	des_round	%f52, %f54, %f0, %f0
	ldd		[$key + 0x100+0x10], %f44
	ldd		[$key + 0x100+0x18], %f46
	des_round	%f56, %f58, %f0, %f0
	ldd		[$key + 0x100+0x20], %f48
	ldd		[$key + 0x100+0x28], %f50
	des_round	%f60, %f62, %f0, %f0
	ldd		[$key + 0x100+0x30], %f52
	ldd		[$key + 0x100+0x38], %f54
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x100+0x40], %f56
	ldd		[$key + 0x100+0x48], %f58
	des_iip		%f0, %f0

	ldd		[$key + 0x100+0x50], %f60
	ldd		[$key + 0x100+0x58], %f62
	des_ip		%f0, %f0
	ldd		[$key + 0x100+0x60], %f36
	ldd		[$key + 0x100+0x68], %f38
	des_round	%f40, %f42, %f0, %f0
	ldd		[$key + 0x100+0x70], %f40
	ldd		[$key + 0x100+0x78], %f42
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	des_round	%f52, %f54, %f0, %f0
	des_round	%f56, %f58, %f0, %f0
	des_round	%f60, %f62, %f0, %f0
	des_round	%f36, %f38, %f0, %f0
	des_round	%f40, %f42, %f0, %f0
	des_iip		%f0, %f0

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_ede3_cbc_enc_loop
	add		$out, 8, $out

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~2x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f2		! handle unaligned output

	stda		%f2, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f2, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_ede3_cbc_enc_loop+4
	orn		%g0, $omask, $omask

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]
.type	des_t4_ede3_cbc_encrypt,#function
.size	des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt
___

# des_t4_ede3_cbc_decrypt: triple-DES (EDE3) CBC decryption.  It walks the
# schedules the other way round from the encrypt routine.  The third
# schedule, read in reverse order (key+0x178 down to key+0x100), stays
# resident in %f4..%f34.  The second is read in forward order
# (key+0x80 .. 0xf8) and the first in reverse order (key+0x78 down to
# key+0x00).  Both are reloaded into %f36..%f62 on every iteration.
# Chaining works as in des_t4_cbc_decrypt (%f2/%f3).
$code.=<<___;

.globl	des_t4_ede3_cbc_decrypt
.align	32
des_t4_ede3_cbc_decrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	srln		$len, 0, $len		! needed on v8+, "nop" on v9
	ld		[$ivec + 0], %f2	! load ivec
	ld		[$ivec + 4], %f3

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x100+0x78], %f4	! load key schedule
	ldd		[$key + 0x100+0x70], %f6
	ldd		[$key + 0x100+0x68], %f8
	ldd		[$key + 0x100+0x60], %f10
	ldd		[$key + 0x100+0x58], %f12
	ldd		[$key + 0x100+0x50], %f14
	ldd		[$key + 0x100+0x48], %f16
	ldd		[$key + 0x100+0x40], %f18
	ldd		[$key + 0x100+0x38], %f20
	ldd		[$key + 0x100+0x30], %f22
	ldd		[$key + 0x100+0x28], %f24
	ldd		[$key + 0x100+0x20], %f26
	ldd		[$key + 0x100+0x18], %f28
	ldd		[$key + 0x100+0x10], %f30
	ldd		[$key + 0x100+0x08], %f32
	ldd		[$key + 0x100+0x00], %f34

.Ldes_ede3_cbc_dec_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f0
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	ldd		[$key + 0x80+0x00], %f36
	ldd		[$key + 0x80+0x08], %f38
	des_round	%f20, %f22, %f0, %f0
	ldd		[$key + 0x80+0x10], %f40
	ldd		[$key + 0x80+0x18], %f42
	des_round	%f24, %f26, %f0, %f0
	ldd		[$key + 0x80+0x20], %f44
	ldd		[$key + 0x80+0x28], %f46
	des_round	%f28, %f30, %f0, %f0
	ldd		[$key + 0x80+0x30], %f48
	ldd		[$key + 0x80+0x38], %f50
	des_round	%f32, %f34, %f0, %f0
	ldd		[$key + 0x80+0x40], %f52
	ldd		[$key + 0x80+0x48], %f54
	des_iip		%f0, %f0

	ldd		[$key + 0x80+0x50], %f56
	ldd		[$key + 0x80+0x58], %f58
	des_ip		%f0, %f0
	ldd		[$key + 0x80+0x60], %f60
	ldd		[$key + 0x80+0x68], %f62
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x80+0x70], %f36
	ldd		[$key + 0x80+0x78], %f38
	des_round	%f40, %f42, %f0, %f0
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	ldd		[$key + 0x80-0x08], %f40
	ldd		[$key + 0x80-0x10], %f42
	des_round	%f52, %f54, %f0, %f0
	ldd		[$key + 0x80-0x18], %f44
	ldd		[$key + 0x80-0x20], %f46
	des_round	%f56, %f58, %f0, %f0
	ldd		[$key + 0x80-0x28], %f48
	ldd		[$key + 0x80-0x30], %f50
	des_round	%f60, %f62, %f0, %f0
	ldd		[$key + 0x80-0x38], %f52
	ldd		[$key + 0x80-0x40], %f54
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x80-0x48], %f56
	ldd		[$key + 0x80-0x50], %f58
	des_iip		%f0, %f0

	ldd		[$key + 0x80-0x58], %f60
	ldd		[$key + 0x80-0x60], %f62
	des_ip		%f0, %f0
	ldd		[$key + 0x80-0x68], %f36
	ldd		[$key + 0x80-0x70], %f38
	des_round	%f40, %f42, %f0, %f0
	ldd		[$key + 0x80-0x78], %f40
	ldd		[$key + 0x80-0x80], %f42
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	des_round	%f52, %f54, %f0, %f0
	des_round	%f56, %f58, %f0, %f0
	des_round	%f60, %f62, %f0, %f0
	des_round	%f36, %f38, %f0, %f0
	des_round	%f40, %f42, %f0, %f0
	des_iip		%f0, %f0

	fxor		%f2, %f0, %f0		! ^= ivec
	movxtod		%g4, %f2

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_ede3_cbc_dec_loop
	add		$out, 8, $out

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f0		! handle unaligned output

	stda		%f0, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f0, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_ede3_cbc_dec_loop+4
	orn		%g0, $omask, $omask

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]
.type	des_t4_ede3_cbc_decrypt,#function
.size	des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt
___
}
# Identification string placed after the code, followed by re-alignment.
$code.=<<___;
.asciz  "DES for SPARC T4, David S. Miller, Andy Polyakov"
.align  4
___

# emit_assembler() is not defined in this file; presumably it is provided by
# sparcv9_modes.pl (the only file required above) and writes out $code.
&emit_assembler();

# Buffered write errors only surface when the handle is closed, so check it.
close STDOUT or die "error closing STDOUT: $!";
