xref: /openssl/crypto/des/asm/des_enc.m4 (revision 79c44b4e)
1! Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved.
2!
3! Licensed under the Apache License 2.0 (the "License").  You may not use
4! this file except in compliance with the License.  You can obtain a copy
5! in the file LICENSE in the source distribution or at
6! https://www.openssl.org/source/license.html
7!
8!  To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S
9!
10!  Global registers 1 to 5 are used. This is the same as done by the
11!  cc compiler. The UltraSPARC load/store little endian feature is used.
12!
13!  Instruction grouping often refers to one CPU cycle.
14!
15!  Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S
16!
17!  Assemble through cc:  cc -c -xarch=v8plusa -o des_enc.o des_enc.S
18!
19!  Performance improvement according to './apps/openssl speed des'
20!
21!	32-bit build:
22!		23%  faster than cc-5.2 -xarch=v8plus -xO5
23!		115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5
24!	64-bit build:
25!		50%  faster than cc-5.2 -xarch=v9 -xO5
26!		100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5
27!
28
29.ident "des_enc.m4 2.1"
30.file  "des_enc-sparc.S"
31
! Select the 64-bit ABI when the compiler reports a 64-bit SPARC target.
32#if defined(__SUNPRO_C) && defined(__sparcv9)
33# define ABI64  /* They've said -xarch=v9 at command line */
34#elif defined(__GNUC__) && defined(__arch64__)
35# define ABI64  /* They've said -m64 at command line */
36#endif
37
! ABI-dependent settings:
!   FRAME        operand of the save instruction that opens each stack frame
!   BIAS, ARG0   added to %sp to reach the first pointer-sized stack slot
!   ARGSZ        size of one such slot
!   LDPTR/STPTR  load and store mnemonics for a pointer-sized value
38#ifdef ABI64
39  .register	%g2,#scratch
40  .register	%g3,#scratch
41# define	FRAME	-192
42# define	BIAS	2047
43# define	LDPTR	ldx
44# define	STPTR	stx
45# define	ARG0	128
46# define	ARGSZ	8
47#else
48# define	FRAME	-96
49# define	BIAS	0
50# define	LDPTR	ld
51# define	STPTR	st
52# define	ARG0	68
53# define	ARGSZ	4
54#endif
55
! Initial value of the round loop counter kept in out4.
56#define LOOPS 7
57
! Symbolic names for the global, local, in and out registers.
58#define global0 %g0
59#define global1 %g1
60#define global2 %g2
61#define global3 %g3
62#define global4 %g4
63#define global5 %g5
64
65#define local0 %l0
66#define local1 %l1
67#define local2 %l2
68#define local3 %l3
69#define local4 %l4
70#define local5 %l5
! Note the crossed mapping: local7 is %l6 and local6 is %l7.
71#define local7 %l6
72#define local6 %l7
73
74#define in0 %i0
75#define in1 %i1
76#define in2 %i2
77#define in3 %i3
78#define in4 %i4
79#define in5 %i5
80#define in6 %i6
81#define in7 %i7
82
83#define out0 %o0
84#define out1 %o1
85#define out2 %o2
86#define out3 %o3
87#define out4 %o4
88#define out5 %o5
89#define out6 %o6
90#define out7 %o7
91
! stub is an alternative spelling of the store-byte mnemonic stb.
92#define stub stb
93
94changequote({,})
95
96
97! Macro definitions:
98
99
100! {ip_macro}
101!
102! The logic used in initial and final permutations is the same as in
103! the C code. The permutations are done with a clever combination of
104! shifts, xors and and-masks.
105!
106! From global1 (address of sbox 1, set by the caller) the macro derives the
107! addresses of sbox 2 to 5 in global 2 to 5, sbox 6 in local6, sbox 8 in out3.
108!
109! Rotates the halves 3 left to bring the sbox bits in convenient positions.
110!
111! Loads key first round from address in parameter 5 to out0, out1.
112!
113! After the original LibDES initial permutation, the resulting left
114! is in the variable initially used for right and vice versa. The macro
115! implements the possibility to keep the halves in the original registers.
116!
117! parameter 1  left
118! parameter 2  right
119! parameter 3  result left (modify in first round)
120! parameter 4  result right (use in first round)
121! parameter 5  key address
122! parameter 6  1/2 for include encryption/decryption
123! parameter 7  1 for move in1 to in3
124! parameter 8  1 for move in3 to in4, 2 for move in4 to in3
125! parameter 9  1 for load ks3 and ks2 to in4 and in3
126
! Expansion overview: five bit-swap steps between the two halves, each built
! from srl, xor, and, sll, xor with a mask held in a local register, followed
! by a rotate left by 3 of both halves.  The steps are interleaved with the
! sbox address setup and the loads of the first round key.  The first, second,
! third and fifth mask and the loop counter are read relative to out2; the
! fourth mask is built in local5 with sethi/or.
127define(ip_macro, {
128
129! {ip_macro}
130! $1 $2 $4 $3 $5 $6 $7 $8 $9
131
132	ld	[out2+256], local1
133	srl	$2, 4, local4
134
135	xor	local4, $1, local4
136	ifelse($7,1,{mov in1, in3},{nop})
137
138	ld	[out2+260], local2
139	and	local4, local1, local4
140	ifelse($8,1,{mov in3, in4},{})
141	ifelse($8,2,{mov in4, in3},{})
142
143	ld	[out2+280], out4          ! loop counter
144	sll	local4, 4, local1
145	xor	$1, local4, $1
146
147	ld	[out2+264], local3
148	srl	$1, 16, local4
149	xor	$2, local1, $2
150
151	ifelse($9,1,{LDPTR	KS3, in4},{})
152	xor	local4, $2, local4
153	nop	!sethi	%hi(DES_SPtrans), global1 ! sbox addr
154
155	ifelse($9,1,{LDPTR	KS2, in3},{})
156	and	local4, local2, local4
157	nop	!or	global1, %lo(DES_SPtrans), global1   ! sbox addr
158
159	sll	local4, 16, local1
160	xor	$2, local4, $2
161
162	srl	$2, 2, local4
163	xor	$1, local1, $1
164
	! 16711680 + 255 = 0x00FF00FF, the mask of the fourth swap step
165	sethi	%hi(16711680), local5
166	xor	local4, $1, local4
167
168	and	local4, local3, local4
169	or	local5, 255, local5
170
171	sll	local4, 2, local2
172	xor	$1, local4, $1
173
174	srl	$1, 8, local4
175	xor	$2, local2, $2
176
177	xor	local4, $2, local4
178	add	global1, 768, global4
179
180	and	local4, local5, local4
181	add	global1, 1024, global5
182
183	ld	[out2+272], local7
184	sll	local4, 8, local1
185	xor	$2, local4, $2
186
187	srl	$2, 1, local4
188	xor	$1, local1, $1
189
190	ld	[$5], out0                ! key 7531
191	xor	local4, $1, local4
192	add	global1, 256, global2
193
194	ld	[$5+4], out1              ! key 8642
195	and	local4, local7, local4
196	add	global1, 512, global3
197
198	sll	local4, 1, local1
199	xor	$1, local4, $1
200
	! rotate both halves left by 3: sll 3, srl 29, or
201	sll	$1, 3, local3
202	xor	$2, local1, $2
203
204	sll	$2, 3, local2
205	add	global1, 1280, local6     ! address sbox 6
206
207	srl	$1, 29, local4
208	add	global1, 1792, out3       ! address sbox 8
209
210	srl	$2, 29, local1
211	or	local4, local3, $4
212
213	or	local2, local1, $3
214
	! parameter 6 = 1: continue directly into the encryption rounds;
	! the and after the call executes in its delay slot
215	ifelse($6, 1, {
216
217		ld	[out2+284], local5     ! 0x0000FC00 used in the rounds
218		or	local2, local1, $3
219		xor	$4, out0, local1
220
221		call .des_enc.1
222		and	local1, 252, local1
223
224	},{})
225
	! parameter 6 = 2: same, but into the decryption rounds
226	ifelse($6, 2, {
227
228		ld	[out2+284], local5     ! 0x0000FC00 used in the rounds
229		or	local2, local1, $3
230		xor	$4, out0, local1
231
232		call .des_dec.1
233		and	local1, 252, local1
234
235	},{})
236})
237
238
239! {rounds_macro}
240!
241! The logic used in the DES rounds is the same as in the C code,
242! except that calculations for sbox 1 and sbox 5 begin before
243! the previous round is finished.
244!
245! In each round one half (work) is modified based on key and the
246! other half (use).
247!
248! In this version we do two rounds in a loop repeated 7 times
249! and two rounds separately.
250!
251! One half has the bits for the sboxes in the following positions:
252!
253!	777777xx555555xx333333xx111111xx
254!
255!	88xx666666xx444444xx222222xx8888
256!
257! The bits for each sbox are xor-ed with the key bits for that box.
258! The above xx bits are cleared, and the result used for lookup in
259! the sbox table. Each sbox entry contains the 4 output bits permuted
260! into 32 bits according to the P permutation.
261!
262! In the description of DES, left and right are switched after
263! each round, except after last round. In this code the original
264! left and right are kept in the same register in all rounds, meaning
265! that after the 16 rounds the result for right is in the register
266! originally used for left.
267!
268! parameter 1  first work (left in first round)
269! parameter 2  first use (right in first round)
270! parameter 3  enc/dec  1/-1
271! parameter 4  loop label
272! parameter 5  key address register
273! parameter 6  optional address for key next encryption/decryption
274! parameter 7  not empty for include retl
275!
276! also compares in2 to 8
277
! Expansion overview: the code at the loop label performs two rounds per
! iteration and repeats until the counter in out4 reaches zero; the same two
! rounds are then performed once more without the backward branch.  Loads of
! the keys for the following rounds are interleaved with the sbox lookups.
278define(rounds_macro, {
279
280! {rounds_macro}
281! $1 $2 $3 $4 $5 $6 $7 $8 $9
282
283	xor	$2, out0, local1
284
285	ld	[out2+284], local5        ! 0x0000FC00
286	ba	$4
287	and	local1, 252, local1
288
289	.align 32
290
291$4:
292	! local6 is address sbox 6
293	! out3   is address sbox 8
294	! out4   is loop counter
295
296	ld	[global1+local1], local1
297	xor	$2, out1, out1            ! 8642
298	xor	$2, out0, out0            ! 7531
299	! fmovs	%f0, %f0                  ! fxor used for alignment
300
301	srl	out1, 4, local0           ! rotate 4 right
302	and	out0, local5, local3      ! 3
303	! fmovs	%f0, %f0
304
305	ld	[$5+$3*8], local7         ! key 7531 next round
306	srl	local3, 8, local3         ! 3
307	and	local0, 252, local2       ! 2
308	! fmovs	%f0, %f0
309
310	ld	[global3+local3],local3   ! 3
311	sll	out1, 28, out1            ! rotate
312	xor	$1, local1, $1            ! 1 finished, local1 now sbox 7
313
314	ld	[global2+local2], local2  ! 2
315	srl	out0, 24, local1          ! 7
316	or	out1, local0, out1        ! rotate
317
318	ldub	[out2+local1], local1     ! 7 (and 0xFC)
319	srl	out1, 24, local0          ! 8
320	and	out1, local5, local4      ! 4
321
322	ldub	[out2+local0], local0     ! 8 (and 0xFC)
323	srl	local4, 8, local4         ! 4
324	xor	$1, local2, $1            ! 2 finished local2 now sbox 6
325
326	ld	[global4+local4],local4   ! 4
327	srl	out1, 16, local2          ! 6
328	xor	$1, local3, $1            ! 3 finished local3 now sbox 5
329
330	ld	[out3+local0],local0      ! 8
331	and	local2, 252, local2       ! 6
332	add	global1, 1536, local5     ! address sbox 7
333
334	ld	[local6+local2], local2   ! 6
335	srl	out0, 16, local3          ! 5
336	xor	$1, local4, $1            ! 4 finished
337
338	ld	[local5+local1],local1    ! 7
339	and	local3, 252, local3       ! 5
340	xor	$1, local0, $1            ! 8 finished
341
342	ld	[global5+local3],local3   ! 5
343	xor	$1, local2, $1            ! 6 finished
	! the flags set here are consumed by the bne at the end of the loop body
344	subcc	out4, 1, out4
345
346	ld	[$5+$3*8+4], out0         ! key 8642 next round
347	xor	$1, local7, local2        ! sbox 5 next round
348	xor	$1, local1, $1            ! 7 finished
349
350	srl	local2, 16, local2        ! sbox 5 next round
351	xor	$1, local3, $1            ! 5 finished
352
353	ld	[$5+$3*16+4], out1        ! key 8642 next round again
354	and	local2, 252, local2       ! sbox5 next round
355! next round
356	xor	$1, local7, local7        ! 7531
357
358	ld	[global5+local2], local2  ! 5
359	srl	local7, 24, local3        ! 7
360	xor	$1, out0, out0            ! 8642
361
362	ldub	[out2+local3], local3     ! 7 (and 0xFC)
363	srl	out0, 4, local0           ! rotate 4 right
364	and	local7, 252, local1       ! 1
365
366	sll	out0, 28, out0            ! rotate
367	xor	$2, local2, $2            ! 5 finished local2 used
368
369	srl	local0, 8, local4         ! 4
370	and	local0, 252, local2       ! 2
371	ld	[local5+local3], local3   ! 7
372
373	srl	local0, 16, local5        ! 6
374	or	out0, local0, out0        ! rotate
375	ld	[global2+local2], local2  ! 2
376
377	srl	out0, 24, local0
378	ld	[$5+$3*16], out0          ! key 7531 next round
379	and	local4, 252, local4	  ! 4
380
381	and	local5, 252, local5       ! 6
382	ld	[global4+local4], local4  ! 4
383	xor	$2, local3, $2            ! 7 finished local3 used
384
385	and	local0, 252, local0       ! 8
386	ld	[local6+local5], local5   ! 6
387	xor	$2, local2, $2            ! 2 finished local2 now sbox 3
388
389	srl	local7, 8, local2         ! 3 start
390	ld	[out3+local0], local0     ! 8
391	xor	$2, local4, $2            ! 4 finished
392
393	and	local2, 252, local2       ! 3
394	ld	[global1+local1], local1  ! 1
395	xor	$2, local5, $2            ! 6 finished local5 used
396
397	ld	[global3+local2], local2  ! 3
398	xor	$2, local0, $2            ! 8 finished
399	add	$5, $3*16, $5             ! enc add 16, dec add -16 to key pointer (two round keys)
400
401	ld	[out2+284], local5        ! 0x0000FC00
402	xor	$2, out0, local4          ! sbox 1 next round
403	xor	$2, local1, $2            ! 1 finished
404
405	xor	$2, local2, $2            ! 3 finished
406	bne	$4
407	and	local4, 252, local1       ! sbox 1 next round
408
409! two rounds more:
410
411	ld	[global1+local1], local1
412	xor	$2, out1, out1
413	xor	$2, out0, out0
414
415	srl	out1, 4, local0           ! rotate
416	and	out0, local5, local3
417
418	ld	[$5+$3*8], local7         ! key 7531
419	srl	local3, 8, local3
420	and	local0, 252, local2
421
422	ld	[global3+local3],local3
423	sll	out1, 28, out1            ! rotate
424	xor	$1, local1, $1            ! 1 finished, local1 now sbox 7
425
426	ld	[global2+local2], local2
427	srl	out0, 24, local1
428	or	out1, local0, out1        ! rotate
429
430	ldub	[out2+local1], local1
431	srl	out1, 24, local0
432	and	out1, local5, local4
433
434	ldub	[out2+local0], local0
435	srl	local4, 8, local4
436	xor	$1, local2, $1            ! 2 finished local2 now sbox 6
437
438	ld	[global4+local4],local4
439	srl	out1, 16, local2
440	xor	$1, local3, $1            ! 3 finished local3 now sbox 5
441
442	ld	[out3+local0],local0
443	and	local2, 252, local2
444	add	global1, 1536, local5     ! address sbox 7
445
446	ld	[local6+local2], local2
447	srl	out0, 16, local3
448	xor	$1, local4, $1            ! 4 finished
449
450	ld	[local5+local1],local1
451	and	local3, 252, local3
452	xor	$1, local0, $1
453
454	ld	[global5+local3],local3
455	xor	$1, local2, $1            ! 6 finished
	! nothing below changes the condition codes, so the result of this
	! compare is still available to the code after the expansion
456	cmp	in2, 8
457
458	ifelse($6,{}, {}, {ld	[out2+280], out4})  ! loop counter
459	xor	$1, local7, local2        ! sbox 5 next round
460	xor	$1, local1, $1            ! 7 finished
461
462	ld	[$5+$3*8+4], out0
463	srl	local2, 16, local2        ! sbox 5 next round
464	xor	$1, local3, $1            ! 5 finished
465
466	and	local2, 252, local2
467! next round (two rounds more)
468	xor	$1, local7, local7        ! 7531
469
470	ld	[global5+local2], local2
471	srl	local7, 24, local3
472	xor	$1, out0, out0            ! 8642
473
474	ldub	[out2+local3], local3
475	srl	out0, 4, local0           ! rotate
476	and	local7, 252, local1
477
478	sll	out0, 28, out0            ! rotate
479	xor	$2, local2, $2            ! 5 finished local2 used
480
481	srl	local0, 8, local4
482	and	local0, 252, local2
483	ld	[local5+local3], local3
484
485	srl	local0, 16, local5
486	or	out0, local0, out0        ! rotate
487	ld	[global2+local2], local2
488
489	srl	out0, 24, local0
490	ifelse($6,{}, {}, {ld	[$6], out0})   ! key next encryption/decryption
491	and	local4, 252, local4
492
493	and	local5, 252, local5
494	ld	[global4+local4], local4
495	xor	$2, local3, $2            ! 7 finished local3 used
496
497	and	local0, 252, local0
498	ld	[local6+local5], local5
499	xor	$2, local2, $2            ! 2 finished local2 now sbox 3
500
501	srl	local7, 8, local2         ! 3 start
502	ld	[out3+local0], local0
503	xor	$2, local4, $2
504
505	and	local2, 252, local2
506	ld	[global1+local1], local1
507	xor	$2, local5, $2            ! 6 finished local5 used
508
509	ld	[global3+local2], local2
	! local3 and local4 receive the first work half shifted right 3 and
	! left 29, as expected by the final permutation code
510	srl	$1, 3, local3
511	xor	$2, local0, $2
512
513	ifelse($6,{}, {}, {ld	[$6+4], out1}) ! key next encryption/decryption
514	sll	$1, 29, local4
515	xor	$2, local1, $2
516
	! when parameter 7 is given, the last xor sits in the delay slot of retl
517	ifelse($7,{}, {}, {retl})
518	xor	$2, local2, $2
519})
520
521
522! {fp_macro}
523!
524!  parameter 1   right (original left)
525!  parameter 2   left (original right)
526!  parameter 3   1 for optional store to [in0]
527!  parameter 4   1 for load input/output address to local5/7
528!
529!  The final permutation logic switches the halves, meaning that
530!  left and right end up in the registers originally used.
531
! Expansion overview: rebuilds the first half from local3/local4 and undoes
! the rotate of the second half, then runs five bit-swap steps with the masks
! 0x55555555, 0x00ff00ff, 0x33333333, 0x0000ffff and 0x0f0f0f0f, each built
! with sethi/or while the previous step is still in progress.  Uses local1
! to local4 as scratch and, when parameter 4 is 1, loads local5 and local7.
532define(fp_macro, {
533
534! {fp_macro}
535! $1 $2 $3 $4 $5 $6 $7 $8 $9
536
537	! initially undo the rotate 3 left done after initial permutation
538	! original left is received shifted 3 right and 29 left in local3/4
539
540	sll	$2, 29, local1
541	or	local3, local4, $1
542
543	srl	$2, 3, $2
544	sethi	%hi(0x55555555), local2
545
546	or	$2, local1, $2
547	or	local2, %lo(0x55555555), local2
548
	! swap step: distance 1, mask 0x55555555
549	srl	$2, 1, local3
550	sethi	%hi(0x00ff00ff), local1
551	xor	local3, $1, local3
552	or	local1, %lo(0x00ff00ff), local1
553	and	local3, local2, local3
554	sethi	%hi(0x33333333), local4
555	sll	local3, 1, local2
556
557	xor	$1, local3, $1
558
	! swap step: distance 8, mask 0x00ff00ff
559	srl	$1, 8, local3
560	xor	$2, local2, $2
561	xor	local3, $2, local3
562	or	local4, %lo(0x33333333), local4
563	and	local3, local1, local3
564	sethi	%hi(0x0000ffff), local1
565	sll	local3, 8, local2
566
567	xor	$2, local3, $2
568
	! swap step: distance 2, mask 0x33333333
569	srl	$2, 2, local3
570	xor	$1, local2, $1
571	xor	local3, $1, local3
572	or	local1, %lo(0x0000ffff), local1
573	and	local3, local4, local3
574	sethi	%hi(0x0f0f0f0f), local4
575	sll	local3, 2, local2
576
577	ifelse($4,1, {LDPTR INPUT, local5})
578	xor	$1, local3, $1
579
	! swap step: distance 16, mask 0x0000ffff
580	ifelse($4,1, {LDPTR OUTPUT, local7})
581	srl	$1, 16, local3
582	xor	$2, local2, $2
583	xor	local3, $2, local3
584	or	local4, %lo(0x0f0f0f0f), local4
585	and	local3, local1, local3
586	sll	local3, 16, local2
587
	! from here on the second half is carried in local1 until the last xor
588	xor	$2, local3, local1
589
	! swap step: distance 4, mask 0x0f0f0f0f
590	srl	local1, 4, local3
591	xor	$1, local2, $1
592	xor	local3, $1, local3
593	and	local3, local4, local3
594	sll	local3, 4, local2
595
596	xor	$1, local3, $1
597
598	! optional store:
599
600	ifelse($3,1, {st $1, [in0]})
601
602	xor	local1, local2, $2
603
604	ifelse($3,1, {st $2, [in0+4]})
605
606})
607
608
609! {fp_ip_macro}
610!
611! Does initial permutation for next block mixed with
612! final permutation for current block.
613!
614! parameter 1   original left
615! parameter 2   original right
616! parameter 3   left ip
617! parameter 4   right ip
618! parameter 5   1: load ks1/ks2 to in3/in4, add 120 to in4
619!                2: mov in4 to in3
620!
621! also adds -8 to length in2 and loads loop counter to out4
622
! Expansion overview: the final permutation steps on parameters 1 and 2 and
! the initial permutation steps on parameters 3 and 4 are issued alternately,
! sharing the masks loaded relative to out2.  out4 is used as a scratch
! register and is reloaded with the loop counter near the end.
! NOTE(review): the rotated half is read from out5 directly rather than from
! parameter 2, so the expansion presumably expects out5 to hold that half on
! entry - confirm against the callers.
623define(fp_ip_macro, {
624
625! {fp_ip_macro}
626! $1 $2 $3 $4 $5 $6 $7 $8 $9
627
628	define({temp1},{out4})
629	define({temp2},{local3})
630
631	define({ip1},{local1})
632	define({ip2},{local2})
633	define({ip4},{local4})
634	define({ip5},{local5})
635
636	! $1 in local3, local4
637
638	ld	[out2+256], ip1
639	sll	out5, 29, temp1
640	or	local3, local4, $1
641
642	srl	out5, 3, $2
643	ifelse($5,2,{mov in4, in3})
644
645	ld	[out2+272], ip5
646	srl	$4, 4, local0
647	or	$2, temp1, $2
648
649	srl	$2, 1, temp1
650	xor	temp1, $1, temp1
651
652	and	temp1, ip5, temp1
653	xor	local0, $3, local0
654
655	sll	temp1, 1, temp2
656	xor	$1, temp1, $1
657
658	and	local0, ip1, local0
	! one block of 8 bytes is accounted for in the remaining length
659	add	in2, -8, in2
660
661	sll	local0, 4, local7
662	xor	$3, local0, $3
663
664	ld	[out2+268], ip4
665	srl	$1, 8, temp1
666	xor	$2, temp2, $2
667	ld	[out2+260], ip2
668	srl	$3, 16, local0
669	xor	$4, local7, $4
670	xor	temp1, $2, temp1
671	xor	local0, $4, local0
672	and	temp1, ip4, temp1
673	and	local0, ip2, local0
674	sll	temp1, 8, temp2
675	xor	$2, temp1, $2
676	sll	local0, 16, local7
677	xor	$4, local0, $4
678
679	srl	$2, 2, temp1
680	xor	$1, temp2, $1
681
682	ld	[out2+264], temp2         ! ip3
683	srl	$4, 2, local0
684	xor	$3, local7, $3
685	xor	temp1, $1, temp1
686	xor	local0, $3, local0
687	and	temp1, temp2, temp1
688	and	local0, temp2, local0
689	sll	temp1, 2, temp2
690	xor	$1, temp1, $1
691	sll	local0, 2, local7
692	xor	$3, local0, $3
693
694	srl	$1, 16, temp1
695	xor	$2, temp2, $2
696	srl	$3, 8, local0
697	xor	$4, local7, $4
698	xor	temp1, $2, temp1
699	xor	local0, $4, local0
700	and	temp1, ip2, temp1
701	and	local0, ip4, local0
702	sll	temp1, 16, temp2
703	xor	$2, temp1, local4
704	sll	local0, 8, local7
705	xor	$4, local0, $4
706
707	srl	$4, 1, local0
708	xor	$3, local7, $3
709
710	srl	local4, 4, temp1
711	xor	local0, $3, local0
712
713	xor	$1, temp2, $1
714	and	local0, ip5, local0
715
716	sll	local0, 1, local7
717	xor	temp1, $1, temp1
718
719	xor	$3, local0, $3
720	xor	$4, local7, $4
721
722	sll	$3, 3, local5
723	and	temp1, ip1, temp1
724
725	sll	temp1, 4, temp2
726	xor	$1, temp1, $1
727
728	ifelse($5,1,{LDPTR	KS2, in4})
729	sll	$4, 3, local2
730	xor	local4, temp2, $2
731
732	! reload since used as temporary:
733
734	ld	[out2+280], out4          ! loop counter
735
736	srl	$3, 29, local0
737	ifelse($5,1,{add in4, 120, in4})
738
739	ifelse($5,1,{LDPTR	KS1, in3})
740	srl	$4, 29, local7
741
	! rotate left by 3 completed; note the results are stored crosswise
742	or	local0, local5, $4
743	or	local2, local7, $3
744
745})
746
747
748
749! {load_little_endian}
750!
751! parameter 1  address
752! parameter 2  destination left
753! parameter 3  destination right
754! parameter 4  temporary
755! parameter 5  label
756
! Expansion overview: builds two 32-bit values from eight single-byte loads,
! most significant byte first, so only byte accesses are made to the address.
! The byte at offset 0 ends up in the low bits of the left register and the
! byte at offset 4 in the low bits of the right register.  The label argument
! is emitted twice, plain and with an a suffix, so it must differ between
! expansions.
757define(load_little_endian, {
758
759! {load_little_endian}
760! $1 $2 $3 $4 $5 $6 $7 $8 $9
761
762	! first in memory to rightmost in register
763
764$5:
765	ldub	[$1+3], $2
766
767	ldub	[$1+2], $4
768	sll	$2, 8, $2
769	or	$2, $4, $2
770
771	ldub	[$1+1], $4
772	sll	$2, 8, $2
773	or	$2, $4, $2
774
775	ldub	[$1+0], $4
776	sll	$2, 8, $2
777	or	$2, $4, $2
778
779
	! second word, bytes 4 to 7
780	ldub	[$1+3+4], $3
781
782	ldub	[$1+2+4], $4
783	sll	$3, 8, $3
784	or	$3, $4, $3
785
786	ldub	[$1+1+4], $4
787	sll	$3, 8, $3
788	or	$3, $4, $3
789
790	ldub	[$1+0+4], $4
791	sll	$3, 8, $3
792	or	$3, $4, $3
793$5a:
794
795})
796
797
798! {load_little_endian_inc}
799!
800! parameter 1  address
801! parameter 2  destination left
802! parameter 3  destination right
803! parameter 4  temporary
804! parameter 5  label
805!
806! adds 8 to address
807
! Expansion overview: same byte-wise assembly of two 32-bit values as the
! variant without increment, but the address register is advanced by 8 after
! the load of byte 7.  The three remaining loads therefore carry a -8 in
! their offsets so that they still read bytes 6, 5 and 4 of the same block.
808define(load_little_endian_inc, {
809
810! {load_little_endian_inc}
811! $1 $2 $3 $4 $5 $6 $7 $8 $9
812
813	! first in memory to rightmost in register
814
815$5:
816	ldub	[$1+3], $2
817
818	ldub	[$1+2], $4
819	sll	$2, 8, $2
820	or	$2, $4, $2
821
822	ldub	[$1+1], $4
823	sll	$2, 8, $2
824	or	$2, $4, $2
825
826	ldub	[$1+0], $4
827	sll	$2, 8, $2
828	or	$2, $4, $2
829
830	ldub	[$1+3+4], $3
	! address now points at the next block
831	add	$1, 8, $1
832
833	ldub	[$1+2+4-8], $4
834	sll	$3, 8, $3
835	or	$3, $4, $3
836
837	ldub	[$1+1+4-8], $4
838	sll	$3, 8, $3
839	or	$3, $4, $3
840
841	ldub	[$1+0+4-8], $4
842	sll	$3, 8, $3
843	or	$3, $4, $3
844$5a:
845
846})
847
848
849! {load_n_bytes}
850!
851! Loads 1 to 7 bytes little endian
852! Remaining bytes are zeroed.
853!
854! parameter 1  address
855! parameter 2  length
856! parameter 3  destination register left
857! parameter 4  destination register right
858! parameter 5  temp
859! parameter 6  temp2
860! parameter 7  label
861! parameter 8  return label
862
! Expansion overview: call .+8 leaves the address of the label with suffix .0
! in %o7, overwriting its previous contents.  The length is scaled by 4 to
! pick a word from the jump table at the end of the expansion; each word is
! the distance from that label to one of the entry points .1 to .7.
! Entering at .N loads byte N-1 and falls through the lower entries, so
! exactly N bytes are read.  Both destination registers are zeroed first,
! one of them in the delay slot of the jmp.  Bytes 0 to 3 go to parameter 4
! and bytes 4 to 6 to parameter 3.
! NOTE(review): table word 0 is 0, so a length of 0 would jump back to the
! .0 label; callers presumably guarantee 1 to 7 - confirm.
863define(load_n_bytes, {
864
865! {load_n_bytes}
866! $1 $2 $5 $6 $7 $8 $7 $8 $9
867
868$7.0:	call	.+8
869	sll	$2, 2, $6
870
871	add	%o7,$7.jmp.table-$7.0,$5
872
873	add	$5, $6, $5
874	mov	0, $4
875
876	ld	[$5], $5
877
878	jmp	%o7+$5
879	mov	0, $3
880
881$7.7:
882	ldub	[$1+6], $5
883	sll	$5, 16, $5
884	or	$3, $5, $3
885$7.6:
886	ldub	[$1+5], $5
887	sll	$5, 8, $5
888	or	$3, $5, $3
889$7.5:
890	ldub	[$1+4], $5
891	or	$3, $5, $3
892$7.4:
893	ldub	[$1+3], $5
894	sll	$5, 24, $5
895	or	$4, $5, $4
896$7.3:
897	ldub	[$1+2], $5
898	sll	$5, 16, $5
899	or	$4, $5, $4
900$7.2:
901	ldub	[$1+1], $5
902	sll	$5, 8, $5
903	or	$4, $5, $4
904$7.1:
905	ldub	[$1+0], $5
	! the last or executes in the delay slot of the branch to the return label
906	ba	$8
907	or	$4, $5, $4
908
909	.align 4
910
911$7.jmp.table:
912	.word	0
913	.word	$7.1-$7.0
914	.word	$7.2-$7.0
915	.word	$7.3-$7.0
916	.word	$7.4-$7.0
917	.word	$7.5-$7.0
918	.word	$7.6-$7.0
919	.word	$7.7-$7.0
920})
921
922
923! {store_little_endian}
924!
925! parameter 1  address
926! parameter 2  source left
927! parameter 3  source right
928! parameter 4  temporary
! parameter 5  label
929
! Expansion overview: writes the two source registers to memory one byte at a
! time, least significant byte first, so only byte accesses are made to the
! address.  The left register goes to offsets 0 to 3 and the right register
! to offsets 4 to 7.  Parameter 5 is emitted as a label at the start and,
! with an a suffix, at the end, so it must differ between expansions.
930define(store_little_endian, {
931
932! {store_little_endian}
933! $1 $2 $3 $4 $5 $6 $7 $8 $9
934
935	! rightmost in register to first in memory
936
937$5:
938	and	$2, 255, $4
939	stub	$4, [$1+0]
940
941	srl	$2, 8, $4
942	and	$4, 255, $4
943	stub	$4, [$1+1]
944
945	srl	$2, 16, $4
946	and	$4, 255, $4
947	stub	$4, [$1+2]
948
949	srl	$2, 24, $4
950	stub	$4, [$1+3]
951
952
	! second word, bytes 4 to 7
953	and	$3, 255, $4
954	stub	$4, [$1+0+4]
955
956	srl	$3, 8, $4
957	and	$4, 255, $4
958	stub	$4, [$1+1+4]
959
960	srl	$3, 16, $4
961	and	$4, 255, $4
962	stub	$4, [$1+2+4]
963
964	srl	$3, 24, $4
965	stub	$4, [$1+3+4]
966
967$5a:
968
969})
970
971
972! {store_n_bytes}
973!
974! Stores 1 to 7 bytes little endian
975!
976! parameter 1  address
977! parameter 2  length
978! parameter 3  source register left
979! parameter 4  source register right
980! parameter 5  temp
981! parameter 6  temp2
982! parameter 7  label
983! parameter 8  return label
984
! Expansion overview: call .+8 leaves the address of the label with suffix .0
! in %o7, overwriting its previous contents.  The length is scaled by 4 to
! pick a word from the jump table at the end of the expansion; each word is
! the distance from that label to one of the entry points .1 to .7.
! Entering at .N stores byte N-1 and falls through the lower entries, so
! exactly N bytes are written.  Bytes 0 to 3 come from parameter 4 and bytes
! 4 to 6 from parameter 3.
! NOTE(review): table word 0 is 0, so a length of 0 would jump back to the
! .0 label; callers presumably guarantee 1 to 7 - confirm.
985define(store_n_bytes, {
986
987! {store_n_bytes}
988! $1 $2 $5 $6 $7 $8 $7 $8 $9
989
990$7.0:	call	.+8
991	sll	$2, 2, $6
992
993	add	%o7,$7.jmp.table-$7.0,$5
994
995	add	$5, $6, $5
996
997	ld	[$5], $5
998
999	jmp	%o7+$5
1000	nop
1001
1002$7.7:
1003	srl	$3, 16, $5
1004	and	$5, 0xff, $5
1005	stub	$5, [$1+6]
1006$7.6:
1007	srl	$3, 8, $5
1008	and	$5, 0xff, $5
1009	stub	$5, [$1+5]
1010$7.5:
1011	and	$3, 0xff, $5
1012	stub	$5, [$1+4]
1013$7.4:
1014	srl	$4, 24, $5
1015	stub	$5, [$1+3]
1016$7.3:
1017	srl	$4, 16, $5
1018	and	$5, 0xff, $5
1019	stub	$5, [$1+2]
1020$7.2:
1021	srl	$4, 8, $5
1022	and	$5, 0xff, $5
1023	stub	$5, [$1+1]
1024$7.1:
1025	and	$4, 0xff, $5
1026
1027
	! the store of byte 0 executes in the delay slot of the branch
1028	ba	$8
1029	stub	$5, [$1]
1030
1031	.align 4
1032
1033$7.jmp.table:
1034
1035	.word	0
1036	.word	$7.1-$7.0
1037	.word	$7.2-$7.0
1038	.word	$7.3-$7.0
1039	.word	$7.4-$7.0
1040	.word	$7.5-$7.0
1041	.word	$7.6-$7.0
1042	.word	$7.7-$7.0
1043})
1044
1045
! Test helper: the expansion puts one fixed value into local0 and copies it
! to up to eight registers named by the parameters and then to every working
! register listed below.  The value itself is set by the one-line macro that
! follows.  No expansion of this helper appears in the part of the file
! reviewed here.
1046define(testvalue,{1})
1047
1048define(register_init, {
1049
1050! For test purposes:
1051
1052	sethi	%hi(testvalue), local0
1053	or	local0, %lo(testvalue), local0
1054
	! optional extra targets; empty parameters are skipped
1055	ifelse($1,{},{}, {mov	local0, $1})
1056	ifelse($2,{},{}, {mov	local0, $2})
1057	ifelse($3,{},{}, {mov	local0, $3})
1058	ifelse($4,{},{}, {mov	local0, $4})
1059	ifelse($5,{},{}, {mov	local0, $5})
1060	ifelse($6,{},{}, {mov	local0, $6})
1061	ifelse($7,{},{}, {mov	local0, $7})
1062	ifelse($8,{},{}, {mov	local0, $8})
1063
1064	mov	local0, local1
1065	mov	local0, local2
1066	mov	local0, local3
1067	mov	local0, local4
1068	mov	local0, local5
1069	mov	local0, local7
1070	mov	local0, local6
1071	mov	local0, out0
1072	mov	local0, out1
1073	mov	local0, out2
1074	mov	local0, out3
1075	mov	local0, out4
1076	mov	local0, out5
1077	mov	local0, global1
1078	mov	local0, global2
1079	mov	local0, global3
1080	mov	local0, global4
1081	mov	local0, global5
1082
1083})
1084
1085.section	".text"
1086
1087	.align 32
1088
! Shared encryption rounds.  Entered with call, either at .des_enc or, from
! the initial permutation code, at the loop label .des_enc.1; left with the
! retl that the last argument requests.  Works on in5 and out5.
1089.des_enc:
1090
1091	! key address in3
1092	! loads key next encryption/decryption first round from [in4]
1093
1094	rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl)
1095
1096
1097	.align 32
1098
! Shared decryption rounds.  Same structure as above with the roles of in5
! and out5 exchanged, the key pointer in4 stepping backwards, and the loop
! label .des_dec.1.
1099.des_dec:
1100
1101	! implemented with out5 as first parameter to avoid
1102	! register exchange in ede modes
1103
1104	! key address in4
1105	! loads key next encryption/decryption first round from [in3]
1106
1107	rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl)
1108
1109
1110
1111! void DES_encrypt1(data, ks, enc)
1112! *******************************
1113
! Register use on entry, after save: in0 = data, in1 = ks, in2 = enc.
! The two 32-bit words at [in0] and [in0+4] are processed and written back
! in place.  enc = 0 selects decryption, any other value encryption.
1114	.align 32
1115	.global DES_encrypt1
1116	.type	 DES_encrypt1,#function
1117
1118DES_encrypt1:
1119
1120	save	%sp, FRAME, %sp
1121
	! Position independent address setup: sethi/or form the distance from
	! label 1 to .PIC.DES_SPtrans, call .+8 puts the address of label 1 in
	! %o7, and the add in its delay slot gives the table address in global1.
	! out2 is then set to the address of .des_and.
1122	sethi	%hi(.PIC.DES_SPtrans-1f),global1
1123	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
11241:	call	.+8
1125	add	%o7,global1,global1
1126	sub	global1,.PIC.DES_SPtrans-.des_and,out2
1127
1128	ld	[in0], in5                ! left
1129	cmp	in2, 0                    ! enc
1130
	! the load of the right half executes in the delay slot of the branch
1131	be	.encrypt.dec
1132	ld	[in0+4], out5             ! right
1133
1134	! parameter 6  1/2 for include encryption/decryption
1135	! parameter 7  1 for move in1 to in3
1136	! parameter 8  1 for move in3 to in4, 2 for move in4 to in3
1137
	! encryption: initial permutation, rounds expanded in line, final
	! permutation with store to [in0]
1138	ip_macro(in5, out5, in5, out5, in3, 0, 1, 1)
1139
1140	rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used
1141
1142	fp_macro(in5, out5, 1)            ! 1 for store to [in0]
1143
1144	ret
1145	restore
1146
1147.encrypt.dec:
1148
1149	add	in1, 120, in3             ! use last subkey for first round
1150
1151	! parameter 6  1/2 for include encryption/decryption
1152	! parameter 7  1 for move in1 to in3
1153	! parameter 8  1 for move in3 to in4, 2 for move in4 to in3
1154
	! decryption: the initial permutation code ends with a call to the
	! shared decryption rounds at .des_dec.1, which return here
1155	ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec,  ks in4
1156
1157	fp_macro(out5, in5, 1)            ! 1 for store to [in0]
1158
1159	ret
1160	restore
1161
1162.DES_encrypt1.end:
1163	.size	 DES_encrypt1,.DES_encrypt1.end-DES_encrypt1
1164
1165
1166! void DES_encrypt2(data, ks, enc)
1167!*********************************
1168
1169	! encrypts/decrypts without initial/final permutation
1170
! Register use on entry, after save: in0 = data, in1 = ks, in2 = enc.
! Runs the rounds only.  The two words are rotated left by 3 before the
! rounds and right by 3 afterwards, and are written back in place.
! in0 and in1 are reused as scratch after the rounds, so the data pointer is
! kept in a stack slot of the own frame and reloaded before the stores.
1171	.align 32
1172	.global DES_encrypt2
1173	.type	 DES_encrypt2,#function
1174
1175DES_encrypt2:
1176
1177	save	%sp, FRAME, %sp
1178
	! position independent address setup: global1 = address of
	! .PIC.DES_SPtrans, out2 = address of .des_and
1179	sethi	%hi(.PIC.DES_SPtrans-1f),global1
1180	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
11811:	call	.+8
1182	add	%o7,global1,global1
1183	sub	global1,.PIC.DES_SPtrans-.des_and,out2
1184
1185	! Set sbox address 1 to 6 and rotate halves 3 left
1186	! Errors caught by destest? Yes. Still? *NO*
1187
1188	!sethi	%hi(DES_SPtrans), global1 ! address sbox 1
1189
1190	!or	global1, %lo(DES_SPtrans), global1  ! sbox 1
1191
1192	add	global1, 256, global2     ! sbox 2
1193	add	global1, 512, global3     ! sbox 3
1194
1195	ld	[in0], out5               ! right
1196	add	global1, 768, global4     ! sbox 4
1197	add	global1, 1024, global5    ! sbox 5
1198
1199	ld	[in0+4], in5              ! left
1200	add	global1, 1280, local6     ! sbox 6
1201	add	global1, 1792, out3       ! sbox 8
1202
1203	! rotate
1204
1205	sll	in5, 3, local5
1206	mov	in1, in3                  ! key address to in3
1207
1208	sll	out5, 3, local7
1209	srl	in5, 29, in5
1210
1211	srl	out5, 29, out5
1212	add	in5, local5, in5
1213
1214	add	out5, local7, out5
1215	cmp	in2, 0
1216
1217	! we use our own stackframe
1218
	! the data pointer is saved in the delay slot, on both paths
1219	be	.encrypt2.dec
1220	STPTR	in0, [%sp+BIAS+ARG0+0*ARGSZ]
1221
1222	ld	[in3], out0               ! key 7531 first round
1223	mov	LOOPS, out4               ! loop counter
1224
1225	ld	[in3+4], out1             ! key 8642 first round
1226	sethi	%hi(0x0000FC00), local5
1227
	! in4 is set in the delay slot; the rounds read the following key from it
1228	call .des_enc
1229	mov	in3, in4
1230
1231	! rotate
1232	sll	in5, 29, in0
1233	srl	in5, 3, in5
1234	sll	out5, 29, in1
1235	add	in5, in0, in5
1236	srl	out5, 3, out5
1237	LDPTR	[%sp+BIAS+ARG0+0*ARGSZ], in0
1238	add	out5, in1, out5
1239	st	in5, [in0]
1240	st	out5, [in0+4]
1241
1242	ret
1243	restore
1244
1245
1246.encrypt2.dec:
1247
	! decryption starts at the last pair of key words
1248	add in3, 120, in4
1249
1250	ld	[in4], out0               ! key 7531 first round
1251	mov	LOOPS, out4               ! loop counter
1252
1253	ld	[in4+4], out1             ! key 8642 first round
1254	sethi	%hi(0x0000FC00), local5
1255
	! exchange in5 and out5 through local1; the second move is in the
	! delay slot of the call
1256	mov	in5, local1               ! left expected in out5
1257	mov	out5, in5
1258
1259	call .des_dec
1260	mov	local1, out5
1261
1262.encrypt2.finish:
1263
1264	! rotate
1265	sll	in5, 29, in0
1266	srl	in5, 3, in5
1267	sll	out5, 29, in1
1268	add	in5, in0, in5
1269	srl	out5, 3, out5
1270	LDPTR	[%sp+BIAS+ARG0+0*ARGSZ], in0
1271	add	out5, in1, out5
	! stored in the opposite order to the encryption path, matching the
	! register exchange made before the call
1272	st	out5, [in0]
1273	st	in5, [in0+4]
1274
1275	ret
1276	restore
1277
1278.DES_encrypt2.end:
1279	.size	 DES_encrypt2, .DES_encrypt2.end-DES_encrypt2
1280
1281
1282! void DES_encrypt3(data, ks1, ks2, ks3)
1283! **************************************
1284
! Register use on entry, after save: in0 = data, in1 = ks1, in2 = ks2,
! in3 = ks3.  Performs encryption with ks1, decryption with ks2 and
! encryption with ks3 between one initial and one final permutation, and
! writes the result back to [in0].
1285	.align 32
1286	.global DES_encrypt3
1287	.type	 DES_encrypt3,#function
1288
1289DES_encrypt3:
1290
1291	save	%sp, FRAME, %sp
1292
	! position independent address setup: global1 = address of
	! .PIC.DES_SPtrans, out2 = address of .des_and
1293	sethi	%hi(.PIC.DES_SPtrans-1f),global1
1294	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
12951:	call	.+8
1296	add	%o7,global1,global1
1297	sub	global1,.PIC.DES_SPtrans-.des_and,out2
1298
1299	ld	[in0], in5                ! left
	! in4 = ks2 + 120: the decryption pass starts at the last key pair
1300	add	in2, 120, in4             ! ks2
1301
1302	ld	[in0+4], out5             ! right
1303	mov	in3, in2                  ! save ks3
1304
1305	! parameter 6  1/2 for include encryption/decryption
1306	! parameter 7  1 for mov in1 to in3
1307	! parameter 8  1 for mov in3 to in4
1308	! parameter 9  1 for load ks3 and ks2 to in4 and in3
1309
	! initial permutation; moves ks1 to in3 and ends with a call to the
	! encryption rounds at .des_enc.1
1310	ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0)
1311
	! decryption with the key in in4; in3 is set in the delay slot
1312	call	.des_dec
1313	mov	in2, in3                  ! preload ks3
1314
	! encryption with the key in in3
1315	call	.des_enc
1316	nop
1317
1318	fp_macro(in5, out5, 1)
1319
1320	ret
1321	restore
1322
1323.DES_encrypt3.end:
1324	.size	 DES_encrypt3,.DES_encrypt3.end-DES_encrypt3
1325
1326
1327! void DES_decrypt3(data, ks1, ks2, ks3)
1328! **************************************
1329
! Register use on entry, after save: in0 = data, in1 = ks1, in2 = ks2,
! in3 = ks3.  Performs decryption with ks3, encryption with ks2 and
! decryption with ks1 between one initial and one final permutation, and
! writes the result back to [in0].
1330	.align 32
1331	.global DES_decrypt3
1332	.type	 DES_decrypt3,#function
1333
1334DES_decrypt3:
1335
1336	save	%sp, FRAME, %sp
1337
	! position independent address setup: global1 = address of
	! .PIC.DES_SPtrans, out2 = address of .des_and
1338	sethi	%hi(.PIC.DES_SPtrans-1f),global1
1339	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
13401:	call	.+8
1341	add	%o7,global1,global1
1342	sub	global1,.PIC.DES_SPtrans-.des_and,out2
1343
1344	ld	[in0], in5                ! left
	! in4 = ks3 + 120: the first decryption pass starts at the last key pair
1345	add	in3, 120, in4             ! ks3
1346
1347	ld	[in0+4], out5             ! right
1348	mov	in2, in3                  ! ks2
1349
1350	! parameter 6  1/2 for include encryption/decryption
1351	! parameter 7  1 for mov in1 to in3
1352	! parameter 8  1 for mov in3 to in4
1353	! parameter 9  1 for load ks3 and ks2 to in4 and in3
1354
	! initial permutation; ends with a call to the decryption rounds at
	! .des_dec.1 using the key in in4
1355	ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0)
1356
	! encryption with the key in in3; in4 is set to ks1 + 120 in the
	! delay slot for the pass that follows
1357	call	.des_enc
1358	add	in1, 120, in4             ! preload ks1
1359
	! decryption with the key in in4
1360	call	.des_dec
1361	nop
1362
1363	fp_macro(out5, in5, 1)
1364
1365	ret
1366	restore
1367
1368.DES_decrypt3.end:
1369	.size	 DES_decrypt3,.DES_decrypt3.end-DES_decrypt3
1370
1371! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
1372! *****************************************************************
!
! CBC mode over length bytes. A zero enc selects the decryption path at
! .ncbc.dec, any other value selects the encryption path that follows
! the branch. Both paths finish by storing the chaining value back
! through ivec, except that the decryption path returns immediately and
! leaves ivec untouched when length is zero or negative.
!
! Arguments arrive in in0 to in5 in the order of the prototype above.
! The ivec pointer is parked in a stack slot of this frame because in4
! and in5 are reused (schedule pointer and left half of the data).
!
! Encryption path registers:
!   in0, in1     running input and output addresses
!   in2          length minus 8 after the first addcc, that is the bytes
!                still missing once the current block is done
!   in3, in4     schedule
!   in5, out5    chaining value and, after the xor, the block being
!                processed
!   out4, global4  plaintext words of the current block
!
! Decryption path registers:
!   local5, local7  running input and output addresses, mirrored in the
!                   input and output stack slots
!   in0, in1     chaining value (the argument registers are reused)
!   in4          schedule + 120
!   in5, out5    block being processed
!   out4, global4  plaintext words after the xor with the chaining value
!
! NOTE(review): the macro bodies used below are defined earlier in the
! file and are not visible from here; statements about what they set
! (condition codes, in2, out4) repeat the existing comments.
1373
1374
1375	.align 32
1376	.global DES_ncbc_encrypt
1377	.type	 DES_ncbc_encrypt,#function
1378
1379DES_ncbc_encrypt:
1380
1381	save	%sp, FRAME, %sp
1382
	! m4 names for three stack slots of the new frame (addressed from %sp
	! after the save). These m4 definitions stay in effect for the rest of
	! the file.
1383	define({INPUT},  { [%sp+BIAS+ARG0+0*ARGSZ] })
1384	define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] })
1385	define({IVEC},   { [%sp+BIAS+ARG0+4*ARGSZ] })
1386
	! Position independent address setup: global1 becomes the address of
	! .PIC.DES_SPtrans (the call writes its own address to %o7, the add in
	! its delay slot applies the stored distance) and out2 the address of
	! .des_and.
1387	sethi	%hi(.PIC.DES_SPtrans-1f),global1
1388	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
13891:	call	.+8
1390	add	%o7,global1,global1
1391	sub	global1,.PIC.DES_SPtrans-.des_and,out2
1392
1393	cmp	in5, 0                    ! enc
1394
	! The store below is in the delay slot of a branch without annul bit,
	! so the ivec pointer is saved on both the encryption and the
	! decryption path.
1395	be	.ncbc.dec
1396	STPTR	in4, IVEC
1397
1398	! addr  left  right  temp  label
1399	load_little_endian(in4, in5, out5, local3, .LLE1)  ! iv
1400
1401	addcc	in2, -8, in2              ! bytes missing when first block done
1402
	! Fewer than 8 bytes in total: skip the full block path.
1403	bl	.ncbc.enc.seven.or.less
1404	mov	in3, in4                  ! schedule
1405
1406.ncbc.enc.next.block:
1407
1408	load_little_endian(in0, out4, global4, local3, .LLE2)  ! block
1409
	! Entry point also used by the partial block loader further down.
1410.ncbc.enc.next.block_1:
1411
1412	xor	in5, out4, in5            ! iv xor
1413	xor	out5, global4, out5       ! iv xor
1414
1415	! parameter 8  1 for move in3 to in4, 2 for move in4 to in3
1416	ip_macro(in5, out5, in5, out5, in3, 0, 0, 2)
1417
	! Re-entry point for the combined path below, which has already done
	! the initial permutation of the next block.
1418.ncbc.enc.next.block_2:
1419
1420!//	call .des_enc                     ! compares in2 to 8
1421!	rounds inlined for alignment purposes
1422
1423	add	global1, 768, global4     ! address sbox 4 since register used below
1424
1425	rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption  ks in3
1426
	! Condition codes come from the rounds (in2 compared to 8, see the
	! comment above): with fewer than 8 bytes left after this block take
	! the plain final permutation path.
1427	bl	.ncbc.enc.next.block_fp
1428	add	in0, 8, in0               ! input address
1429
1430	! If 8 or more bytes are to be encrypted after this block,
1431	! we combine final permutation for this block with initial
1432	! permutation for next block. Load next block:
1433
1434	load_little_endian(in0, global3, global4, local5, .LLE12)
1435
1436	!  parameter 1   original left
1437	!  parameter 2   original right
1438	!  parameter 3   left ip
1439	!  parameter 4   right ip
1440	!  parameter 5   1: load ks1/ks2 to in3/in4, add 120 to in4
1441	!                2: mov in4 to in3
1442	!
1443	! also adds -8 to length in2 and loads loop counter to out4
1444
1445	fp_ip_macro(out0, out1, global3, global4, 2)
1446
1447	store_little_endian(in1, out0, out1, local3, .SLE10)  ! block
1448
	! Chain the ciphertext just produced into the next block. in5 is
	! copied to local1 first because the first xor overwrites it. The
	! key loads and the sbox address are interleaved with the xors.
1449	ld	[in3], out0               ! key 7531 first round next block
1450	mov 	in5, local1
1451	xor	global3, out5, in5        ! iv xor next block
1452
1453	ld	[in3+4], out1             ! key 8642
1454	add	global1, 512, global3     ! address sbox 3 since register used
1455	xor	global4, local1, out5     ! iv xor next block
1456
1457	ba	.ncbc.enc.next.block_2
1458	add	in1, 8, in1               ! output address
1459
1460.ncbc.enc.next.block_fp:
1461
1462	fp_macro(in5, out5)
1463
1464	store_little_endian(in1, in5, out5, local3, .SLE1)  ! block
1465
1466	addcc   in2, -8, in2              ! bytes missing when next block done
1467
1468	bpos	.ncbc.enc.next.block
1469	add	in1, 8, in1
1470
1471.ncbc.enc.seven.or.less:
1472
	! in2 is negative here. A value of -8 or below means no input bytes
	! are left, otherwise in2 + 8 bytes (1 to 7) remain.
1473	cmp	in2, -8
1474
1475	ble	.ncbc.enc.finish
1476	nop
1477
1478	add	in2, 8, local1            ! bytes to load
1479
1480	! addr, length, dest left, dest right, temp, temp2, label, ret label
1481	load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1)
1482
1483	! Loads 1 to 7 bytes little endian to global4, out4
1484
1485
1486.ncbc.enc.finish:
1487
	! Write the chaining value back through the saved ivec pointer.
1488	LDPTR	IVEC, local4
1489	store_little_endian(local4, in5, out5, local5, .SLE2)  ! ivec
1490
1491	ret
1492	restore
1493
1494
1495.ncbc.dec:
1496
1497	STPTR	in0, INPUT
1498	cmp	in2, 0                    ! length
1499	add	in3, 120, in3
1500
	! Nothing to do for a length of zero or less: return without touching
	! ivec. The mov in the delay slot runs on both paths.
1501	LDPTR	IVEC, local7              ! ivec
1502	ble	.ncbc.dec.finish
1503	mov	in3, in4                  ! schedule
1504
1505	STPTR	in1, OUTPUT
1506	mov	in0, local5               ! input
1507
	! From here on in0 and in1 hold the chaining value, not addresses.
1508	load_little_endian(local7, in0, in1, local3, .LLE3)   ! ivec
1509
1510.ncbc.dec.next.block:
1511
1512	load_little_endian(local5, in5, out5, local3, .LLE4)  ! block
1513
1514	! parameter 6  1/2 for include encryption/decryption
1515	! parameter 7  1 for mov in1 to in3
1516	! parameter 8  1 for mov in3 to in4
1517
1518	ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryption  ks in4
1519
1520	fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7
1521
1522	! in2 is bytes left to be stored
1523	! in2 is compared to 8 in the rounds
1524
	! The second xor is in the delay slot of the branch, so both plaintext
	! words are ready on either path.
1525	xor	out5, in0, out4           ! iv xor
1526	bl	.ncbc.dec.seven.or.less
1527	xor	in5, in1, global4         ! iv xor
1528
1529	! Load ivec next block now, since input and output address might be the same.
1530
1531	load_little_endian_inc(local5, in0, in1, local3, .LLE5)  ! iv
1532
1533	store_little_endian(local7, out4, global4, local3, .SLE3)
1534
	! Keep the stack copies of both addresses current; the store of the
	! output address is in the delay slot and runs on both paths.
1535	STPTR	local5, INPUT
1536	add	local7, 8, local7
1537	addcc   in2, -8, in2
1538
1539	bg	.ncbc.dec.next.block
1540	STPTR	local7, OUTPUT
1541
1542
1543.ncbc.dec.store.iv:
1544
	! in0 and in1 hold the words most recently loaded from the input,
	! which become the new ivec contents.
1545	LDPTR	IVEC, local4              ! ivec
1546	store_little_endian(local4, in0, in1, local5, .SLE4)
1547
1548.ncbc.dec.finish:
1549
1550	ret
1551	restore
1552
1553.ncbc.dec.seven.or.less:
1554
	! Fewer than 8 bytes are left to be stored: fetch the new chaining
	! value, then hand the count in in2 to the partial store, whose last
	! argument names .ncbc.dec.store.iv as its return label.
1555	load_little_endian_inc(local5, in0, in1, local3, .LLE13)     ! ivec
1556
1557	store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv)
1558
1559
1560.DES_ncbc_encrypt.end:
1561	.size	 DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt
1562
1563
1564! void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc)
1565! **************************************************************************
!
! CBC mode with three key schedules. A zero enc selects the decryption
! path at .ede3.dec, any other value selects the encryption path that
! follows the branch. Both paths finish by storing the chaining value
! back through ivec, except that the decryption path returns immediately
! and leaves ivec untouched when length is zero or negative.
!
! The first six arguments arrive in in0 to in5. ivec and enc are read
! from the frame of the caller through %fp (argument positions 6 and 7).
! The three schedule pointers are parked in stack slots of this frame
! because in3, in4 and in5 are reused inside the loops.
!
! Encryption, per block: .des_enc with ks1, .des_dec with ks2 + 120,
! .des_enc with ks3.
! Decryption, per block: a decryption pass with ks3 + 120 requested from
! the initial permutation macro call, .des_enc with ks2, .des_dec with
! ks1 + 120.
!
! NOTE(review): the macro bodies and .des_enc/.des_dec are defined earlier
! in the file and are not visible from here; statements about what they
! set (condition codes, in2, out4) repeat the existing comments.
1566
1567
1568	.align 32
1569	.global DES_ede3_cbc_encrypt
1570	.type	 DES_ede3_cbc_encrypt,#function
1571
1572DES_ede3_cbc_encrypt:
1573
1574	save	%sp, FRAME, %sp
1575
	! m4 names for the stack slots that keep the three schedule pointers.
1576	define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] })
1577	define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] })
1578	define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] })
1579
	! Position independent address setup: global1 becomes the address of
	! .PIC.DES_SPtrans (the call writes its own address to %o7, the add in
	! its delay slot applies the stored distance) and out2 the address of
	! .des_and.
1580	sethi	%hi(.PIC.DES_SPtrans-1f),global1
1581	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
15821:	call	.+8
1583	add	%o7,global1,global1
1584	sub	global1,.PIC.DES_SPtrans-.des_and,out2
1585
1586	LDPTR	[%fp+BIAS+ARG0+7*ARGSZ], local3          ! enc
1587	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local4          ! ivec
1588	cmp	local3, 0                 ! enc
1589
	! The store below is in the delay slot of a branch without annul bit,
	! so the unmodified ks2 pointer is saved on both paths.
1590	be	.ede3.dec
1591	STPTR	in4, KS2
1592
	! ks3 has to be saved before in5 is overwritten by the ivec load.
1593	STPTR	in5, KS3
1594
1595	load_little_endian(local4, in5, out5, local3, .LLE6)  ! ivec
1596
1597	addcc	in2, -8, in2              ! bytes missing after next block
1598
	! Fewer than 8 bytes in total: skip the full block path. The store of
	! ks1 in the delay slot runs on both paths.
1599	bl	.ede3.enc.seven.or.less
1600	STPTR	in3, KS1
1601
1602.ede3.enc.next.block:
1603
1604	load_little_endian(in0, out4, global4, local3, .LLE7)
1605
	! Entry point also used by the partial block loader further down.
1606.ede3.enc.next.block_1:
1607
1608	LDPTR	KS2, in4
1609	xor	in5, out4, in5            ! iv xor
1610	xor	out5, global4, out5       ! iv xor
1611
1612	LDPTR	KS1, in3
1613	add	in4, 120, in4             ! for decryption we use last subkey first
1614	nop
1615
1616	ip_macro(in5, out5, in5, out5, in3)
1617
	! Re-entry point for the combined path below, which has already done
	! the initial permutation of the next block.
1618.ede3.enc.next.block_2:
1619
1620	call .des_enc                     ! ks1 in3
1621	nop
1622
	! The load of ks3 sits in the delay slot of the call.
1623	call .des_dec                     ! ks2 in4
1624	LDPTR	KS3, in3
1625
1626	call .des_enc                     ! ks3 in3  compares in2 to 8
1627	nop
1628
	! With fewer than 8 bytes left after this block take the plain final
	! permutation path.
1629	bl	.ede3.enc.next.block_fp
1630	add	in0, 8, in0
1631
1632	! If 8 or more bytes are to be encrypted after this block,
1633	! we combine final permutation for this block with initial
1634	! permutation for next block. Load next block:
1635
1636	load_little_endian(in0, global3, global4, local5, .LLE11)
1637
1638	!  parameter 1   original left
1639	!  parameter 2   original right
1640	!  parameter 3   left ip
1641	!  parameter 4   right ip
1642	!  parameter 5   1: load ks1/ks2 to in3/in4, add 120 to in4
1643	!                2: mov in4 to in3
1644	!
1645	! also adds -8 to length in2 and loads loop counter to out4
1646
1647	fp_ip_macro(out0, out1, global3, global4, 1)
1648
1649	store_little_endian(in1, out0, out1, local3, .SLE9)  ! block
1650
	! Chain the ciphertext just produced into the next block. in5 is
	! copied to local1 first because the first xor overwrites it.
1651	mov 	in5, local1
1652	xor	global3, out5, in5        ! iv xor next block
1653
1654	ld	[in3], out0               ! key 7531
1655	add	global1, 512, global3     ! address sbox 3
1656	xor	global4, local1, out5     ! iv xor next block
1657
1658	ld	[in3+4], out1             ! key 8642
1659	add	global1, 768, global4     ! address sbox 4
1660	ba	.ede3.enc.next.block_2
1661	add	in1, 8, in1
1662
1663.ede3.enc.next.block_fp:
1664
1665	fp_macro(in5, out5)
1666
1667	store_little_endian(in1, in5, out5, local3, .SLE5)  ! block
1668
1669	addcc   in2, -8, in2              ! bytes missing when next block done
1670
1671	bpos	.ede3.enc.next.block
1672	add	in1, 8, in1
1673
1674.ede3.enc.seven.or.less:
1675
	! in2 is negative here. A value of -8 or below means no input bytes
	! are left, otherwise in2 + 8 bytes (1 to 7) remain.
1676	cmp	in2, -8
1677
1678	ble	.ede3.enc.finish
1679	nop
1680
1681	add	in2, 8, local1            ! bytes to load
1682
1683	! addr, length, dest left, dest right, temp, temp2, label, ret label
1684	load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1)
1685
1686.ede3.enc.finish:
1687
	! Write the chaining value back through ivec, read again from the
	! frame of the caller.
1688	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local4          ! ivec
1689	store_little_endian(local4, in5, out5, local5, .SLE6)  ! ivec
1690
1691	ret
1692	restore
1693
1694.ede3.dec:
1695
	! The input and output slot names used here come from the m4
	! definitions made inside DES_ncbc_encrypt earlier in the file.
	! ks1 and ks3 are advanced by 120 bytes before they are saved, the
	! ks2 slot already holds the unmodified pointer.
1696	STPTR	in0, INPUT
1697	add	in5, 120, in5
1698
1699	STPTR	in1, OUTPUT
1700	mov	in0, local5
1701	add	in3, 120, in3
1702
1703	STPTR	in3, KS1
1704	cmp	in2, 0
1705
	! Nothing to do for a length of zero or less: return without touching
	! ivec. The store in the delay slot runs on both paths.
1706	ble	.ede3.dec.finish
1707	STPTR	in5, KS3
1708
	! From here on in0 and in1 hold the chaining value, not addresses.
1709	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local7          ! iv
1710	load_little_endian(local7, in0, in1, local3, .LLE8)
1711
1712.ede3.dec.next.block:
1713
1714	load_little_endian(local5, in5, out5, local3, .LLE9)
1715
1716	! parameter 6  1/2 for include encryption/decryption
1717	! parameter 7  1 for mov in1 to in3
1718	! parameter 8  1 for mov in3 to in4
1719	! parameter 9  1 for load ks3 and ks2 to in4 and in3
1720
1721	ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4
1722
	! The load of the ks1 slot sits in the delay slot of the call.
1723	call .des_enc                     ! ks2 in3
1724	LDPTR	KS1, in4
1725
1726	call .des_dec                     ! ks1 in4
1727	nop
1728
1729	fp_macro(out5, in5, 0, 1)   ! 1 for input and output address local5/7
1730
1731	! in2 is bytes left to be stored
1732	! in2 is compared to 8 in the rounds
1733
	! Xor with the chaining value. The second xor is in the delay slot of
	! the branch, so both plaintext words are ready on either path.
1734	xor	out5, in0, out4
1735	bl	.ede3.dec.seven.or.less
1736	xor	in5, in1, global4
1737
1738	load_little_endian_inc(local5, in0, in1, local3, .LLE10)   ! iv next block
1739
1740	store_little_endian(local7, out4, global4, local3, .SLE7)  ! block
1741
	! Keep the stack copies of both addresses current; the store of the
	! output address is in the delay slot and runs on both paths.
1742	STPTR	local5, INPUT
1743	addcc   in2, -8, in2
1744	add	local7, 8, local7
1745
1746	bg	.ede3.dec.next.block
1747	STPTR	local7, OUTPUT
1748
1749.ede3.dec.store.iv:
1750
	! in0 and in1 hold the words most recently loaded from the input,
	! which become the new ivec contents.
1751	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local4          ! ivec
1752	store_little_endian(local4, in0, in1, local5, .SLE8)  ! ivec
1753
1754.ede3.dec.finish:
1755
1756	ret
1757	restore
1758
1759.ede3.dec.seven.or.less:
1760
	! Fewer than 8 bytes are left to be stored: fetch the new chaining
	! value, then hand the count in in2 to the partial store, whose last
	! argument names .ede3.dec.store.iv as its return label.
1761	load_little_endian_inc(local5, in0, in1, local3, .LLE14)     ! iv
1762
1763	store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv)
1764
1765
1766.DES_ede3_cbc_encrypt.end:
1767	.size	 DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt
1768
1769	.align	256
1770	.type	 .des_and,#object
	! Object size: 256 table bytes followed by eight 32-bit words, so
	! 256 + 8 * 4 = 288 bytes. The last word starts at offset 284 and
	! ends at 288; the size has to cover it.
1771	.size	 .des_and,288
1772
1773.des_and:
1774
1775! This table is used for AND 0xFC when it is known that register
1776! bits 8-31 are zero. Makes it possible to do three arithmetic
1777! operations in one cycle.
!
! Entry i of the 256 byte table holds i with its two low bits cleared,
! so a single byte load indexed by i replaces the and operation.
! The routines in this file keep the address of this object in out2.
1778
1779	.byte  0, 0, 0, 0, 4, 4, 4, 4
1780	.byte  8, 8, 8, 8, 12, 12, 12, 12
1781	.byte  16, 16, 16, 16, 20, 20, 20, 20
1782	.byte  24, 24, 24, 24, 28, 28, 28, 28
1783	.byte  32, 32, 32, 32, 36, 36, 36, 36
1784	.byte  40, 40, 40, 40, 44, 44, 44, 44
1785	.byte  48, 48, 48, 48, 52, 52, 52, 52
1786	.byte  56, 56, 56, 56, 60, 60, 60, 60
1787	.byte  64, 64, 64, 64, 68, 68, 68, 68
1788	.byte  72, 72, 72, 72, 76, 76, 76, 76
1789	.byte  80, 80, 80, 80, 84, 84, 84, 84
1790	.byte  88, 88, 88, 88, 92, 92, 92, 92
1791	.byte  96, 96, 96, 96, 100, 100, 100, 100
1792	.byte  104, 104, 104, 104, 108, 108, 108, 108
1793	.byte  112, 112, 112, 112, 116, 116, 116, 116
1794	.byte  120, 120, 120, 120, 124, 124, 124, 124
1795	.byte  128, 128, 128, 128, 132, 132, 132, 132
1796	.byte  136, 136, 136, 136, 140, 140, 140, 140
1797	.byte  144, 144, 144, 144, 148, 148, 148, 148
1798	.byte  152, 152, 152, 152, 156, 156, 156, 156
1799	.byte  160, 160, 160, 160, 164, 164, 164, 164
1800	.byte  168, 168, 168, 168, 172, 172, 172, 172
1801	.byte  176, 176, 176, 176, 180, 180, 180, 180
1802	.byte  184, 184, 184, 184, 188, 188, 188, 188
1803	.byte  192, 192, 192, 192, 196, 196, 196, 196
1804	.byte  200, 200, 200, 200, 204, 204, 204, 204
1805	.byte  208, 208, 208, 208, 212, 212, 212, 212
1806	.byte  216, 216, 216, 216, 220, 220, 220, 220
1807	.byte  224, 224, 224, 224, 228, 228, 228, 228
1808	.byte  232, 232, 232, 232, 236, 236, 236, 236
1809	.byte  240, 240, 240, 240, 244, 244, 244, 244
1810	.byte  248, 248, 248, 248, 252, 252, 252, 252
1811
1812	! 5 numbers for initial/final permutation
1813
1814	.word   0x0f0f0f0f                ! offset 256
1815	.word	0x0000ffff                ! 260
1816	.word	0x33333333                ! 264
1817	.word	0x00ff00ff                ! 268
1818	.word	0x55555555                ! 272
1819
	! Three more constants; the last one occupies bytes 284 to 287.
1820	.word	0                         ! 276
1821	.word	LOOPS                     ! 280
1822	.word	0x0000FC00                ! 284
1823
1824	.global	DES_SPtrans
1825	.type	DES_SPtrans,#object
1826	.size	DES_SPtrans,2048
! Eight groups (nibble 0 to nibble 7) of 64 words each, 256 bytes per
! group and 2048 bytes in total, matching the size above.
! The table carries two labels at the same address: the global
! DES_SPtrans and .PIC.DES_SPtrans, the one the routines in this file
! use in their position independent address setup. The routines reach
! the individual groups by adding multiples of 256 to that address
! (for example 512 and 768, which their comments call sbox 3 and
! sbox 4).
1827.align	64
1828DES_SPtrans:
1829.PIC.DES_SPtrans:
1830	! nibble 0
1831	.word	0x02080800, 0x00080000, 0x02000002, 0x02080802
1832	.word	0x02000000, 0x00080802, 0x00080002, 0x02000002
1833	.word	0x00080802, 0x02080800, 0x02080000, 0x00000802
1834	.word	0x02000802, 0x02000000, 0x00000000, 0x00080002
1835	.word	0x00080000, 0x00000002, 0x02000800, 0x00080800
1836	.word	0x02080802, 0x02080000, 0x00000802, 0x02000800
1837	.word	0x00000002, 0x00000800, 0x00080800, 0x02080002
1838	.word	0x00000800, 0x02000802, 0x02080002, 0x00000000
1839	.word	0x00000000, 0x02080802, 0x02000800, 0x00080002
1840	.word	0x02080800, 0x00080000, 0x00000802, 0x02000800
1841	.word	0x02080002, 0x00000800, 0x00080800, 0x02000002
1842	.word	0x00080802, 0x00000002, 0x02000002, 0x02080000
1843	.word	0x02080802, 0x00080800, 0x02080000, 0x02000802
1844	.word	0x02000000, 0x00000802, 0x00080002, 0x00000000
1845	.word	0x00080000, 0x02000000, 0x02000802, 0x02080800
1846	.word	0x00000002, 0x02080002, 0x00000800, 0x00080802
1847	! nibble 1
1848	.word	0x40108010, 0x00000000, 0x00108000, 0x40100000
1849	.word	0x40000010, 0x00008010, 0x40008000, 0x00108000
1850	.word	0x00008000, 0x40100010, 0x00000010, 0x40008000
1851	.word	0x00100010, 0x40108000, 0x40100000, 0x00000010
1852	.word	0x00100000, 0x40008010, 0x40100010, 0x00008000
1853	.word	0x00108010, 0x40000000, 0x00000000, 0x00100010
1854	.word	0x40008010, 0x00108010, 0x40108000, 0x40000010
1855	.word	0x40000000, 0x00100000, 0x00008010, 0x40108010
1856	.word	0x00100010, 0x40108000, 0x40008000, 0x00108010
1857	.word	0x40108010, 0x00100010, 0x40000010, 0x00000000
1858	.word	0x40000000, 0x00008010, 0x00100000, 0x40100010
1859	.word	0x00008000, 0x40000000, 0x00108010, 0x40008010
1860	.word	0x40108000, 0x00008000, 0x00000000, 0x40000010
1861	.word	0x00000010, 0x40108010, 0x00108000, 0x40100000
1862	.word	0x40100010, 0x00100000, 0x00008010, 0x40008000
1863	.word	0x40008010, 0x00000010, 0x40100000, 0x00108000
1864	! nibble 2
1865	.word	0x04000001, 0x04040100, 0x00000100, 0x04000101
1866	.word	0x00040001, 0x04000000, 0x04000101, 0x00040100
1867	.word	0x04000100, 0x00040000, 0x04040000, 0x00000001
1868	.word	0x04040101, 0x00000101, 0x00000001, 0x04040001
1869	.word	0x00000000, 0x00040001, 0x04040100, 0x00000100
1870	.word	0x00000101, 0x04040101, 0x00040000, 0x04000001
1871	.word	0x04040001, 0x04000100, 0x00040101, 0x04040000
1872	.word	0x00040100, 0x00000000, 0x04000000, 0x00040101
1873	.word	0x04040100, 0x00000100, 0x00000001, 0x00040000
1874	.word	0x00000101, 0x00040001, 0x04040000, 0x04000101
1875	.word	0x00000000, 0x04040100, 0x00040100, 0x04040001
1876	.word	0x00040001, 0x04000000, 0x04040101, 0x00000001
1877	.word	0x00040101, 0x04000001, 0x04000000, 0x04040101
1878	.word	0x00040000, 0x04000100, 0x04000101, 0x00040100
1879	.word	0x04000100, 0x00000000, 0x04040001, 0x00000101
1880	.word	0x04000001, 0x00040101, 0x00000100, 0x04040000
1881	! nibble 3
1882	.word	0x00401008, 0x10001000, 0x00000008, 0x10401008
1883	.word	0x00000000, 0x10400000, 0x10001008, 0x00400008
1884	.word	0x10401000, 0x10000008, 0x10000000, 0x00001008
1885	.word	0x10000008, 0x00401008, 0x00400000, 0x10000000
1886	.word	0x10400008, 0x00401000, 0x00001000, 0x00000008
1887	.word	0x00401000, 0x10001008, 0x10400000, 0x00001000
1888	.word	0x00001008, 0x00000000, 0x00400008, 0x10401000
1889	.word	0x10001000, 0x10400008, 0x10401008, 0x00400000
1890	.word	0x10400008, 0x00001008, 0x00400000, 0x10000008
1891	.word	0x00401000, 0x10001000, 0x00000008, 0x10400000
1892	.word	0x10001008, 0x00000000, 0x00001000, 0x00400008
1893	.word	0x00000000, 0x10400008, 0x10401000, 0x00001000
1894	.word	0x10000000, 0x10401008, 0x00401008, 0x00400000
1895	.word	0x10401008, 0x00000008, 0x10001000, 0x00401008
1896	.word	0x00400008, 0x00401000, 0x10400000, 0x10001008
1897	.word	0x00001008, 0x10000000, 0x10000008, 0x10401000
1898	! nibble 4
1899	.word	0x08000000, 0x00010000, 0x00000400, 0x08010420
1900	.word	0x08010020, 0x08000400, 0x00010420, 0x08010000
1901	.word	0x00010000, 0x00000020, 0x08000020, 0x00010400
1902	.word	0x08000420, 0x08010020, 0x08010400, 0x00000000
1903	.word	0x00010400, 0x08000000, 0x00010020, 0x00000420
1904	.word	0x08000400, 0x00010420, 0x00000000, 0x08000020
1905	.word	0x00000020, 0x08000420, 0x08010420, 0x00010020
1906	.word	0x08010000, 0x00000400, 0x00000420, 0x08010400
1907	.word	0x08010400, 0x08000420, 0x00010020, 0x08010000
1908	.word	0x00010000, 0x00000020, 0x08000020, 0x08000400
1909	.word	0x08000000, 0x00010400, 0x08010420, 0x00000000
1910	.word	0x00010420, 0x08000000, 0x00000400, 0x00010020
1911	.word	0x08000420, 0x00000400, 0x00000000, 0x08010420
1912	.word	0x08010020, 0x08010400, 0x00000420, 0x00010000
1913	.word	0x00010400, 0x08010020, 0x08000400, 0x00000420
1914	.word	0x00000020, 0x00010420, 0x08010000, 0x08000020
1915	! nibble 5
1916	.word	0x80000040, 0x00200040, 0x00000000, 0x80202000
1917	.word	0x00200040, 0x00002000, 0x80002040, 0x00200000
1918	.word	0x00002040, 0x80202040, 0x00202000, 0x80000000
1919	.word	0x80002000, 0x80000040, 0x80200000, 0x00202040
1920	.word	0x00200000, 0x80002040, 0x80200040, 0x00000000
1921	.word	0x00002000, 0x00000040, 0x80202000, 0x80200040
1922	.word	0x80202040, 0x80200000, 0x80000000, 0x00002040
1923	.word	0x00000040, 0x00202000, 0x00202040, 0x80002000
1924	.word	0x00002040, 0x80000000, 0x80002000, 0x00202040
1925	.word	0x80202000, 0x00200040, 0x00000000, 0x80002000
1926	.word	0x80000000, 0x00002000, 0x80200040, 0x00200000
1927	.word	0x00200040, 0x80202040, 0x00202000, 0x00000040
1928	.word	0x80202040, 0x00202000, 0x00200000, 0x80002040
1929	.word	0x80000040, 0x80200000, 0x00202040, 0x00000000
1930	.word	0x00002000, 0x80000040, 0x80002040, 0x80202000
1931	.word	0x80200000, 0x00002040, 0x00000040, 0x80200040
1932	! nibble 6
1933	.word	0x00004000, 0x00000200, 0x01000200, 0x01000004
1934	.word	0x01004204, 0x00004004, 0x00004200, 0x00000000
1935	.word	0x01000000, 0x01000204, 0x00000204, 0x01004000
1936	.word	0x00000004, 0x01004200, 0x01004000, 0x00000204
1937	.word	0x01000204, 0x00004000, 0x00004004, 0x01004204
1938	.word	0x00000000, 0x01000200, 0x01000004, 0x00004200
1939	.word	0x01004004, 0x00004204, 0x01004200, 0x00000004
1940	.word	0x00004204, 0x01004004, 0x00000200, 0x01000000
1941	.word	0x00004204, 0x01004000, 0x01004004, 0x00000204
1942	.word	0x00004000, 0x00000200, 0x01000000, 0x01004004
1943	.word	0x01000204, 0x00004204, 0x00004200, 0x00000000
1944	.word	0x00000200, 0x01000004, 0x00000004, 0x01000200
1945	.word	0x00000000, 0x01000204, 0x01000200, 0x00004200
1946	.word	0x00000204, 0x00004000, 0x01004204, 0x01000000
1947	.word	0x01004200, 0x00000004, 0x00004004, 0x01004204
1948	.word	0x01000004, 0x01004200, 0x01004000, 0x00004004
1949	! nibble 7
1950	.word	0x20800080, 0x20820000, 0x00020080, 0x00000000
1951	.word	0x20020000, 0x00800080, 0x20800000, 0x20820080
1952	.word	0x00000080, 0x20000000, 0x00820000, 0x00020080
1953	.word	0x00820080, 0x20020080, 0x20000080, 0x20800000
1954	.word	0x00020000, 0x00820080, 0x00800080, 0x20020000
1955	.word	0x20820080, 0x20000080, 0x00000000, 0x00820000
1956	.word	0x20000000, 0x00800000, 0x20020080, 0x20800080
1957	.word	0x00800000, 0x00020000, 0x20820000, 0x00000080
1958	.word	0x00800000, 0x00020000, 0x20000080, 0x20820080
1959	.word	0x00020080, 0x20000000, 0x00000000, 0x00820000
1960	.word	0x20800080, 0x20020080, 0x20020000, 0x00800080
1961	.word	0x20820000, 0x00000080, 0x00800080, 0x20020000
1962	.word	0x20820080, 0x00800000, 0x20800000, 0x20000080
1963	.word	0x00820000, 0x00020080, 0x20020080, 0x20800000
1964	.word	0x00000080, 0x20820000, 0x00820080, 0x00000000
1965	.word	0x20000000, 0x20800080, 0x00020000, 0x00820080
1966
1967