xref: /openssl/crypto/rc4/asm/rc4-c64xplus.pl (revision 33388b44)
1#! /usr/bin/env perl
2# Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the Apache License 2.0 (the "License").  You may not use
5# this file except in compliance with the License.  You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9#
10# ====================================================================
11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12# project. The module is, however, dual licensed under OpenSSL and
13# CRYPTOGAMS licenses depending on where you obtain it. For further
14# details see http://www.openssl.org/~appro/cryptogams/.
15# ====================================================================
16#
17# RC4 for C64x+.
18#
19# April 2014
20#
21# RC4 subroutine processes one byte in 7.0 cycles, which is 3x faster
22# than TI CGT-generated code. Loop is scheduled in such a way that
23# there is only one reference to memory in each cycle. This is done
24# to avoid L1D memory banking conflicts, see SPRU871 TI publication
25# for further details. Otherwise it should be possible to schedule
26# the loop for iteration interval of 6...
27
# Symbolic names for the machine registers used by the assembly below.
# $KEY/$LEN/$INP/$OUT name A4/B4/A6/B6, which the routines read as their
# incoming arguments (RC4 uses all four; RC4_set_key uses $KEY, $LEN and
# $INP, the latter as the key-material pointer).
28($KEY,$LEN,$INP,$OUT)=("A4","B4","A6","B6");
29
# Working registers.  The two lists use the same register numbers on the
# A and B register files: $KEYA/$KEYB=5, $XX/$YY=7, $TY/$TX=8, $xx/$tx=9,
# $ONE/$SUM=1, $ret/$dat=2.
30($KEYA,$XX,$TY,$xx,$ONE,$ret)=map("A$_",(5,7,8,9,1,2));
31($KEYB,$YY,$TX,$tx,$SUM,$dat)=map("B$_",(5,7,8,9,1,2));
32
33$code.=<<___;
;; Assembler preamble: select the code section and set up symbol naming.
34	.text
35
;; For assembler versions below 7000000 define __TI_EABI__ as 0 so the
;; .if tests on it in this file can be evaluated (presumably such
;; assemblers do not predefine it -- confirm against the TI tools guide).
36	.if	.ASSEMBLER_VERSION<7000000
37	.asg	0,__TI_EABI__
38	.endif
;; Under EABI: .asg makes the underscore-prefixed names used in this file
;; substitute to the unprefixed names RC4, RC4_set_key and RC4_options.
;; NOTE(review): .nocmp presumably turns off compact instruction
;; encoding for this section -- confirm against the TI tools guide.
39	.if	__TI_EABI__
40	.nocmp
41	.asg	RC4,_RC4
42	.asg	RC4_set_key,_RC4_set_key
43	.asg	RC4_options,_RC4_options
44	.endif
45
;; ------------------------------------------------------------------
;; RC4 -- XOR a buffer with the RC4 keystream, one byte per iteration.
;;
;; Registers on entry (see the Perl register map at the top of the file):
;;   A4 = key, B4 = len, A6 = input pointer, B6 = output pointer.
;; key->x and key->y are the bytes at key+0 and key+1; the table S[] is
;; byte-indexed from key+2 (held in both an A-side and a B-side base
;; register so loads/stores can be issued from either side).
;; Returns via B3.  When len is 0 the routine branches straight back and
;; the loads in the branch delay slots are predicated off.
;;
;; x, y and the table sums are updated with ADD4/SUB4, i.e. per-byte
;; arithmetic, so the low byte wraps modulo 256 without explicit masking.
;; ------------------------------------------------------------------
46	.global	_RC4
47	.align	16
48_RC4:
49	.asmfunc
;; B0 = len; used as the predicate for the early return and the setup.
50	MV	$LEN,B0
51  [!B0]	BNOP	B3			; if (len==0) return;
52||[B0]	ADD	$KEY,2,$KEYA
53||[B0]	ADD	$KEY,2,$KEYB
54  [B0]	MVK	1,$ONE
55||[B0]	LDBU	*${KEYA}[-2],$XX	; key->x
56  [B0]	LDBU	*${KEYB}[-1],$YY	; key->y
57||	NOP	4
58
;; Pipeline priming for the first byte: x += 1, fetch S[x], load the
;; loop counter ILC with len, then wait for the load to land.
59	ADD4	$ONE,$XX,$XX
60	LDBU	*${KEYA}[$XX],$TX
61||	MVC	$LEN,ILC
62	NOP	4
63;;==================================================
;; Software-pipelined loop, iteration interval 7 (the "7.0 cycles" per
;; byte quoted in the file header).  In parallel with SPLOOP: y += S[x].
64	SPLOOP	7
65||	ADD4	$TX,$YY,$YY
66
;; Fetch S[y]; keep a copy of the current x for the swap store below and
;; advance x for the next byte.
67	LDBU	*${KEYB}[$YY],$TY
68||	MVD	$XX,$xx
69||	ADD4	$ONE,$XX,$XX
;; Prefetch S[next x] before this byte's swap has been stored.
70	LDBU	*${KEYA}[$XX],$tx
;; B0 = (y == next x): in that case the store to S[y] below overwrites
;; the location just prefetched, so the prefetched value must not be used.
71	CMPEQ	$YY,$XX,B0
72||	NOP	3
;; Swap, first half: S[y] = old S[x].  If y == next x, the next S[x] is
;; exactly that value, so add it to y for the next byte right away.
73	STB	$TX,*${KEYB}[$YY]
74||[B0]	ADD4	$TX,$YY,$YY
;; Swap, second half: S[x] = old S[y].  Otherwise (y != next x) take the
;; prefetched value as the next S[x] and add it to y for the next byte.
75	STB	$TY,*${KEYA}[$xx]
76||[!B0]	ADD4	$tx,$YY,$YY
77||[!B0]	MVD	$tx,$TX
;; Keystream index = S[x] + S[y] for the current byte.
78	ADD4	$TY,$TX,$SUM		; [0,0] $TX is not replaced by $tx yet!
79||	NOP	2
;; Next input byte, post-incrementing the input pointer.
80	LDBU	*$INP++,$dat
81||	NOP	2
;; Keystream byte = S[S[x] + S[y]].
82	LDBU	*${KEYB}[$SUM],$ret
83||	NOP	5
;; Output byte = input byte XOR keystream byte, stored with
;; post-increment of the output pointer.
84	XOR.L	$dat,$ret,$ret
85	SPKERNEL
86||	STB	$ret,*$OUT++
87;;==================================================
;; Epilogue.  The loop leaves x one step ahead and y with the next S[x]
;; already added (both were advanced for a byte that is not processed),
;; so both are backed off before being written to key->x / key->y.
;; The final store and NOP fill the delay slots of the return branch.
88	SUB4	$XX,$ONE,$XX
89||	NOP	5
90	STB	$XX,*${KEYA}[-2]	; key->x
91||	SUB4	$YY,$TX,$YY
92||	BNOP	B3
93	STB	$YY,*${KEYB}[-1]	; key->y
94||	NOP	5
95	.endasmfunc
96
;; ------------------------------------------------------------------
;; RC4_set_key -- initialise the key structure from key material.
;;
;; Registers on entry (see the Perl register map at the top of the file):
;;   A4 = key structure, B4 = length of key material in bytes,
;;   A6 = pointer to key material.
;; Layout written: key->x and key->y (one byte each, both zeroed) at
;; key+0 / key+1, followed by the byte table S[] starting at key+2.
;; Returns via B3.
;; NOTE(review): a zero length is not special-cased here; the first key
;; byte is loaded unconditionally -- confirm callers never pass 0.
;; ------------------------------------------------------------------
97	.global	_RC4_set_key
98	.align	16
99_RC4_set_key:
100	.asmfunc
;; Constants for the identity fill: the first register gets 0x04040404
;; (per-byte increment of 4), B0 gets the packed bytes 0,1,2,3 arranged
;; so that a word store puts them in memory in ascending order for the
;; selected endianness.
101	.if	.BIG_ENDIAN
102	MVK	0x00000404,$ONE
103||	MVK	0x00000203,B0
104	MVKH	0x04040000,$ONE
105||	MVKH	0x00010000,B0
106	.else
107	MVK	0x00000404,$ONE
108||	MVK	0x00000100,B0
109	MVKH	0x04040000,$ONE
110||	MVKH	0x03020000,B0
111	.endif
;; Table base (key+2) on both register sides, plus the end-of-input
;; address used to detect when the key material must be restarted.
112	ADD	$KEY,2,$KEYA
113||	ADD	$KEY,2,$KEYB
114||	ADD	$INP,$LEN,$ret		; end of input
;; Fetch the first key byte; zero the register that later holds S[x]
;; (S[0] is 0 after the identity fill).
115	LDBU	*${INP}++,$dat
116||	MVK	0,$TX
;; The halfword store clears x and y and leaves the key pointer at the
;; start of the table.  The packed 0,1,2,3 pattern moves to A0 and B0 is
;; reused for the loop count.
;; NOTE(review): the count is 64-4 rather than 64, presumably because
;; SPLOOPD starts iterations before ILC takes effect -- confirm against
;; the SPLOOPD description in the CPU reference guide.
117	STH	$TX,*${KEY}++		; key->x=key->y=0
118||	MV	B0,A0
119||	MVK	64-4,B0
120
121;;==================================================
;; Identity fill, one word (four table bytes) per iteration.
122	SPLOOPD	1
123||	MVC	B0,ILC
124
;; STNW is the non-aligned word store (the table starts at key+2);
;; ADD4 then bumps each of the four packed bytes by 4.
125	STNW	A0,*${KEY}++
126||	ADD4	$ONE,A0,A0
127	SPKERNEL
128;;==================================================
129
;; Set up the key-mixing pass: x = y = 0, per-byte increment back to 1.
;; NOTE(review): count is 256-1, presumably for the same SPLOOPD reason
;; as above -- confirm.
130	MVK	0,$YY
131||	MVK	0,$XX
132	MVK	1,$ONE
133||	MVK	256-1,B0
134
135;;==================================================
;; Key-mixing loop, iteration interval 8.  The swap logic mirrors the
;; one in RC4 above, with the current key byte also added into y.
136	SPLOOPD	8
137||	MVC	B0,ILC
138
;; y += key byte; A0 = (key pointer has reached the end of the material).
139	ADD4	$dat,$YY,$YY
140||	CMPEQ	$INP,$ret,A0		; end of input?
;; Fetch S[y]; keep a copy of x for the swap store; advance x.
141	LDBU	*${KEYB}[$YY],$TY
142||	MVD	$XX,$xx
143||	ADD4	$ONE,$XX,$XX
;; Prefetch S[next x]; wrap the key pointer back to the first key byte
;; when the end was reached.
144	LDBU	*${KEYA}[$XX],$tx
145||[A0]	SUB	$INP,$LEN,$INP		; rewind
;; Fetch the next key byte; B0 = (y == next x), i.e. the prefetched
;; table value is about to be overwritten by the store to S[y].
146	LDBU	*${INP}++,$dat
147||	CMPEQ	$YY,$XX,B0
148||	NOP	3
;; Swap, first half: S[y] = old S[x]; if y == next x that same value is
;; the next S[x], so add it to y now.
149	STB	$TX,*${KEYB}[$YY]
150||[B0]	ADD4	$TX,$YY,$YY
;; Swap, second half: S[x] = old S[y]; otherwise use the prefetched
;; value as the next S[x] and add it to y.
151	STB	$TY,*${KEYA}[$xx]
152||[!B0]	ADD4	$tx,$YY,$YY
153||[!B0]	MV	$tx,$TX
154	SPKERNEL
155;;==================================================
156
;; Return to the caller; the five NOP cycles cover the branch delay.
157	BNOP	B3,5
158	.endasmfunc
159
;; ------------------------------------------------------------------
;; RC4_options -- compute the address of the rc4_options string in A4.
;;
;; The address is formed position-independently: ADDKPC yields the
;; run-time address of the local label at the function entry, and the
;; link-time offset from that label to the string is added to it.
;; Takes no arguments in this code; returns via B3 (A4 presumably being
;; the return-value register of the TI calling convention -- confirm).
;; ------------------------------------------------------------------
160	.global	_RC4_options
161	.align	16
162_RC4_options:
163_rc4_options:
164	.asmfunc
;; The return branch is issued first; everything after it runs in the
;; branch delay slots.
165	BNOP	B3,1
;; B4 = run-time address of the entry label.
166	ADDKPC	_rc4_options,B4
;; A4 = offset of the string from the entry label.  EABI uses the
;; PCR_OFFSET operator; otherwise a plain label difference is used.
167	.if	__TI_EABI__
168	MVKL	\$PCR_OFFSET(rc4_options,_rc4_options),A4
169	MVKH	\$PCR_OFFSET(rc4_options,_rc4_options),A4
170	.else
171	MVKL	(rc4_options-_rc4_options),A4
172	MVKH	(rc4_options-_rc4_options),A4
173	.endif
;; A4 = label address + offset = address of the string.
174	ADD	B4,A4,A4
175	.endasmfunc
176
;; String data whose address RC4_options computes.  The section name
;; depends on the ABI: a ".text:" subsection under EABI, a ".const:"
;; subsection otherwise.
177	.if	__TI_EABI__
178	.sect	".text:rc4_options.const"
179	.else
180	.sect	".const:rc4_options"
181	.endif
182	.align	4
183rc4_options:
;; First string: the text found at the rc4_options label itself.
184	.cstring "rc4(sploop,char)"
;; Second string: attribution text placed right after the first one.
;; The backslash before the at-sign is there for the enclosing Perl
;; heredoc, which would otherwise try to interpolate an array.
185	.cstring "RC4 for C64+, CRYPTOGAMS by <appro\@openssl.org>"
186	.align	4
187___
188
# Emit the generated assembly.  If a trailing command-line argument is
# present it is taken as the output file name and STDOUT is reopened onto
# it; with no argument the code goes to the existing STDOUT.
# NOTE(review): the result of open is not checked here; only the final
# close is checked.
189$output = pop and open STDOUT,">$output";
190print $code;
# Fail loudly if the buffered output could not be flushed/closed.
191close STDOUT or die "error closing STDOUT: $!";
192