xref: /openssl/crypto/aes/asm/aes-c64xplus.pl (revision 33388b44)
1#! /usr/bin/env perl
2# Copyright 2012-2020 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the Apache License 2.0 (the "License").  You may not use
5# this file except in compliance with the License.  You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9#
10# ====================================================================
11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12# project. The module is, however, dual licensed under OpenSSL and
13# CRYPTOGAMS licenses depending on where you obtain it. For further
14# details see http://www.openssl.org/~appro/cryptogams/.
15# ====================================================================
16#
17# [Endian-neutral] AES for C64x+.
18#
19# Even though SPLOOPs are scheduled for 13 cycles, and thus expected
20# performance is ~8.5 cycles per byte processed with 128-bit key,
21# measured performance turned out to be ~10 cycles per byte. Discrepancy
22# must be caused by limitations of L1D memory banking(*), see SPRU871
23# TI publication for further details. If it's any consolation, it's still
24# ~20% faster than TI's linear assembly module anyway... Compared to
25# aes_core.c compiled with cl6x 6.0 with -mv6400+ -o2 options this
26# code is 3.75x faster and almost 3x smaller (tables included).
27#
28# (*)	This means that there might be subtle correlation between data
29#	and timing and one can wonder if it can be ... attacked:-(
30#	On the other hand this also means that *if* one chooses to
31#	implement *4* T-tables variant [instead of 1 T-table as in
32#	this implementation, or in addition to], then one ought to
33#	*interleave* them. Even though it complicates addressing,
34#	references to interleaved tables would be guaranteed not to
35#	clash. I reckon that it should be possible to break 8 cycles
36#	per byte "barrier," i.e. improve by ~20%, naturally at the
37#	cost of 8x increased pressure on L1D. 8x because you'd have
38#	to interleave both Te and Td tables...
39
# Take the last command-line argument as the output file name and, if one
# was given, redirect STDOUT to it.
40$output = pop and open STDOUT,">$output";
41
# Register allocation shared by the code templates below. Names starting
# with "A" and "B" are registers of the two C64x+ register files; most
# lists alternate between the two sides.
# $TEA/$TEB: table base pointers; $KPA/$KPB: key-schedule pointers.
42($TEA,$TEB)=("A5","B5");
43($KPA,$KPB)=("A3","B1");
# @K: the four words of the current round key; @s: the four state words.
44@K=("A6","B6","A7","B7");
45@s=("A8","B8","A9","B9");
# Registers that receive the table look-ups. The encrypt (@TeN) and
# decrypt (@TdN) lists are assigned the same registers.
46@Te0=@Td0=("A16","B16","A17","B17");
47@Te1=@Td1=("A18","B18","A19","B19");
48@Te2=@Td2=("A20","B20","A21","B21");
49@Te3=@Td3=("A22","B22","A23","B23");
50
# Everything up to the closing ___ is assembly text; Perl variables are
# interpolated into it.
51$code=<<___;
;; Assembler preamble: symbol aliases and endian-dependent constants used
;; by every routine in this file.
52	.text
53
;; Assemblers older than 7.0.0 get __TI_EABI__ defined as 0 here, so the
;; .if __TI_EABI__ tests in this file take their non-EABI branch.
54	.if	.ASSEMBLER_VERSION<7000000
55	.asg	0,__TI_EABI__
56	.endif
;; Under EABI the underscore-prefixed names used for the labels in this
;; file are aliased to the unprefixed names.
57	.if	__TI_EABI__
58	.nocmp
59	.asg	AES_encrypt,_AES_encrypt
60	.asg	AES_decrypt,_AES_decrypt
61	.asg	AES_set_encrypt_key,_AES_set_encrypt_key
62	.asg	AES_set_decrypt_key,_AES_set_decrypt_key
63	.asg	AES_ctr32_encrypt,_AES_ctr32_encrypt
64	.endif
65
;; Register aliases. Note that RET and INP name the same register (A4).
66	.asg	B3,RA
67	.asg	A4,INP
68	.asg	B4,OUT
69	.asg	A6,KEY
70	.asg	A4,RET
71	.asg	B15,SP
72
;; EXT0..EXT3 are passed as the first constant of EXTU when a byte is
;; extracted from a state word; TBL1..TBL3 are the ROTL amounts applied
;; to table entries.
73	.eval	24,EXT0
74	.eval	16,EXT1
75	.eval	8,EXT2
76	.eval	0,EXT3
77	.eval	8,TBL1
78	.eval	16,TBL2
79	.eval	24,TBL3
80
;; For big-endian builds the same symbols are mirrored (24-EXTn and
;; 32-TBLn), which lets both byte orders share the instruction sequences.
81	.if	.BIG_ENDIAN
82	.eval	24-EXT0,EXT0
83	.eval	24-EXT1,EXT1
84	.eval	24-EXT2,EXT2
85	.eval	24-EXT3,EXT3
86	.eval	32-TBL1,TBL1
87	.eval	32-TBL2,TBL2
88	.eval	32-TBL3,TBL3
89	.endif
90
;;--------------------------------------------------------------------
;; AES_encrypt: encrypt one 16-byte block.
;;   In:  INP (A4) -> input, OUT (B4) -> output, KEY (A6) -> key schedule,
;;        RA (B3) = return address.
;;   B2 is forced to 1 here so that the [B2]-predicated LDNDW/STNDW below
;;   read the block from INP and write the result to OUT.
;;   __encrypt is a second entry used by AES_ctr32_encrypt with B2=0: the
;;   block is then passed in A9:A8/B9:B8 and the result is left there.
;;--------------------------------------------------------------------
91	.global	_AES_encrypt
92_AES_encrypt:
93	.asmfunc
94	MVK	1,B2
95__encrypt:
;; The table address is formed PC-relatively: B0 (from ADDKPC) plus the
;; offset AES_Te-__encrypt (MVKL/MVKH), summed by the ADD tagged AES_Te.
;; KPA starts at KEY+0 and KPB at KEY+4; both step by two words, so they
;; walk the even and odd words of the key schedule respectively.
96	.if	__TI_EABI__
97   [B2]	LDNDW	*INP++,A9:A8			; load input
98||	MVKL	\$PCR_OFFSET(AES_Te,__encrypt),$TEA
99||	ADDKPC	__encrypt,B0
100   [B2]	LDNDW	*INP++,B9:B8
101||	MVKH	\$PCR_OFFSET(AES_Te,__encrypt),$TEA
102||	ADD	0,KEY,$KPA
103||	ADD	4,KEY,$KPB
104	.else
105   [B2]	LDNDW	*INP++,A9:A8			; load input
106||	MVKL	(AES_Te-__encrypt),$TEA
107||	ADDKPC	__encrypt,B0
108   [B2]	LDNDW	*INP++,B9:B8
109||	MVKH	(AES_Te-__encrypt),$TEA
110||	ADD	0,KEY,$KPA
111||	ADD	4,KEY,$KPB
112	.endif
;; A0=60 is the word index of the rounds field within the key schedule;
;; A0 is then reused for 1024, the size of the AES_Te word table.
113	LDW	*$KPA++[2],$Te0[0]		; zero round key
114||	LDW	*$KPB++[2],$Te0[1]
115||	MVK	60,A0
116||	ADD	B0,$TEA,$TEA			; AES_Te
117	LDW	*KEY[A0],B0			; rounds
118||	MVK	1024,A0				; sizeof(AES_Te)
119	LDW	*$KPA++[2],$Te0[2]
120||	LDW	*$KPB++[2],$Te0[3]
121||	MV	$TEA,$TEB
122	NOP
;; Move the block into the state registers; the halves of each register
;; pair are swapped for big-endian.
123	.if	.BIG_ENDIAN
124	MV	A9,$s[0]
125||	MV	A8,$s[1]
126||	MV	B9,$s[2]
127||	MV	B8,$s[3]
128	.else
129	MV	A8,$s[0]
130||	MV	A9,$s[1]
131||	MV	B8,$s[2]
132||	MV	B9,$s[3]
133	.endif
;; Only state words 0 and 1 are XORed with the zero round key here; the
;; XORs for words 2 and 3 are the ones marked "modulo-scheduled" inside
;; the loop.
134	XOR	$Te0[0],$s[0],$s[0]
135||	XOR	$Te0[1],$s[1],$s[1]
136||	LDW	*$KPA++[2],$K[0]		; 1st round key
137||	LDW	*$KPB++[2],$K[1]
138	SUB	B0,2,B0
139
;; ILC is loaded with rounds-2 (B0 was decremented by 2 just above).
140	SPLOOPD	13
141||	MVC	B0,ILC
142||	LDW	*$KPA++[2],$K[2]
143||	LDW	*$KPB++[2],$K[3]
144;;====================================================================
;; Round loop. Each state byte is extracted with EXTU and used as a word
;; index into the single AES_Te table (via TEA or TEB). Entries standing
;; in for Te1..Te3 are rotated by TBL1..TBL3, then everything is XORed
;; together with the round key in K[0..3] to form the next state. The
;; following round key is loaded inside the loop.
145	EXTU	$s[1],EXT1,24,$Te1[1]
146||	EXTU	$s[0],EXT3,24,$Te3[0]
147	LDW	*${TEB}[$Te1[1]],$Te1[1]	; Te1[s1>>8],	t0
148||	LDW	*${TEA}[$Te3[0]],$Te3[0]	; Te3[s0>>24],	t1
149||	XOR	$s[2],$Te0[2],$s[2]		; modulo-scheduled
150||	XOR	$s[3],$Te0[3],$s[3]		; modulo-scheduled
151||	EXTU	$s[1],EXT3,24,$Te3[1]
152||	EXTU	$s[0],EXT1,24,$Te1[0]
153	LDW	*${TEB}[$Te3[1]],$Te3[1]	; Te3[s1>>24],	t2
154||	LDW	*${TEA}[$Te1[0]],$Te1[0]	; Te1[s0>>8],	t3
155||	EXTU	$s[2],EXT2,24,$Te2[2]
156||	EXTU	$s[3],EXT2,24,$Te2[3]
157	LDW	*${TEA}[$Te2[2]],$Te2[2]	; Te2[s2>>16],	t0
158||	LDW	*${TEB}[$Te2[3]],$Te2[3]	; Te2[s3>>16],	t1
159||	EXTU	$s[3],EXT3,24,$Te3[3]
160||	EXTU	$s[2],EXT1,24,$Te1[2]
161	LDW	*${TEB}[$Te3[3]],$Te3[3]	; Te3[s3>>24],	t0
162||	LDW	*${TEA}[$Te1[2]],$Te1[2]	; Te1[s2>>8],	t1
163||	EXTU	$s[0],EXT2,24,$Te2[0]
164||	EXTU	$s[1],EXT2,24,$Te2[1]
165	LDW	*${TEA}[$Te2[0]],$Te2[0]	; Te2[s0>>16],	t2
166||	LDW	*${TEB}[$Te2[1]],$Te2[1]	; Te2[s1>>16],	t3
167||	EXTU	$s[3],EXT1,24,$Te1[3]
168||	EXTU	$s[2],EXT3,24,$Te3[2]
169	LDW	*${TEB}[$Te1[3]],$Te1[3]	; Te1[s3>>8],	t2
170||	LDW	*${TEA}[$Te3[2]],$Te3[2]	; Te3[s2>>24],	t3
171||	ROTL	$Te1[1],TBL1,$Te3[0]		; t0
172||	ROTL	$Te3[0],TBL3,$Te1[1]		; t1
173||	EXTU	$s[0],EXT0,24,$Te0[0]
174||	EXTU	$s[1],EXT0,24,$Te0[1]
175	LDW	*${TEA}[$Te0[0]],$Te0[0]	; Te0[s0],	t0
176||	LDW	*${TEB}[$Te0[1]],$Te0[1]	; Te0[s1],	t1
177||	ROTL	$Te3[1],TBL3,$Te1[0]		; t2
178||	ROTL	$Te1[0],TBL1,$Te3[1]		; t3
179||	EXTU	$s[2],EXT0,24,$Te0[2]
180||	EXTU	$s[3],EXT0,24,$Te0[3]
181	LDW	*${TEA}[$Te0[2]],$Te0[2]	; Te0[s2],	t2
182||	LDW	*${TEB}[$Te0[3]],$Te0[3]	; Te0[s3],	t3
183||	ROTL	$Te2[2],TBL2,$Te2[2]		; t0
184||	ROTL	$Te2[3],TBL2,$Te2[3]		; t1
185||	XOR	$K[0],$Te3[0],$s[0]
186||	XOR	$K[1],$Te1[1],$s[1]
187	ROTL	$Te3[3],TBL3,$Te1[2]		; t0
188||	ROTL	$Te1[2],TBL1,$Te3[3]		; t1
189||	XOR	$K[2],$Te1[0],$s[2]
190||	XOR	$K[3],$Te3[1],$s[3]
191||	LDW	*$KPA++[2],$K[0]		; next round key
192||	LDW	*$KPB++[2],$K[1]
193	ROTL	$Te2[0],TBL2,$Te2[0]		; t2
194||	ROTL	$Te2[1],TBL2,$Te2[1]		; t3
195||	XOR	$s[0],$Te2[2],$s[0]
196||	XOR	$s[1],$Te2[3],$s[1]
197||	LDW	*$KPA++[2],$K[2]
198||	LDW	*$KPB++[2],$K[3]
199	ROTL	$Te1[3],TBL1,$Te3[2]		; t2
200||	ROTL	$Te3[2],TBL3,$Te1[3]		; t3
201||	XOR	$s[0],$Te1[2],$s[0]
202||	XOR	$s[1],$Te3[3],$s[1]
203	XOR	$s[2],$Te2[0],$s[2]
204||	XOR	$s[3],$Te2[1],$s[3]
205||	XOR	$s[0],$Te0[0],$s[0]
206||	XOR	$s[1],$Te0[1],$s[1]
207	SPKERNEL
208||	XOR.L	$s[2],$Te3[2],$s[2]
209||	XOR.L	$s[3],$Te1[3],$s[3]
210;;====================================================================
;; Last round. TEA/TEB are advanced by A0 (1024) to the byte table that
;; follows the word table, and each state byte is looked up with LDBU
;; instead of LDW; no rotation is applied in this round.
211	ADD.D	${TEA},A0,${TEA}		; point to Te4
212||	ADD.D	${TEB},A0,${TEB}
213||	EXTU	$s[1],EXT1,24,$Te1[1]
214||	EXTU	$s[0],EXT3,24,$Te3[0]
215	LDBU	*${TEB}[$Te1[1]],$Te1[1]	; Te1[s1>>8],	t0
216||	LDBU	*${TEA}[$Te3[0]],$Te3[0]	; Te3[s0>>24],	t1
217||	XOR	$s[2],$Te0[2],$s[2]		; modulo-scheduled
218||	XOR	$s[3],$Te0[3],$s[3]		; modulo-scheduled
219||	EXTU	$s[0],EXT0,24,$Te0[0]
220||	EXTU	$s[1],EXT0,24,$Te0[1]
221	LDBU	*${TEA}[$Te0[0]],$Te0[0]	; Te0[s0],	t0
222||	LDBU	*${TEB}[$Te0[1]],$Te0[1]	; Te0[s1],	t1
223||	EXTU	$s[3],EXT3,24,$Te3[3]
224||	EXTU	$s[2],EXT1,24,$Te1[2]
225	LDBU	*${TEB}[$Te3[3]],$Te3[3]	; Te3[s3>>24],	t0
226||	LDBU	*${TEA}[$Te1[2]],$Te1[2]	; Te1[s2>>8],	t1
227||	EXTU	$s[2],EXT2,24,$Te2[2]
228||	EXTU	$s[3],EXT2,24,$Te2[3]
229	LDBU	*${TEA}[$Te2[2]],$Te2[2]	; Te2[s2>>16],	t0
230||	LDBU	*${TEB}[$Te2[3]],$Te2[3]	; Te2[s3>>16],	t1
231||	EXTU	$s[1],EXT3,24,$Te3[1]
232||	EXTU	$s[0],EXT1,24,$Te1[0]
233	LDBU	*${TEB}[$Te3[1]],$Te3[1]	; Te3[s1>>24],	t2
234||	LDBU	*${TEA}[$Te1[0]],$Te1[0]	; Te1[s0>>8],	t3
235||	EXTU	$s[3],EXT1,24,$Te1[3]
236||	EXTU	$s[2],EXT3,24,$Te3[2]
237	LDBU	*${TEB}[$Te1[3]],$Te1[3]	; Te1[s3>>8],	t2
238||	LDBU	*${TEA}[$Te3[2]],$Te3[2]	; Te3[s2>>24],	t3
239||	EXTU	$s[2],EXT0,24,$Te0[2]
240||	EXTU	$s[3],EXT0,24,$Te0[3]
241	LDBU	*${TEA}[$Te0[2]],$Te0[2]	; Te0[s2],	t2
242||	LDBU	*${TEB}[$Te0[3]],$Te0[3]	; Te0[s3],	t3
243||	EXTU	$s[0],EXT2,24,$Te2[0]
244||	EXTU	$s[1],EXT2,24,$Te2[1]
245	LDBU	*${TEA}[$Te2[0]],$Te2[0]	; Te2[s0>>16],	t2
246||	LDBU	*${TEB}[$Te2[1]],$Te2[1]	; Te2[s1>>16],	t3
247
;; Reassemble the looked-up bytes into four words with PACK2/PACKL4 and
;; XOR them with the last round key. The return branch (BNOP RA) is
;; issued in the middle of this sequence; the five execute packets that
;; follow it, including the [B2]-predicated stores, run before control
;; leaves the routine.
248	.if	.BIG_ENDIAN
249	PACK2	$Te0[0],$Te1[1],$Te0[0]
250||	PACK2	$Te0[1],$Te1[2],$Te0[1]
251	PACK2	$Te2[2],$Te3[3],$Te2[2]
252||	PACK2	$Te2[3],$Te3[0],$Te2[3]
253	PACKL4	$Te0[0],$Te2[2],$Te0[0]
254||	PACKL4	$Te0[1],$Te2[3],$Te0[1]
255	XOR	$K[0],$Te0[0],$Te0[0]		; s[0]
256||	XOR	$K[1],$Te0[1],$Te0[1]		; s[1]
257
258	PACK2	$Te0[2],$Te1[3],$Te0[2]
259||	PACK2	$Te0[3],$Te1[0],$Te0[3]
260	PACK2	$Te2[0],$Te3[1],$Te2[0]
261||	PACK2	$Te2[1],$Te3[2],$Te2[1]
262||	BNOP	RA
263	PACKL4	$Te0[2],$Te2[0],$Te0[2]
264||	PACKL4	$Te0[3],$Te2[1],$Te0[3]
265	XOR	$K[2],$Te0[2],$Te0[2]		; s[2]
266||	XOR	$K[3],$Te0[3],$Te0[3]		; s[3]
267
268	MV	$Te0[0],A9
269||	MV	$Te0[1],A8
270	MV	$Te0[2],B9
271||	MV	$Te0[3],B8
272|| [B2]	STNDW	A9:A8,*OUT++
273   [B2]	STNDW	B9:B8,*OUT++
274	.else
275	PACK2	$Te1[1],$Te0[0],$Te1[1]
276||	PACK2	$Te1[2],$Te0[1],$Te1[2]
277	PACK2	$Te3[3],$Te2[2],$Te3[3]
278||	PACK2	$Te3[0],$Te2[3],$Te3[0]
279	PACKL4	$Te3[3],$Te1[1],$Te1[1]
280||	PACKL4	$Te3[0],$Te1[2],$Te1[2]
281	XOR	$K[0],$Te1[1],$Te1[1]		; s[0]
282||	XOR	$K[1],$Te1[2],$Te1[2]		; s[1]
283
284	PACK2	$Te1[3],$Te0[2],$Te1[3]
285||	PACK2	$Te1[0],$Te0[3],$Te1[0]
286	PACK2	$Te3[1],$Te2[0],$Te3[1]
287||	PACK2	$Te3[2],$Te2[1],$Te3[2]
288||	BNOP	RA
289	PACKL4	$Te3[1],$Te1[3],$Te1[3]
290||	PACKL4	$Te3[2],$Te1[0],$Te1[0]
291	XOR	$K[2],$Te1[3],$Te1[3]		; s[2]
292||	XOR	$K[3],$Te1[0],$Te1[0]		; s[3]
293
294	MV	$Te1[1],A8
295||	MV	$Te1[2],A9
296	MV	$Te1[3],B8
297||	MV	$Te1[0],B9
298|| [B2]	STNDW	A9:A8,*OUT++
299   [B2]	STNDW	B9:B8,*OUT++
300	.endif
301	.endasmfunc
302
;;--------------------------------------------------------------------
;; AES_decrypt: decrypt one 16-byte block.
;;   In:  INP (A4) -> input, OUT (B4) -> output, KEY (A6) -> key schedule,
;;        RA (B3) = return address.
;;   B2 is forced to 1 here so that the [B2]-predicated LDNDW/STNDW below
;;   read the block from INP and write the result to OUT; entering at
;;   __decrypt with B2=0 would skip both.
;;   The structure mirrors AES_encrypt, using the AES_Td table and the
;;   byte selection pattern of the inverse cipher.
;;--------------------------------------------------------------------
303	.global	_AES_decrypt
304_AES_decrypt:
305	.asmfunc
306	MVK	1,B2
307__decrypt:
;; The table address is formed PC-relatively: B0 (from ADDKPC) plus the
;; offset AES_Td-__decrypt (MVKL/MVKH), summed by the ADD tagged AES_Td.
;; KPA starts at KEY+0 and KPB at KEY+4; both step by two words.
308	.if	__TI_EABI__
309   [B2]	LDNDW	*INP++,A9:A8			; load input
310||	MVKL	\$PCR_OFFSET(AES_Td,__decrypt),$TEA
311||	ADDKPC	__decrypt,B0
312   [B2]	LDNDW	*INP++,B9:B8
313||	MVKH	\$PCR_OFFSET(AES_Td,__decrypt),$TEA
314||	ADD	0,KEY,$KPA
315||	ADD	4,KEY,$KPB
316	.else
317   [B2]	LDNDW	*INP++,A9:A8			; load input
318||	MVKL	(AES_Td-__decrypt),$TEA
319||	ADDKPC	__decrypt,B0
320   [B2]	LDNDW	*INP++,B9:B8
321||	MVKH	(AES_Td-__decrypt),$TEA
322||	ADD	0,KEY,$KPA
323||	ADD	4,KEY,$KPB
324	.endif
;; A0=60 is the word index of the rounds field within the key schedule;
;; A0 is then reused for 1024, the size of the AES_Td word table.
325	LDW	*$KPA++[2],$Td0[0]		; zero round key
326||	LDW	*$KPB++[2],$Td0[1]
327||	MVK	60,A0
328||	ADD	B0,$TEA,$TEA			; AES_Td
329	LDW	*KEY[A0],B0			; rounds
330||	MVK	1024,A0				; sizeof(AES_Td)
331	LDW	*$KPA++[2],$Td0[2]
332||	LDW	*$KPB++[2],$Td0[3]
333||	MV	$TEA,$TEB
334	NOP
;; Move the block into the state registers; the halves of each register
;; pair are swapped for big-endian.
335	.if	.BIG_ENDIAN
336	MV	A9,$s[0]
337||	MV	A8,$s[1]
338||	MV	B9,$s[2]
339||	MV	B8,$s[3]
340	.else
341	MV	A8,$s[0]
342||	MV	A9,$s[1]
343||	MV	B8,$s[2]
344||	MV	B9,$s[3]
345	.endif
;; Only state words 0 and 1 are XORed with the zero round key here; the
;; XORs for words 2 and 3 are the ones marked "modulo-scheduled" inside
;; the loop.
346	XOR	$Td0[0],$s[0],$s[0]
347||	XOR	$Td0[1],$s[1],$s[1]
348||	LDW	*$KPA++[2],$K[0]		; 1st round key
349||	LDW	*$KPB++[2],$K[1]
350	SUB	B0,2,B0
351
;; ILC is loaded with rounds-2 (B0 was decremented by 2 just above).
352	SPLOOPD	13
353||	MVC	B0,ILC
354||	LDW	*$KPA++[2],$K[2]
355||	LDW	*$KPB++[2],$K[3]
356;;====================================================================
;; Round loop. Each state byte is extracted with EXTU and used as a word
;; index into the single AES_Td table (via TEA or TEB). Entries standing
;; in for Td1..Td3 are rotated by TBL1..TBL3, then everything is XORed
;; together with the round key in K[0..3] to form the next state. The
;; following round key is loaded inside the loop.
357	EXTU	$s[1],EXT3,24,$Td3[1]
358||	EXTU	$s[0],EXT1,24,$Td1[0]
359	LDW	*${TEB}[$Td3[1]],$Td3[1]	; Td3[s1>>24],	t0
360||	LDW	*${TEA}[$Td1[0]],$Td1[0]	; Td1[s0>>8],	t1
361||	XOR	$s[2],$Td0[2],$s[2]		; modulo-scheduled
362||	XOR	$s[3],$Td0[3],$s[3]		; modulo-scheduled
363||	EXTU	$s[1],EXT1,24,$Td1[1]
364||	EXTU	$s[0],EXT3,24,$Td3[0]
365	LDW	*${TEB}[$Td1[1]],$Td1[1]	; Td1[s1>>8],	t2
366||	LDW	*${TEA}[$Td3[0]],$Td3[0]	; Td3[s0>>24],	t3
367||	EXTU	$s[2],EXT2,24,$Td2[2]
368||	EXTU	$s[3],EXT2,24,$Td2[3]
369	LDW	*${TEA}[$Td2[2]],$Td2[2]	; Td2[s2>>16],	t0
370||	LDW	*${TEB}[$Td2[3]],$Td2[3]	; Td2[s3>>16],	t1
371||	EXTU	$s[3],EXT1,24,$Td1[3]
372||	EXTU	$s[2],EXT3,24,$Td3[2]
373	LDW	*${TEB}[$Td1[3]],$Td1[3]	; Td1[s3>>8],	t0
374||	LDW	*${TEA}[$Td3[2]],$Td3[2]	; Td3[s2>>24],	t1
375||	EXTU	$s[0],EXT2,24,$Td2[0]
376||	EXTU	$s[1],EXT2,24,$Td2[1]
377	LDW	*${TEA}[$Td2[0]],$Td2[0]	; Td2[s0>>16],	t2
378||	LDW	*${TEB}[$Td2[1]],$Td2[1]	; Td2[s1>>16],	t3
379||	EXTU	$s[3],EXT3,24,$Td3[3]
380||	EXTU	$s[2],EXT1,24,$Td1[2]
381	LDW	*${TEB}[$Td3[3]],$Td3[3]	; Td3[s3>>24],	t2
382||	LDW	*${TEA}[$Td1[2]],$Td1[2]	; Td1[s2>>8],	t3
383||	ROTL	$Td3[1],TBL3,$Td1[0]		; t0
384||	ROTL	$Td1[0],TBL1,$Td3[1]		; t1
385||	EXTU	$s[0],EXT0,24,$Td0[0]
386||	EXTU	$s[1],EXT0,24,$Td0[1]
387	LDW	*${TEA}[$Td0[0]],$Td0[0]	; Td0[s0],	t0
388||	LDW	*${TEB}[$Td0[1]],$Td0[1]	; Td0[s1],	t1
389||	ROTL	$Td1[1],TBL1,$Td3[0]		; t2
390||	ROTL	$Td3[0],TBL3,$Td1[1]		; t3
391||	EXTU	$s[2],EXT0,24,$Td0[2]
392||	EXTU	$s[3],EXT0,24,$Td0[3]
393	LDW	*${TEA}[$Td0[2]],$Td0[2]	; Td0[s2],	t2
394||	LDW	*${TEB}[$Td0[3]],$Td0[3]	; Td0[s3],	t3
395||	ROTL	$Td2[2],TBL2,$Td2[2]		; t0
396||	ROTL	$Td2[3],TBL2,$Td2[3]		; t1
397||	XOR	$K[0],$Td1[0],$s[0]
398||	XOR	$K[1],$Td3[1],$s[1]
399	ROTL	$Td1[3],TBL1,$Td3[2]		; t0
400||	ROTL	$Td3[2],TBL3,$Td1[3]		; t1
401||	XOR	$K[2],$Td3[0],$s[2]
402||	XOR	$K[3],$Td1[1],$s[3]
403||	LDW	*$KPA++[2],$K[0]		; next round key
404||	LDW	*$KPB++[2],$K[1]
405	ROTL	$Td2[0],TBL2,$Td2[0]		; t2
406||	ROTL	$Td2[1],TBL2,$Td2[1]		; t3
407||	XOR	$s[0],$Td2[2],$s[0]
408||	XOR	$s[1],$Td2[3],$s[1]
409||	LDW	*$KPA++[2],$K[2]
410||	LDW	*$KPB++[2],$K[3]
411	ROTL	$Td3[3],TBL3,$Td1[2]		; t2
412||	ROTL	$Td1[2],TBL1,$Td3[3]		; t3
413||	XOR	$s[0],$Td3[2],$s[0]
414||	XOR	$s[1],$Td1[3],$s[1]
415	XOR	$s[2],$Td2[0],$s[2]
416||	XOR	$s[3],$Td2[1],$s[3]
417||	XOR	$s[0],$Td0[0],$s[0]
418||	XOR	$s[1],$Td0[1],$s[1]
419	SPKERNEL
420||	XOR.L	$s[2],$Td1[2],$s[2]
421||	XOR.L	$s[3],$Td3[3],$s[3]
422;;====================================================================
;; Last round. TEA/TEB are advanced by A0 (1024) to the byte table that
;; follows the word table, and each state byte is looked up with LDBU
;; instead of LDW; no rotation is applied in this round.
423	ADD.D	${TEA},A0,${TEA}		; point to Td4
424||	ADD.D	${TEB},A0,${TEB}
425||	EXTU	$s[1],EXT3,24,$Td3[1]
426||	EXTU	$s[0],EXT1,24,$Td1[0]
427	LDBU	*${TEB}[$Td3[1]],$Td3[1]	; Td3[s1>>24],	t0
428||	LDBU	*${TEA}[$Td1[0]],$Td1[0]	; Td1[s0>>8],	t1
429||	XOR	$s[2],$Td0[2],$s[2]		; modulo-scheduled
430||	XOR	$s[3],$Td0[3],$s[3]		; modulo-scheduled
431||	EXTU	$s[0],EXT0,24,$Td0[0]
432||	EXTU	$s[1],EXT0,24,$Td0[1]
433	LDBU	*${TEA}[$Td0[0]],$Td0[0]	; Td0[s0],	t0
434||	LDBU	*${TEB}[$Td0[1]],$Td0[1]	; Td0[s1],	t1
435||	EXTU	$s[2],EXT2,24,$Td2[2]
436||	EXTU	$s[3],EXT2,24,$Td2[3]
437	LDBU	*${TEA}[$Td2[2]],$Td2[2]	; Td2[s2>>16],	t0
438||	LDBU	*${TEB}[$Td2[3]],$Td2[3]	; Td2[s3>>16],	t1
439||	EXTU	$s[3],EXT1,24,$Td1[3]
440||	EXTU	$s[2],EXT3,24,$Td3[2]
441	LDBU	*${TEB}[$Td1[3]],$Td1[3]	; Td1[s3>>8],	t0
442||	LDBU	*${TEA}[$Td3[2]],$Td3[2]	; Td3[s2>>24],	t1
443||	EXTU	$s[1],EXT1,24,$Td1[1]
444||	EXTU	$s[0],EXT3,24,$Td3[0]
445	LDBU	*${TEB}[$Td1[1]],$Td1[1]	; Td1[s1>>8],	t2
446||	LDBU	*${TEA}[$Td3[0]],$Td3[0]	; Td3[s0>>24],	t3
447||	EXTU	$s[0],EXT2,24,$Td2[0]
448||	EXTU	$s[1],EXT2,24,$Td2[1]
449	LDBU	*${TEA}[$Td2[0]],$Td2[0]	; Td2[s0>>16],	t2
450||	LDBU	*${TEB}[$Td2[1]],$Td2[1]	; Td2[s1>>16],	t3
451||	EXTU	$s[3],EXT3,24,$Td3[3]
452||	EXTU	$s[2],EXT1,24,$Td1[2]
453	LDBU	*${TEB}[$Td3[3]],$Td3[3]	; Td3[s3>>24],	t2
454||	LDBU	*${TEA}[$Td1[2]],$Td1[2]	; Td1[s2>>8],	t3
455||	EXTU	$s[2],EXT0,24,$Td0[2]
456||	EXTU	$s[3],EXT0,24,$Td0[3]
457	LDBU	*${TEA}[$Td0[2]],$Td0[2]	; Td0[s2],	t2
458||	LDBU	*${TEB}[$Td0[3]],$Td0[3]	; Td0[s3],	t3
459
;; Reassemble the looked-up bytes into four words with PACK2/PACKL4 and
;; XOR them with the last round key. The return branch (BNOP RA) is
;; issued in the middle of this sequence; the five execute packets that
;; follow it, including the [B2]-predicated stores, run before control
;; leaves the routine.
460	.if	.BIG_ENDIAN
461	PACK2	$Td0[0],$Td1[3],$Td0[0]
462||	PACK2	$Td0[1],$Td1[0],$Td0[1]
463	PACK2	$Td2[2],$Td3[1],$Td2[2]
464||	PACK2	$Td2[3],$Td3[2],$Td2[3]
465	PACKL4	$Td0[0],$Td2[2],$Td0[0]
466||	PACKL4	$Td0[1],$Td2[3],$Td0[1]
467	XOR	$K[0],$Td0[0],$Td0[0]		; s[0]
468||	XOR	$K[1],$Td0[1],$Td0[1]		; s[1]
469
470	PACK2	$Td0[2],$Td1[1],$Td0[2]
471||	PACK2	$Td0[3],$Td1[2],$Td0[3]
472	PACK2	$Td2[0],$Td3[3],$Td2[0]
473||	PACK2	$Td2[1],$Td3[0],$Td2[1]
474||	BNOP	RA
475	PACKL4	$Td0[2],$Td2[0],$Td0[2]
476||	PACKL4	$Td0[3],$Td2[1],$Td0[3]
477	XOR	$K[2],$Td0[2],$Td0[2]		; s[2]
478||	XOR	$K[3],$Td0[3],$Td0[3]		; s[3]
479
480	MV	$Td0[0],A9
481||	MV	$Td0[1],A8
482	MV	$Td0[2],B9
483||	MV	$Td0[3],B8
484|| [B2]	STNDW	A9:A8,*OUT++
485   [B2]	STNDW	B9:B8,*OUT++
486	.else
487	PACK2	$Td1[3],$Td0[0],$Td1[3]
488||	PACK2	$Td1[0],$Td0[1],$Td1[0]
489	PACK2	$Td3[1],$Td2[2],$Td3[1]
490||	PACK2	$Td3[2],$Td2[3],$Td3[2]
491	PACKL4	$Td3[1],$Td1[3],$Td1[3]
492||	PACKL4	$Td3[2],$Td1[0],$Td1[0]
493	XOR	$K[0],$Td1[3],$Td1[3]		; s[0]
494||	XOR	$K[1],$Td1[0],$Td1[0]		; s[1]
495
496	PACK2	$Td1[1],$Td0[2],$Td1[1]
497||	PACK2	$Td1[2],$Td0[3],$Td1[2]
498	PACK2	$Td3[3],$Td2[0],$Td3[3]
499||	PACK2	$Td3[0],$Td2[1],$Td3[0]
500||	BNOP	RA
501	PACKL4	$Td3[3],$Td1[1],$Td1[1]
502||	PACKL4	$Td3[0],$Td1[2],$Td1[2]
503	XOR	$K[2],$Td1[1],$Td1[1]		; s[2]
504||	XOR	$K[3],$Td1[2],$Td1[2]		; s[3]
505
506	MV	$Td1[3],A8
507||	MV	$Td1[0],A9
508	MV	$Td1[1],B8
509||	MV	$Td1[2],B9
510|| [B2]	STNDW	A9:A8,*OUT++
511   [B2]	STNDW	B9:B8,*OUT++
512	.endif
513	.endasmfunc
514___
# Lexical scope for the key-setup routines: the register lists declared
# here shadow or extend the file-level ones for this block only.
515{
# Eight key registers: the four round-key registers followed by the four
# state registers.
516my @K=(@K,@s);			# extended key
# B16..B19, used for the bytes looked up while expanding the key.
517my @Te4=map("B$_",(16..19));
518
# The Te0..Te3 register lists are reused under new names for the products
# computed in AES_set_decrypt_key.
519my @Kx9=@Te0;			# used in AES_set_decrypt_key
520my @KxB=@Te1;
521my @KxD=@Te2;
522my @KxE=@Te3;
523
# Append the key-setup assembly to the generated code.
524$code.=<<___;
;;--------------------------------------------------------------------
;; AES_set_encrypt_key: expand a user key into the key schedule.
;;   In:  INP (A4) -> user key, BITS (B4, alias of OUT) = key length in
;;        bits, KEY (A6) -> key schedule to fill, RA (B3) = return address.
;;   Out: RET (A4) = 0 on success, -1 if INP or KEY is zero, -2 if
;;        BITS>>5 is not 4, 6 or 8.
;;        B0 = number of rounds on success and 0 on failure;
;;        AES_set_decrypt_key branches to __set_encrypt_key and tests B0.
;;--------------------------------------------------------------------
525	.asg	OUT,BITS
526
527	.global	_AES_set_encrypt_key
528_AES_set_encrypt_key:
529__set_encrypt_key:
530	.asmfunc
531	MV	INP,A0
532||	SHRU	BITS,5,BITS			; 128-192-256 -> 4-6-8
533||	MV	KEY,A1
;; Null-pointer checks. When INP is zero A1 is forced to 1 so that the
;; second B RA (for KEY being zero) is not taken as well.
534  [!A0]	B	RA
535||[!A0]	MVK	-1,RET
536||[!A0]	MVK	1,A1				; only one B RA
537  [!A1]	B	RA
538||[!A1]	MVK	-1,RET
539||[!A1]	MVK	0,A0
540||	MVK	0,B0
541||	MVK	0,A1
;; With both pointers valid (A0 non-zero): load the first 8 key bytes
;; unconditionally and up to 24 more depending on BITS, and branch to
;; the handler for the matching key size. All later instructions up to
;; the final BNOP are predicated on A0 as well.
542   [A0]	LDNDW	*INP++,A9:A8
543|| [A0]	CMPEQ	4,BITS,B0
544|| [A0]	CMPLT	3,BITS,A1
545   [B0]	B	key128?
546|| [A1]	LDNDW	*INP++,B9:B8
547|| [A0]	CMPEQ	6,BITS,B0
548|| [A0]	CMPLT	5,BITS,A1
549   [B0]	B	key192?
550|| [A1]	LDNDW	*INP++,B17:B16
551|| [A0]	CMPEQ	8,BITS,B0
552|| [A0]	CMPLT	7,BITS,A1
553   [B0]	B	key256?
554|| [A1]	LDNDW	*INP++,B19:B18
555
;; Set up the key-schedule pointers (KEY+0 and KEY+4) and form the
;; address of AES_Te4 PC-relatively: B6 (from ADDKPC) plus the offset
;; built by MVKL/MVKH.
556	.if	__TI_EABI__
557   [A0]	ADD	0,KEY,$KPA
558|| [A0]	ADD	4,KEY,$KPB
559|| [A0]	MVKL	\$PCR_OFFSET(AES_Te4,__set_encrypt_key),$TEA
560|| [A0]	ADDKPC	__set_encrypt_key,B6
561   [A0]	MVKH	\$PCR_OFFSET(AES_Te4,__set_encrypt_key),$TEA
562   [A0]	ADD	B6,$TEA,$TEA			; AES_Te4
563	.else
564   [A0]	ADD	0,KEY,$KPA
565|| [A0]	ADD	4,KEY,$KPB
566|| [A0]	MVKL	(AES_Te4-__set_encrypt_key),$TEA
567|| [A0]	ADDKPC	__set_encrypt_key,B6
568   [A0]	MVKH	(AES_Te4-__set_encrypt_key),$TEA
569   [A0]	ADD	B6,$TEA,$TEA			; AES_Te4
570	.endif
571	NOP
572	NOP
573
;; Reached only when none of the branches above was taken.
574	BNOP	RA,5
575||	MVK	-2,RET				; unknown bit length
576||	MVK	0,B0				; redundant
577;;====================================================================
578;;====================================================================
;; 128-bit key. The four key words are placed in K[0], K[1], Te4[2]
;; (copied to K[2] inside the loop) and K[3]. A30 points 256 bytes past
;; the byte table, where the rcon words are read from; ILC is loaded
;; with 9.
579key128?:
580	.if	.BIG_ENDIAN
581	MV	A9,$K[0]
582||	MV	A8,$K[1]
583||	MV	B9,$Te4[2]
584||	MV	B8,$K[3]
585	.else
586	MV	A8,$K[0]
587||	MV	A9,$K[1]
588||	MV	B8,$Te4[2]
589||	MV	B9,$K[3]
590	.endif
591
592	MVK	256,A0
593||	MVK	9,B0
594
595	SPLOOPD	14
596||	MVC	B0,ILC
597||	MV	$TEA,$TEB
598||	ADD	$TEA,A0,A30			; rcon
599;;====================================================================
;; Loop body: the four bytes of K[3] are extracted, looked up in the
;; byte table with LDBU and repacked with PACK2/PACKL4. The current four
;; words are stored to the schedule, then the next four are derived by
;; XORing in rcon[i] and chaining through K[0]..K[3].
600	LDW	*A30++[1],A31			; rcon[i]
601||	MV	$Te4[2],$K[2]
602||	EXTU	$K[3],EXT1,24,$Te4[0]
603	LDBU	*${TEB}[$Te4[0]],$Te4[0]
604||	MV	$K[3],A0
605||	EXTU	$K[3],EXT2,24,$Te4[1]
606	LDBU	*${TEB}[$Te4[1]],$Te4[1]
607||	EXTU	A0,EXT3,24,A0
608||	EXTU	$K[3],EXT0,24,$Te4[3]
609	.if	.BIG_ENDIAN
610	LDBU	*${TEA}[A0],$Te4[3]
611||	LDBU	*${TEB}[$Te4[3]],A0
612	.else
613	LDBU	*${TEA}[A0],A0
614||	LDBU	*${TEB}[$Te4[3]],$Te4[3]
615	.endif
616
617	STW	$K[0],*$KPA++[2]
618||	STW	$K[1],*$KPB++[2]
619	STW	$K[2],*$KPA++[2]
620||	STW	$K[3],*$KPB++[2]
621
622	XOR	A31,$K[0],$K[0]			; ^=rcon[i]
623	.if	.BIG_ENDIAN
624	PACK2	$Te4[0],$Te4[1],$Te4[1]
625	PACK2	$Te4[3],A0,$Te4[3]
626	PACKL4	$Te4[1],$Te4[3],$Te4[3]
627	.else
628	PACK2	$Te4[1],$Te4[0],$Te4[1]
629	PACK2	$Te4[3],A0,$Te4[3]
630	PACKL4	$Te4[3],$Te4[1],$Te4[3]
631	.endif
632	XOR	$Te4[3],$K[0],$Te4[0]		; K[0]
633	XOR	$Te4[0],$K[1],$K[1]		; K[1]
634	MV	$Te4[0],$K[0]
635||	XOR	$K[1],$K[2],$Te4[2]		; K[2]
636	XOR	$Te4[2],$K[3],$K[3]		; K[3]
637	SPKERNEL
638;;====================================================================
;; Store the last round key, write the round count (10) into the
;; schedule and return 0. The instructions after BNOP RA execute before
;; the branch takes effect.
639	BNOP	RA
640	MV	$Te4[2],$K[2]
641||	STW	$K[0],*$KPA++[2]
642||	STW	$K[1],*$KPB++[2]
643	STW	$K[2],*$KPA++[2]
644||	STW	$K[3],*$KPB++[2]
645	MVK	10,B0				; rounds
646	STW	B0,*++${KPB}[15]
647	MVK	0,RET
648;;====================================================================
649;;====================================================================
;; 192-bit key. Six key words are held in K[0]..K[3], Te4[2] (copied to
;; K[4] inside the loop) and K[5]. The loop counter B0 starts at 6 and
;; is handled by BDEC; A30 points at the rcon words.
650key192?:
651	.if	.BIG_ENDIAN
652	MV	A9,$K[0]
653||	MV	A8,$K[1]
654||	MV	B9,$K[2]
655||	MV	B8,$K[3]
656	MV	B17,$Te4[2]
657||	MV	B16,$K[5]
658	.else
659	MV	A8,$K[0]
660||	MV	A9,$K[1]
661||	MV	B8,$K[2]
662||	MV	B9,$K[3]
663	MV	B16,$Te4[2]
664||	MV	B17,$K[5]
665	.endif
666
667	MVK	256,A0
668||	MVK	6,B0
669	MV	$TEA,$TEB
670||	ADD	$TEA,A0,A30			; rcon
671;;====================================================================
;; Loop body: same scheme as the 128-bit case, but the substituted word
;; is K[5], six words are stored per pass and the XOR chain runs through
;; K[0]..K[5]. The instructions after BDEC execute before the branch
;; back to loop192? takes effect.
672loop192?:
673	LDW	*A30++[1],A31			; rcon[i]
674||	MV	$Te4[2],$K[4]
675||	EXTU	$K[5],EXT1,24,$Te4[0]
676	LDBU	*${TEB}[$Te4[0]],$Te4[0]
677||	MV	$K[5],A0
678||	EXTU	$K[5],EXT2,24,$Te4[1]
679	LDBU	*${TEB}[$Te4[1]],$Te4[1]
680||	EXTU	A0,EXT3,24,A0
681||	EXTU	$K[5],EXT0,24,$Te4[3]
682	.if	.BIG_ENDIAN
683	LDBU	*${TEA}[A0],$Te4[3]
684||	LDBU	*${TEB}[$Te4[3]],A0
685	.else
686	LDBU	*${TEA}[A0],A0
687||	LDBU	*${TEB}[$Te4[3]],$Te4[3]
688	.endif
689
690	STW	$K[0],*$KPA++[2]
691||	STW	$K[1],*$KPB++[2]
692	STW	$K[2],*$KPA++[2]
693||	STW	$K[3],*$KPB++[2]
694	STW	$K[4],*$KPA++[2]
695||	STW	$K[5],*$KPB++[2]
696
697	XOR	A31,$K[0],$K[0]			; ^=rcon[i]
698	.if	.BIG_ENDIAN
699	PACK2	$Te4[0],$Te4[1],$Te4[1]
700||	PACK2	$Te4[3],A0,$Te4[3]
701	PACKL4	$Te4[1],$Te4[3],$Te4[3]
702	.else
703	PACK2	$Te4[1],$Te4[0],$Te4[1]
704||	PACK2	$Te4[3],A0,$Te4[3]
705	PACKL4	$Te4[3],$Te4[1],$Te4[3]
706	.endif
707	BDEC	loop192?,B0
708||	XOR	$Te4[3],$K[0],$Te4[0]		; K[0]
709	XOR	$Te4[0],$K[1],$K[1]		; K[1]
710	MV	$Te4[0],$K[0]
711||	XOR	$K[1],$K[2],$Te4[2]		; K[2]
712	XOR	$Te4[2],$K[3],$K[3]		; K[3]
713	MV	$Te4[2],$K[2]
714||	XOR	$K[3],$K[4],$Te4[2]		; K[4]
715	XOR	$Te4[2],$K[5],$K[5]		; K[5]
716;;====================================================================
;; Store the final four words, write the round count (12) into the
;; schedule and return 0.
717	BNOP	RA
718	STW	$K[0],*$KPA++[2]
719||	STW	$K[1],*$KPB++[2]
720	STW	$K[2],*$KPA++[2]
721||	STW	$K[3],*$KPB++[2]
722	MVK	12,B0				; rounds
723	STW	B0,*++${KPB}[7]
724	MVK	0,RET
725;;====================================================================
726;;====================================================================
;; 256-bit key. Eight key words are held in K[0]..K[5], Te4[2] (copied
;; to K[6] inside the loop) and K[7]. B0 starts at 6 and is decremented
;; in the middle of each pass; when it is already zero the pass ends
;; with the branch to done256?.
727key256?:
728	.if	.BIG_ENDIAN
729	MV	A9,$K[0]
730||	MV	A8,$K[1]
731||	MV	B9,$K[2]
732||	MV	B8,$K[3]
733	MV	B17,$K[4]
734||	MV	B16,$K[5]
735||	MV	B19,$Te4[2]
736||	MV	B18,$K[7]
737	.else
738	MV	A8,$K[0]
739||	MV	A9,$K[1]
740||	MV	B8,$K[2]
741||	MV	B9,$K[3]
742	MV	B16,$K[4]
743||	MV	B17,$K[5]
744||	MV	B18,$Te4[2]
745||	MV	B19,$K[7]
746	.endif
747
748	MVK	256,A0
749||	MVK	6,B0
750	MV	$TEA,$TEB
751||	ADD	$TEA,A0,A30			; rcon
752;;====================================================================
;; First half of a pass: K[7] is substituted (byte selectors EXT1, EXT2,
;; EXT3, EXT0), all eight words are stored, and K[0]..K[3] are updated
;; with rcon[i] XORed in.
753loop256?:
754	LDW	*A30++[1],A31			; rcon[i]
755||	MV	$Te4[2],$K[6]
756||	EXTU	$K[7],EXT1,24,$Te4[0]
757	LDBU	*${TEB}[$Te4[0]],$Te4[0]
758||	MV	$K[7],A0
759||	EXTU	$K[7],EXT2,24,$Te4[1]
760	LDBU	*${TEB}[$Te4[1]],$Te4[1]
761||	EXTU	A0,EXT3,24,A0
762||	EXTU	$K[7],EXT0,24,$Te4[3]
763	.if	.BIG_ENDIAN
764	LDBU	*${TEA}[A0],$Te4[3]
765||	LDBU	*${TEB}[$Te4[3]],A0
766	.else
767	LDBU	*${TEA}[A0],A0
768||	LDBU	*${TEB}[$Te4[3]],$Te4[3]
769	.endif
770
771	STW	$K[0],*$KPA++[2]
772||	STW	$K[1],*$KPB++[2]
773	STW	$K[2],*$KPA++[2]
774||	STW	$K[3],*$KPB++[2]
775	STW	$K[4],*$KPA++[2]
776||	STW	$K[5],*$KPB++[2]
777	STW	$K[6],*$KPA++[2]
778||	STW	$K[7],*$KPB++[2]
779||	XOR	A31,$K[0],$K[0]			; ^=rcon[i]
780	.if	.BIG_ENDIAN
781	PACK2	$Te4[0],$Te4[1],$Te4[1]
782||	PACK2	$Te4[3],A0,$Te4[3]
783	PACKL4	$Te4[1],$Te4[3],$Te4[3]
784||[!B0]	B	done256?
785	.else
786	PACK2	$Te4[1],$Te4[0],$Te4[1]
787||	PACK2	$Te4[3],A0,$Te4[3]
788	PACKL4	$Te4[3],$Te4[1],$Te4[3]
789||[!B0]	B	done256?
790	.endif
791	XOR	$Te4[3],$K[0],$Te4[0]		; K[0]
792	XOR	$Te4[0],$K[1],$K[1]		; K[1]
793	MV	$Te4[0],$K[0]
794||	XOR	$K[1],$K[2],$Te4[2]		; K[2]
795	XOR	$Te4[2],$K[3],$K[3]		; K[3]
796
;; Second half of a pass: K[3] is substituted with the byte selectors in
;; plain order (EXT0, EXT1, EXT2, EXT3) and no rcon term, and the result
;; is chained through K[4]..K[7]. The branch back to loop256? is issued
;; before those XORs, which execute ahead of the branch taking effect.
797	MV	$Te4[2],$K[2]
798|| [B0]	EXTU	$K[3],EXT0,24,$Te4[0]
799|| [B0]	SUB	B0,1,B0
800	LDBU	*${TEB}[$Te4[0]],$Te4[0]
801||	MV	$K[3],A0
802||	EXTU	$K[3],EXT1,24,$Te4[1]
803	LDBU	*${TEB}[$Te4[1]],$Te4[1]
804||	EXTU	A0,EXT2,24,A0
805||	EXTU	$K[3],EXT3,24,$Te4[3]
806
807	.if	.BIG_ENDIAN
808	LDBU	*${TEA}[A0],$Te4[3]
809||	LDBU	*${TEB}[$Te4[3]],A0
810	NOP	3
811	PACK2	$Te4[0],$Te4[1],$Te4[1]
812	PACK2	$Te4[3],A0,$Te4[3]
813||	B	loop256?
814	PACKL4	$Te4[1],$Te4[3],$Te4[3]
815	.else
816	LDBU	*${TEA}[A0],A0
817||	LDBU	*${TEB}[$Te4[3]],$Te4[3]
818	NOP	3
819	PACK2	$Te4[1],$Te4[0],$Te4[1]
820	PACK2	$Te4[3],A0,$Te4[3]
821||	B	loop256?
822	PACKL4	$Te4[3],$Te4[1],$Te4[3]
823	.endif
824
825	XOR	$Te4[3],$K[4],$Te4[0]		; K[4]
826	XOR	$Te4[0],$K[5],$K[5]		; K[5]
827	MV	$Te4[0],$K[4]
828||	XOR	$K[5],$K[6],$Te4[2]		; K[6]
829	XOR	$Te4[2],$K[7],$K[7]		; K[7]
830;;====================================================================
;; Store the final four words, write the round count (14) into the
;; schedule and return 0.
831done256?:
832	BNOP	RA
833	STW	$K[0],*$KPA++[2]
834||	STW	$K[1],*$KPB++[2]
835	STW	$K[2],*$KPA++[2]
836||	STW	$K[3],*$KPB++[2]
837	MVK	14,B0				; rounds
838	STW	B0,*--${KPB}[1]
839	MVK	0,RET
840	.endasmfunc
841
;;--------------------------------------------------------------------
;; AES_set_decrypt_key: build the decryption key schedule.
;;   Arguments are the same as for AES_set_encrypt_key, which this
;;   routine branches to first with RA redirected to the local label
;;   ret?. KEY is kept in B30 and the caller's return address in B31
;;   across that call.
;;   If the call leaves B0 = 0 (failure), control returns to the caller
;;   at once with RET as set by the key-expansion code. Otherwise the
;;   round keys are swapped end-for-end, InvMixColumns is applied to all
;;   but the first and last of them, and RET is set to 0.
;;   GFPGFR is changed for the GMPY4 instructions and restored before
;;   returning. B3 (the RA register) is used as scratch for the
;;   polynomial value, which is why the return goes through B31.
;;--------------------------------------------------------------------
842	.global	_AES_set_decrypt_key
843_AES_set_decrypt_key:
844	.asmfunc
845	B	__set_encrypt_key		; guarantee local call
846	MV	KEY,B30				; B30 is not modified
847	MV	RA, B31				; B31 is not modified
848	ADDKPC	ret?,RA,2
;; B1 = rounds/2 - 1 becomes the count for the swap loop below; A0 =
;; rounds*16 is the byte offset of the last round key; B3 = 0x0700001B
;; is the value later written to GFPGFR.
849ret?:						; B0 holds rounds or zero
850  [!B0]	BNOP	B31				; return if zero
851   [B0]	SHL	B0,4,A0				; offset to last round key
852   [B0]	SHRU	B0,1,B1
853   [B0]	SUB	B1,1,B1
854   [B0]	MVK	0x0000001B,B3			; AES polynomial
855   [B0]	MVKH	0x07000000,B3
856
857	SPLOOPD	9				; flip round keys
858||	MVC	B1,ILC
859||	MV	B30,$KPA
860||	ADD	B30,A0,$KPB
861||	MVK	16,A0				; sizeof(round key)
862;;====================================================================
;; Swap loop: KPA walks forward from the first round key and KPB
;; backward from the last, 16 bytes at a time; each pass loads four
;; words from both ends and stores them crosswise. The store offsets
;; (-4..-1 and 4..7) compensate for the pointers having been advanced
;; before the stores issue.
863	LDW	*${KPA}[0],A16
864||	LDW	*${KPB}[0],B16
865	LDW	*${KPA}[1],A17
866||	LDW	*${KPB}[1],B17
867	LDW	*${KPA}[2],A18
868||	LDW	*${KPB}[2],B18
869	LDW	*${KPA}[3],A19
870||	ADD	$KPA,A0,$KPA
871||	LDW	*${KPB}[3],B19
872||	SUB	$KPB,A0,$KPB
873	NOP
874	STW	B16,*${KPA}[-4]
875||	STW	A16,*${KPB}[4]
876	STW	B17,*${KPA}[-3]
877||	STW	A17,*${KPB}[5]
878	STW	B18,*${KPA}[-2]
879||	STW	A18,*${KPB}[6]
880	STW	B19,*${KPA}[-1]
881||	STW	A19,*${KPB}[7]
882	SPKERNEL
883;;====================================================================
;; Set-up for the InvMixColumns pass. Both pointers are set to the
;; second round key (B30 + 16). In that same execute packet B30 is read
;; as the key pointer and overwritten with the saved GFPGFR value.
;; A24/B24/A25/B25 hold the multipliers 0x09, 0x0B, 0x0D and 0x0E
;; replicated into all four bytes. ILC receives rounds-1, and B0 is then
;; decremented once more to gate the look-ahead loads and multiplies
;; inside the loop.
884	SUB	B0,1,B0				; skip last round
885||	ADD	B30,A0,$KPA			; skip first round
886||	ADD	B30,A0,$KPB
887||	MVC	GFPGFR,B30			; save GFPGFR
888	LDW	*${KPA}[0],$K[0]
889||	LDW	*${KPB}[1],$K[1]
890||	MVC	B3,GFPGFR
891	LDW	*${KPA}[2],$K[2]
892||	LDW	*${KPB}[3],$K[3]
893	MVK	0x00000909,A24
894||	MVK	0x00000B0B,B24
895	MVKH	0x09090000,A24
896||	MVKH	0x0B0B0000,B24
897	MVC	B0,ILC
898||	SUB	B0,1,B0
899
;; The 0x09 and 0x0B products for the first round key are computed ahead
;; of the loop; inside the loop they are computed for the following
;; round key under the B0 predicate.
900	GMPY4	$K[0],A24,$Kx9[0]		; ·0x09
901||	GMPY4	$K[1],A24,$Kx9[1]
902||	MVK	0x00000D0D,A25
903||	MVK	0x00000E0E,B25
904	GMPY4	$K[2],A24,$Kx9[2]
905||	GMPY4	$K[3],A24,$Kx9[3]
906||	MVKH	0x0D0D0000,A25
907||	MVKH	0x0E0E0000,B25
908
909	GMPY4	$K[0],B24,$KxB[0]		; ·0x0B
910||	GMPY4	$K[1],B24,$KxB[1]
911	GMPY4	$K[2],B24,$KxB[2]
912||	GMPY4	$K[3],B24,$KxB[3]
913
914	SPLOOP	11				; InvMixColumns
915;;====================================================================
;; Per round key: the four byte-wise products (by 0x09, 0x0B, 0x0D and
;; 0x0E) of each word are combined with SWAP2 (rotate by 16), ROTL by
;; TBL3 and XOR, and the result is stored back over the round key. The
;; current key words are copied to s[0..3] so that K[0..3] can already
;; be reloaded with the next round key.
916	GMPY4	$K[0],A25,$KxD[0]		; ·0x0D
917||	GMPY4	$K[1],A25,$KxD[1]
918||	SWAP2	$Kx9[0],$Kx9[0]			; rotate by 16
919||	SWAP2	$Kx9[1],$Kx9[1]
920||	MV	$K[0],$s[0]			; this or DINT
921||	MV	$K[1],$s[1]
922|| [B0]	LDW	*${KPA}[4],$K[0]
923|| [B0]	LDW	*${KPB}[5],$K[1]
924	GMPY4	$K[2],A25,$KxD[2]
925||	GMPY4	$K[3],A25,$KxD[3]
926||	SWAP2	$Kx9[2],$Kx9[2]
927||	SWAP2	$Kx9[3],$Kx9[3]
928||	MV	$K[2],$s[2]
929||	MV	$K[3],$s[3]
930|| [B0]	LDW	*${KPA}[6],$K[2]
931|| [B0]	LDW	*${KPB}[7],$K[3]
932
933	GMPY4	$s[0],B25,$KxE[0]		; ·0x0E
934||	GMPY4	$s[1],B25,$KxE[1]
935||	XOR	$Kx9[0],$KxB[0],$KxB[0]
936||	XOR	$Kx9[1],$KxB[1],$KxB[1]
937	GMPY4	$s[2],B25,$KxE[2]
938||	GMPY4	$s[3],B25,$KxE[3]
939||	XOR	$Kx9[2],$KxB[2],$KxB[2]
940||	XOR	$Kx9[3],$KxB[3],$KxB[3]
941
942	ROTL	$KxB[0],TBL3,$KxB[0]
943||	ROTL	$KxB[1],TBL3,$KxB[1]
944||	SWAP2	$KxD[0],$KxD[0]			; rotate by 16
945||	SWAP2	$KxD[1],$KxD[1]
946	ROTL	$KxB[2],TBL3,$KxB[2]
947||	ROTL	$KxB[3],TBL3,$KxB[3]
948||	SWAP2	$KxD[2],$KxD[2]
949||	SWAP2	$KxD[3],$KxD[3]
950
951	XOR	$KxE[0],$KxD[0],$KxE[0]
952||	XOR	$KxE[1],$KxD[1],$KxE[1]
953|| [B0]	GMPY4	$K[0],A24,$Kx9[0]		; ·0x09
954|| [B0]	GMPY4	$K[1],A24,$Kx9[1]
955||	ADDAW	$KPA,4,$KPA
956	XOR	$KxE[2],$KxD[2],$KxE[2]
957||	XOR	$KxE[3],$KxD[3],$KxE[3]
958|| [B0]	GMPY4	$K[2],A24,$Kx9[2]
959|| [B0]	GMPY4	$K[3],A24,$Kx9[3]
960||	ADDAW	$KPB,4,$KPB
961
962	XOR	$KxB[0],$KxE[0],$KxE[0]
963||	XOR	$KxB[1],$KxE[1],$KxE[1]
964|| [B0]	GMPY4	$K[0],B24,$KxB[0]		; ·0x0B
965|| [B0]	GMPY4	$K[1],B24,$KxB[1]
966	XOR	$KxB[2],$KxE[2],$KxE[2]
967||	XOR	$KxB[3],$KxE[3],$KxE[3]
968|| [B0]	GMPY4	$K[2],B24,$KxB[2]
969|| [B0]	GMPY4	$K[3],B24,$KxB[3]
970||	STW	$KxE[0],*${KPA}[-4]
971||	STW	$KxE[1],*${KPB}[-3]
972	STW	$KxE[2],*${KPA}[-2]
973||	STW	$KxE[3],*${KPB}[-1]
974|| [B0]	SUB	B0,1,B0
975	SPKERNEL
976;;====================================================================
;; Return through the saved address in B31; GFPGFR is restored and RET
;; is cleared after the branch instruction has been issued.
977	BNOP	B31,3
978	MVC	B30,GFPGFR			; restore GFPGFR(*)
979	MVK	0,RET
980	.endasmfunc
981___
982# (*)	Even though ABI doesn't specify GFPGFR as non-volatile, there
983#	are code samples out there that *assume* its default value.
984}
# Lexical scope for AES_ctr32_encrypt.
985{
# Argument registers, in order: input, output, block count, key schedule
# and counter block. Only $blocks, $key and $ivp are interpolated into
# the template that follows; it refers to the input and output pointers
# through the INP and OUT assembler symbols, which name the same
# registers as $inp and $out.
986my ($inp,$out,$blocks,$key,$ivp)=("A4","B4","A6","B6","A8");
# Append the CTR-mode assembly to the generated code.
987$code.=<<___;
;;--------------------------------------------------------------------
;; AES_ctr32_encrypt: CTR-mode encryption of whole 16-byte blocks.
;;   In:  INP (A4) -> input, OUT (B4) -> output, A6 = number of blocks,
;;        B6 -> key schedule, A8 -> 16-byte counter block,
;;        RA (B3) = return address.
;;   The counter block is kept in A31:A30 and B31:B30, the block count
;;   in A2, the caller's return address in B27 and the key pointer in
;;   B26. Each pass branches to __encrypt with B2 = 0, so that routine
;;   neither loads input nor stores output: the counter value is handed
;;   over in A9:A8/B9:B8 and the encrypted counter comes back in the same
;;   registers.
;;   A1 is 0 during the first pass, when there is nothing to XOR or store
;;   yet, and 1 afterwards.
;;--------------------------------------------------------------------
988	.global	_AES_ctr32_encrypt
989_AES_ctr32_encrypt:
990	.asmfunc
991	LDNDW	*${ivp}[0],A31:A30	; load counter value
992||	MV	$blocks,A2		; reassign $blocks
993||	DMV	RA,$key,B27:B26		; reassign RA and $key
994	LDNDW	*${ivp}[1],B31:B30
995||	MVK	0,B2			; don't let __encrypt load input
996||	MVK	0,A1			; and postpone writing output
;; On little-endian the counter word in B31 is byte-reversed once here
;; (SWAP2 followed by SWAP4) so it can be incremented with a plain ADD;
;; it is converted back for each block inside the loop.
997	.if	.BIG_ENDIAN
998	NOP
999	.else
1000	NOP	4
1001	SWAP2	B31,B31			; keep least significant 32 bits
1002	SWAP4	B31,B31			; in host byte order
1003	.endif
;; Loop head, reached on entry and again each time __encrypt returns.
;; With A1 set, the previous input (A29:A28, B29:B28) is XORed with the
;; encrypted counter and stored. With A2 non-zero, the next input is
;; loaded, the counter is copied into A9:A8/B9:B8 and incremented, and
;; RA is pointed back at ctr32_loop?. When A2 has reached zero the
;; routine returns through B27 instead. All of this executes in the
;; packets that follow the two branch instructions.
1004ctr32_loop?:
1005   [A2]	BNOP	__encrypt
1006|| [A1]	XOR	A29,A9,A9		; input^Ek(counter)
1007|| [A1]	XOR	A28,A8,A8
1008|| [A2]	LDNDW	*INP++,A29:A28		; load input
1009  [!A2]	BNOP	B27			; return
1010|| [A1]	XOR	B29,B9,B9
1011|| [A1]	XOR	B28,B8,B8
1012|| [A2]	LDNDW	*INP++,B29:B28
1013	.if	.BIG_ENDIAN
1014   [A1]	STNDW	A9:A8,*OUT++		; save output
1015|| [A2]	DMV	A31,A30,A9:A8		; pass counter value to __encrypt
1016   [A1]	STNDW	B9:B8,*OUT++
1017|| [A2]	DMV	B31,B30,B9:B8
1018|| [A2]	ADD	B30,1,B30		; counter++
1019	.else
1020   [A1]	STNDW	A9:A8,*OUT++		; save output
1021|| [A2]	DMV	A31,A30,A9:A8
1022|| [A2]	SWAP2	B31,B0
1023|| [A2]	ADD	B31,1,B31		; counter++
1024   [A1]	STNDW	B9:B8,*OUT++
1025|| [A2]	MV	B30,B8
1026|| [A2]	SWAP4	B0,B9
1027	.endif
1028   [A2]	ADDKPC	ctr32_loop?,RA		; return to ctr32_loop?
1029|| [A2]	MV	B26,KEY			; pass $key
1030|| [A2]	SUB	A2,1,A2			; $blocks--
1031||[!A1]	MVK	1,A1
1032	NOP
1033	NOP
1034	.endasmfunc
1035___
1036}
1037# Tables are kept in an endian-neutral manner: every entry is spelled out
# byte by byte with .byte, so the emitted image does not depend on the byte
# order of the target.
#
# Contents of the section appended to $code below, in order:
#   AES_Te   256 entries x 4 bytes, preceded by .align 128.  Each entry is
#            {02,01,01,03}*S[x] in GF(2^8); e.g. entry 0 is c6,63,63,a5
#            for S[0]=0x63.
#   AES_Te4  256 single bytes: the AES S-box.
#   rcon     10 entries x 4 bytes: AES round constants 01,02,04,...,80,1B,36
#            in the first byte of each entry, remaining three bytes zero.
#   AES_Td   256 entries x 4 bytes, preceded by .align 128.  Entry 0 is
#            51,f4,a7,50 = {0e,09,0d,0b}*Si[0] with Si[0]=0x52.
#   AES_Td4  256 single bytes: the inverse AES S-box.
# followed by an identification .cstring and a trailing .align 4.
#
# The section name is chosen by the assembler-time symbol __TI_EABI__:
# ".text:aes_asm.const" when it is non-zero, ".const:aes_asm" otherwise.
#
# The here-document is an interpolating one, which is why the '@' in the
# .cstring line is written as "\@".
1038$code.=<<___;
1039	.if	__TI_EABI__
1040	.sect	".text:aes_asm.const"
1041	.else
1042	.sect	".const:aes_asm"
1043	.endif
1044	.align	128
1045AES_Te:
1046	.byte	0xc6,0x63,0x63,0xa5,	0xf8,0x7c,0x7c,0x84
1047	.byte	0xee,0x77,0x77,0x99,	0xf6,0x7b,0x7b,0x8d
1048	.byte	0xff,0xf2,0xf2,0x0d,	0xd6,0x6b,0x6b,0xbd
1049	.byte	0xde,0x6f,0x6f,0xb1,	0x91,0xc5,0xc5,0x54
1050	.byte	0x60,0x30,0x30,0x50,	0x02,0x01,0x01,0x03
1051	.byte	0xce,0x67,0x67,0xa9,	0x56,0x2b,0x2b,0x7d
1052	.byte	0xe7,0xfe,0xfe,0x19,	0xb5,0xd7,0xd7,0x62
1053	.byte	0x4d,0xab,0xab,0xe6,	0xec,0x76,0x76,0x9a
1054	.byte	0x8f,0xca,0xca,0x45,	0x1f,0x82,0x82,0x9d
1055	.byte	0x89,0xc9,0xc9,0x40,	0xfa,0x7d,0x7d,0x87
1056	.byte	0xef,0xfa,0xfa,0x15,	0xb2,0x59,0x59,0xeb
1057	.byte	0x8e,0x47,0x47,0xc9,	0xfb,0xf0,0xf0,0x0b
1058	.byte	0x41,0xad,0xad,0xec,	0xb3,0xd4,0xd4,0x67
1059	.byte	0x5f,0xa2,0xa2,0xfd,	0x45,0xaf,0xaf,0xea
1060	.byte	0x23,0x9c,0x9c,0xbf,	0x53,0xa4,0xa4,0xf7
1061	.byte	0xe4,0x72,0x72,0x96,	0x9b,0xc0,0xc0,0x5b
1062	.byte	0x75,0xb7,0xb7,0xc2,	0xe1,0xfd,0xfd,0x1c
1063	.byte	0x3d,0x93,0x93,0xae,	0x4c,0x26,0x26,0x6a
1064	.byte	0x6c,0x36,0x36,0x5a,	0x7e,0x3f,0x3f,0x41
1065	.byte	0xf5,0xf7,0xf7,0x02,	0x83,0xcc,0xcc,0x4f
1066	.byte	0x68,0x34,0x34,0x5c,	0x51,0xa5,0xa5,0xf4
1067	.byte	0xd1,0xe5,0xe5,0x34,	0xf9,0xf1,0xf1,0x08
1068	.byte	0xe2,0x71,0x71,0x93,	0xab,0xd8,0xd8,0x73
1069	.byte	0x62,0x31,0x31,0x53,	0x2a,0x15,0x15,0x3f
1070	.byte	0x08,0x04,0x04,0x0c,	0x95,0xc7,0xc7,0x52
1071	.byte	0x46,0x23,0x23,0x65,	0x9d,0xc3,0xc3,0x5e
1072	.byte	0x30,0x18,0x18,0x28,	0x37,0x96,0x96,0xa1
1073	.byte	0x0a,0x05,0x05,0x0f,	0x2f,0x9a,0x9a,0xb5
1074	.byte	0x0e,0x07,0x07,0x09,	0x24,0x12,0x12,0x36
1075	.byte	0x1b,0x80,0x80,0x9b,	0xdf,0xe2,0xe2,0x3d
1076	.byte	0xcd,0xeb,0xeb,0x26,	0x4e,0x27,0x27,0x69
1077	.byte	0x7f,0xb2,0xb2,0xcd,	0xea,0x75,0x75,0x9f
1078	.byte	0x12,0x09,0x09,0x1b,	0x1d,0x83,0x83,0x9e
1079	.byte	0x58,0x2c,0x2c,0x74,	0x34,0x1a,0x1a,0x2e
1080	.byte	0x36,0x1b,0x1b,0x2d,	0xdc,0x6e,0x6e,0xb2
1081	.byte	0xb4,0x5a,0x5a,0xee,	0x5b,0xa0,0xa0,0xfb
1082	.byte	0xa4,0x52,0x52,0xf6,	0x76,0x3b,0x3b,0x4d
1083	.byte	0xb7,0xd6,0xd6,0x61,	0x7d,0xb3,0xb3,0xce
1084	.byte	0x52,0x29,0x29,0x7b,	0xdd,0xe3,0xe3,0x3e
1085	.byte	0x5e,0x2f,0x2f,0x71,	0x13,0x84,0x84,0x97
1086	.byte	0xa6,0x53,0x53,0xf5,	0xb9,0xd1,0xd1,0x68
1087	.byte	0x00,0x00,0x00,0x00,	0xc1,0xed,0xed,0x2c
1088	.byte	0x40,0x20,0x20,0x60,	0xe3,0xfc,0xfc,0x1f
1089	.byte	0x79,0xb1,0xb1,0xc8,	0xb6,0x5b,0x5b,0xed
1090	.byte	0xd4,0x6a,0x6a,0xbe,	0x8d,0xcb,0xcb,0x46
1091	.byte	0x67,0xbe,0xbe,0xd9,	0x72,0x39,0x39,0x4b
1092	.byte	0x94,0x4a,0x4a,0xde,	0x98,0x4c,0x4c,0xd4
1093	.byte	0xb0,0x58,0x58,0xe8,	0x85,0xcf,0xcf,0x4a
1094	.byte	0xbb,0xd0,0xd0,0x6b,	0xc5,0xef,0xef,0x2a
1095	.byte	0x4f,0xaa,0xaa,0xe5,	0xed,0xfb,0xfb,0x16
1096	.byte	0x86,0x43,0x43,0xc5,	0x9a,0x4d,0x4d,0xd7
1097	.byte	0x66,0x33,0x33,0x55,	0x11,0x85,0x85,0x94
1098	.byte	0x8a,0x45,0x45,0xcf,	0xe9,0xf9,0xf9,0x10
1099	.byte	0x04,0x02,0x02,0x06,	0xfe,0x7f,0x7f,0x81
1100	.byte	0xa0,0x50,0x50,0xf0,	0x78,0x3c,0x3c,0x44
1101	.byte	0x25,0x9f,0x9f,0xba,	0x4b,0xa8,0xa8,0xe3
1102	.byte	0xa2,0x51,0x51,0xf3,	0x5d,0xa3,0xa3,0xfe
1103	.byte	0x80,0x40,0x40,0xc0,	0x05,0x8f,0x8f,0x8a
1104	.byte	0x3f,0x92,0x92,0xad,	0x21,0x9d,0x9d,0xbc
1105	.byte	0x70,0x38,0x38,0x48,	0xf1,0xf5,0xf5,0x04
1106	.byte	0x63,0xbc,0xbc,0xdf,	0x77,0xb6,0xb6,0xc1
1107	.byte	0xaf,0xda,0xda,0x75,	0x42,0x21,0x21,0x63
1108	.byte	0x20,0x10,0x10,0x30,	0xe5,0xff,0xff,0x1a
1109	.byte	0xfd,0xf3,0xf3,0x0e,	0xbf,0xd2,0xd2,0x6d
1110	.byte	0x81,0xcd,0xcd,0x4c,	0x18,0x0c,0x0c,0x14
1111	.byte	0x26,0x13,0x13,0x35,	0xc3,0xec,0xec,0x2f
1112	.byte	0xbe,0x5f,0x5f,0xe1,	0x35,0x97,0x97,0xa2
1113	.byte	0x88,0x44,0x44,0xcc,	0x2e,0x17,0x17,0x39
1114	.byte	0x93,0xc4,0xc4,0x57,	0x55,0xa7,0xa7,0xf2
1115	.byte	0xfc,0x7e,0x7e,0x82,	0x7a,0x3d,0x3d,0x47
1116	.byte	0xc8,0x64,0x64,0xac,	0xba,0x5d,0x5d,0xe7
1117	.byte	0x32,0x19,0x19,0x2b,	0xe6,0x73,0x73,0x95
1118	.byte	0xc0,0x60,0x60,0xa0,	0x19,0x81,0x81,0x98
1119	.byte	0x9e,0x4f,0x4f,0xd1,	0xa3,0xdc,0xdc,0x7f
1120	.byte	0x44,0x22,0x22,0x66,	0x54,0x2a,0x2a,0x7e
1121	.byte	0x3b,0x90,0x90,0xab,	0x0b,0x88,0x88,0x83
1122	.byte	0x8c,0x46,0x46,0xca,	0xc7,0xee,0xee,0x29
1123	.byte	0x6b,0xb8,0xb8,0xd3,	0x28,0x14,0x14,0x3c
1124	.byte	0xa7,0xde,0xde,0x79,	0xbc,0x5e,0x5e,0xe2
1125	.byte	0x16,0x0b,0x0b,0x1d,	0xad,0xdb,0xdb,0x76
1126	.byte	0xdb,0xe0,0xe0,0x3b,	0x64,0x32,0x32,0x56
1127	.byte	0x74,0x3a,0x3a,0x4e,	0x14,0x0a,0x0a,0x1e
1128	.byte	0x92,0x49,0x49,0xdb,	0x0c,0x06,0x06,0x0a
1129	.byte	0x48,0x24,0x24,0x6c,	0xb8,0x5c,0x5c,0xe4
1130	.byte	0x9f,0xc2,0xc2,0x5d,	0xbd,0xd3,0xd3,0x6e
1131	.byte	0x43,0xac,0xac,0xef,	0xc4,0x62,0x62,0xa6
1132	.byte	0x39,0x91,0x91,0xa8,	0x31,0x95,0x95,0xa4
1133	.byte	0xd3,0xe4,0xe4,0x37,	0xf2,0x79,0x79,0x8b
1134	.byte	0xd5,0xe7,0xe7,0x32,	0x8b,0xc8,0xc8,0x43
1135	.byte	0x6e,0x37,0x37,0x59,	0xda,0x6d,0x6d,0xb7
1136	.byte	0x01,0x8d,0x8d,0x8c,	0xb1,0xd5,0xd5,0x64
1137	.byte	0x9c,0x4e,0x4e,0xd2,	0x49,0xa9,0xa9,0xe0
1138	.byte	0xd8,0x6c,0x6c,0xb4,	0xac,0x56,0x56,0xfa
1139	.byte	0xf3,0xf4,0xf4,0x07,	0xcf,0xea,0xea,0x25
1140	.byte	0xca,0x65,0x65,0xaf,	0xf4,0x7a,0x7a,0x8e
1141	.byte	0x47,0xae,0xae,0xe9,	0x10,0x08,0x08,0x18
1142	.byte	0x6f,0xba,0xba,0xd5,	0xf0,0x78,0x78,0x88
1143	.byte	0x4a,0x25,0x25,0x6f,	0x5c,0x2e,0x2e,0x72
1144	.byte	0x38,0x1c,0x1c,0x24,	0x57,0xa6,0xa6,0xf1
1145	.byte	0x73,0xb4,0xb4,0xc7,	0x97,0xc6,0xc6,0x51
1146	.byte	0xcb,0xe8,0xe8,0x23,	0xa1,0xdd,0xdd,0x7c
1147	.byte	0xe8,0x74,0x74,0x9c,	0x3e,0x1f,0x1f,0x21
1148	.byte	0x96,0x4b,0x4b,0xdd,	0x61,0xbd,0xbd,0xdc
1149	.byte	0x0d,0x8b,0x8b,0x86,	0x0f,0x8a,0x8a,0x85
1150	.byte	0xe0,0x70,0x70,0x90,	0x7c,0x3e,0x3e,0x42
1151	.byte	0x71,0xb5,0xb5,0xc4,	0xcc,0x66,0x66,0xaa
1152	.byte	0x90,0x48,0x48,0xd8,	0x06,0x03,0x03,0x05
1153	.byte	0xf7,0xf6,0xf6,0x01,	0x1c,0x0e,0x0e,0x12
1154	.byte	0xc2,0x61,0x61,0xa3,	0x6a,0x35,0x35,0x5f
1155	.byte	0xae,0x57,0x57,0xf9,	0x69,0xb9,0xb9,0xd0
1156	.byte	0x17,0x86,0x86,0x91,	0x99,0xc1,0xc1,0x58
1157	.byte	0x3a,0x1d,0x1d,0x27,	0x27,0x9e,0x9e,0xb9
1158	.byte	0xd9,0xe1,0xe1,0x38,	0xeb,0xf8,0xf8,0x13
1159	.byte	0x2b,0x98,0x98,0xb3,	0x22,0x11,0x11,0x33
1160	.byte	0xd2,0x69,0x69,0xbb,	0xa9,0xd9,0xd9,0x70
1161	.byte	0x07,0x8e,0x8e,0x89,	0x33,0x94,0x94,0xa7
1162	.byte	0x2d,0x9b,0x9b,0xb6,	0x3c,0x1e,0x1e,0x22
1163	.byte	0x15,0x87,0x87,0x92,	0xc9,0xe9,0xe9,0x20
1164	.byte	0x87,0xce,0xce,0x49,	0xaa,0x55,0x55,0xff
1165	.byte	0x50,0x28,0x28,0x78,	0xa5,0xdf,0xdf,0x7a
1166	.byte	0x03,0x8c,0x8c,0x8f,	0x59,0xa1,0xa1,0xf8
1167	.byte	0x09,0x89,0x89,0x80,	0x1a,0x0d,0x0d,0x17
1168	.byte	0x65,0xbf,0xbf,0xda,	0xd7,0xe6,0xe6,0x31
1169	.byte	0x84,0x42,0x42,0xc6,	0xd0,0x68,0x68,0xb8
1170	.byte	0x82,0x41,0x41,0xc3,	0x29,0x99,0x99,0xb0
1171	.byte	0x5a,0x2d,0x2d,0x77,	0x1e,0x0f,0x0f,0x11
1172	.byte	0x7b,0xb0,0xb0,0xcb,	0xa8,0x54,0x54,0xfc
1173	.byte	0x6d,0xbb,0xbb,0xd6,	0x2c,0x16,0x16,0x3a
1174AES_Te4:
1175	.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
1176	.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
1177	.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
1178	.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
1179	.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
1180	.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
1181	.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
1182	.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
1183	.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
1184	.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
1185	.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
1186	.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
1187	.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
1188	.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
1189	.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
1190	.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
1191	.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
1192	.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
1193	.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
1194	.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
1195	.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
1196	.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
1197	.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
1198	.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
1199	.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
1200	.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
1201	.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
1202	.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
1203	.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
1204	.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
1205	.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
1206	.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
1207rcon:
1208	.byte	0x01,0x00,0x00,0x00,	0x02,0x00,0x00,0x00
1209	.byte	0x04,0x00,0x00,0x00,	0x08,0x00,0x00,0x00
1210	.byte	0x10,0x00,0x00,0x00,	0x20,0x00,0x00,0x00
1211	.byte	0x40,0x00,0x00,0x00,	0x80,0x00,0x00,0x00
1212	.byte	0x1B,0x00,0x00,0x00,	0x36,0x00,0x00,0x00
1213	.align	128
1214AES_Td:
1215	.byte	0x51,0xf4,0xa7,0x50,	0x7e,0x41,0x65,0x53
1216	.byte	0x1a,0x17,0xa4,0xc3,	0x3a,0x27,0x5e,0x96
1217	.byte	0x3b,0xab,0x6b,0xcb,	0x1f,0x9d,0x45,0xf1
1218	.byte	0xac,0xfa,0x58,0xab,	0x4b,0xe3,0x03,0x93
1219	.byte	0x20,0x30,0xfa,0x55,	0xad,0x76,0x6d,0xf6
1220	.byte	0x88,0xcc,0x76,0x91,	0xf5,0x02,0x4c,0x25
1221	.byte	0x4f,0xe5,0xd7,0xfc,	0xc5,0x2a,0xcb,0xd7
1222	.byte	0x26,0x35,0x44,0x80,	0xb5,0x62,0xa3,0x8f
1223	.byte	0xde,0xb1,0x5a,0x49,	0x25,0xba,0x1b,0x67
1224	.byte	0x45,0xea,0x0e,0x98,	0x5d,0xfe,0xc0,0xe1
1225	.byte	0xc3,0x2f,0x75,0x02,	0x81,0x4c,0xf0,0x12
1226	.byte	0x8d,0x46,0x97,0xa3,	0x6b,0xd3,0xf9,0xc6
1227	.byte	0x03,0x8f,0x5f,0xe7,	0x15,0x92,0x9c,0x95
1228	.byte	0xbf,0x6d,0x7a,0xeb,	0x95,0x52,0x59,0xda
1229	.byte	0xd4,0xbe,0x83,0x2d,	0x58,0x74,0x21,0xd3
1230	.byte	0x49,0xe0,0x69,0x29,	0x8e,0xc9,0xc8,0x44
1231	.byte	0x75,0xc2,0x89,0x6a,	0xf4,0x8e,0x79,0x78
1232	.byte	0x99,0x58,0x3e,0x6b,	0x27,0xb9,0x71,0xdd
1233	.byte	0xbe,0xe1,0x4f,0xb6,	0xf0,0x88,0xad,0x17
1234	.byte	0xc9,0x20,0xac,0x66,	0x7d,0xce,0x3a,0xb4
1235	.byte	0x63,0xdf,0x4a,0x18,	0xe5,0x1a,0x31,0x82
1236	.byte	0x97,0x51,0x33,0x60,	0x62,0x53,0x7f,0x45
1237	.byte	0xb1,0x64,0x77,0xe0,	0xbb,0x6b,0xae,0x84
1238	.byte	0xfe,0x81,0xa0,0x1c,	0xf9,0x08,0x2b,0x94
1239	.byte	0x70,0x48,0x68,0x58,	0x8f,0x45,0xfd,0x19
1240	.byte	0x94,0xde,0x6c,0x87,	0x52,0x7b,0xf8,0xb7
1241	.byte	0xab,0x73,0xd3,0x23,	0x72,0x4b,0x02,0xe2
1242	.byte	0xe3,0x1f,0x8f,0x57,	0x66,0x55,0xab,0x2a
1243	.byte	0xb2,0xeb,0x28,0x07,	0x2f,0xb5,0xc2,0x03
1244	.byte	0x86,0xc5,0x7b,0x9a,	0xd3,0x37,0x08,0xa5
1245	.byte	0x30,0x28,0x87,0xf2,	0x23,0xbf,0xa5,0xb2
1246	.byte	0x02,0x03,0x6a,0xba,	0xed,0x16,0x82,0x5c
1247	.byte	0x8a,0xcf,0x1c,0x2b,	0xa7,0x79,0xb4,0x92
1248	.byte	0xf3,0x07,0xf2,0xf0,	0x4e,0x69,0xe2,0xa1
1249	.byte	0x65,0xda,0xf4,0xcd,	0x06,0x05,0xbe,0xd5
1250	.byte	0xd1,0x34,0x62,0x1f,	0xc4,0xa6,0xfe,0x8a
1251	.byte	0x34,0x2e,0x53,0x9d,	0xa2,0xf3,0x55,0xa0
1252	.byte	0x05,0x8a,0xe1,0x32,	0xa4,0xf6,0xeb,0x75
1253	.byte	0x0b,0x83,0xec,0x39,	0x40,0x60,0xef,0xaa
1254	.byte	0x5e,0x71,0x9f,0x06,	0xbd,0x6e,0x10,0x51
1255	.byte	0x3e,0x21,0x8a,0xf9,	0x96,0xdd,0x06,0x3d
1256	.byte	0xdd,0x3e,0x05,0xae,	0x4d,0xe6,0xbd,0x46
1257	.byte	0x91,0x54,0x8d,0xb5,	0x71,0xc4,0x5d,0x05
1258	.byte	0x04,0x06,0xd4,0x6f,	0x60,0x50,0x15,0xff
1259	.byte	0x19,0x98,0xfb,0x24,	0xd6,0xbd,0xe9,0x97
1260	.byte	0x89,0x40,0x43,0xcc,	0x67,0xd9,0x9e,0x77
1261	.byte	0xb0,0xe8,0x42,0xbd,	0x07,0x89,0x8b,0x88
1262	.byte	0xe7,0x19,0x5b,0x38,	0x79,0xc8,0xee,0xdb
1263	.byte	0xa1,0x7c,0x0a,0x47,	0x7c,0x42,0x0f,0xe9
1264	.byte	0xf8,0x84,0x1e,0xc9,	0x00,0x00,0x00,0x00
1265	.byte	0x09,0x80,0x86,0x83,	0x32,0x2b,0xed,0x48
1266	.byte	0x1e,0x11,0x70,0xac,	0x6c,0x5a,0x72,0x4e
1267	.byte	0xfd,0x0e,0xff,0xfb,	0x0f,0x85,0x38,0x56
1268	.byte	0x3d,0xae,0xd5,0x1e,	0x36,0x2d,0x39,0x27
1269	.byte	0x0a,0x0f,0xd9,0x64,	0x68,0x5c,0xa6,0x21
1270	.byte	0x9b,0x5b,0x54,0xd1,	0x24,0x36,0x2e,0x3a
1271	.byte	0x0c,0x0a,0x67,0xb1,	0x93,0x57,0xe7,0x0f
1272	.byte	0xb4,0xee,0x96,0xd2,	0x1b,0x9b,0x91,0x9e
1273	.byte	0x80,0xc0,0xc5,0x4f,	0x61,0xdc,0x20,0xa2
1274	.byte	0x5a,0x77,0x4b,0x69,	0x1c,0x12,0x1a,0x16
1275	.byte	0xe2,0x93,0xba,0x0a,	0xc0,0xa0,0x2a,0xe5
1276	.byte	0x3c,0x22,0xe0,0x43,	0x12,0x1b,0x17,0x1d
1277	.byte	0x0e,0x09,0x0d,0x0b,	0xf2,0x8b,0xc7,0xad
1278	.byte	0x2d,0xb6,0xa8,0xb9,	0x14,0x1e,0xa9,0xc8
1279	.byte	0x57,0xf1,0x19,0x85,	0xaf,0x75,0x07,0x4c
1280	.byte	0xee,0x99,0xdd,0xbb,	0xa3,0x7f,0x60,0xfd
1281	.byte	0xf7,0x01,0x26,0x9f,	0x5c,0x72,0xf5,0xbc
1282	.byte	0x44,0x66,0x3b,0xc5,	0x5b,0xfb,0x7e,0x34
1283	.byte	0x8b,0x43,0x29,0x76,	0xcb,0x23,0xc6,0xdc
1284	.byte	0xb6,0xed,0xfc,0x68,	0xb8,0xe4,0xf1,0x63
1285	.byte	0xd7,0x31,0xdc,0xca,	0x42,0x63,0x85,0x10
1286	.byte	0x13,0x97,0x22,0x40,	0x84,0xc6,0x11,0x20
1287	.byte	0x85,0x4a,0x24,0x7d,	0xd2,0xbb,0x3d,0xf8
1288	.byte	0xae,0xf9,0x32,0x11,	0xc7,0x29,0xa1,0x6d
1289	.byte	0x1d,0x9e,0x2f,0x4b,	0xdc,0xb2,0x30,0xf3
1290	.byte	0x0d,0x86,0x52,0xec,	0x77,0xc1,0xe3,0xd0
1291	.byte	0x2b,0xb3,0x16,0x6c,	0xa9,0x70,0xb9,0x99
1292	.byte	0x11,0x94,0x48,0xfa,	0x47,0xe9,0x64,0x22
1293	.byte	0xa8,0xfc,0x8c,0xc4,	0xa0,0xf0,0x3f,0x1a
1294	.byte	0x56,0x7d,0x2c,0xd8,	0x22,0x33,0x90,0xef
1295	.byte	0x87,0x49,0x4e,0xc7,	0xd9,0x38,0xd1,0xc1
1296	.byte	0x8c,0xca,0xa2,0xfe,	0x98,0xd4,0x0b,0x36
1297	.byte	0xa6,0xf5,0x81,0xcf,	0xa5,0x7a,0xde,0x28
1298	.byte	0xda,0xb7,0x8e,0x26,	0x3f,0xad,0xbf,0xa4
1299	.byte	0x2c,0x3a,0x9d,0xe4,	0x50,0x78,0x92,0x0d
1300	.byte	0x6a,0x5f,0xcc,0x9b,	0x54,0x7e,0x46,0x62
1301	.byte	0xf6,0x8d,0x13,0xc2,	0x90,0xd8,0xb8,0xe8
1302	.byte	0x2e,0x39,0xf7,0x5e,	0x82,0xc3,0xaf,0xf5
1303	.byte	0x9f,0x5d,0x80,0xbe,	0x69,0xd0,0x93,0x7c
1304	.byte	0x6f,0xd5,0x2d,0xa9,	0xcf,0x25,0x12,0xb3
1305	.byte	0xc8,0xac,0x99,0x3b,	0x10,0x18,0x7d,0xa7
1306	.byte	0xe8,0x9c,0x63,0x6e,	0xdb,0x3b,0xbb,0x7b
1307	.byte	0xcd,0x26,0x78,0x09,	0x6e,0x59,0x18,0xf4
1308	.byte	0xec,0x9a,0xb7,0x01,	0x83,0x4f,0x9a,0xa8
1309	.byte	0xe6,0x95,0x6e,0x65,	0xaa,0xff,0xe6,0x7e
1310	.byte	0x21,0xbc,0xcf,0x08,	0xef,0x15,0xe8,0xe6
1311	.byte	0xba,0xe7,0x9b,0xd9,	0x4a,0x6f,0x36,0xce
1312	.byte	0xea,0x9f,0x09,0xd4,	0x29,0xb0,0x7c,0xd6
1313	.byte	0x31,0xa4,0xb2,0xaf,	0x2a,0x3f,0x23,0x31
1314	.byte	0xc6,0xa5,0x94,0x30,	0x35,0xa2,0x66,0xc0
1315	.byte	0x74,0x4e,0xbc,0x37,	0xfc,0x82,0xca,0xa6
1316	.byte	0xe0,0x90,0xd0,0xb0,	0x33,0xa7,0xd8,0x15
1317	.byte	0xf1,0x04,0x98,0x4a,	0x41,0xec,0xda,0xf7
1318	.byte	0x7f,0xcd,0x50,0x0e,	0x17,0x91,0xf6,0x2f
1319	.byte	0x76,0x4d,0xd6,0x8d,	0x43,0xef,0xb0,0x4d
1320	.byte	0xcc,0xaa,0x4d,0x54,	0xe4,0x96,0x04,0xdf
1321	.byte	0x9e,0xd1,0xb5,0xe3,	0x4c,0x6a,0x88,0x1b
1322	.byte	0xc1,0x2c,0x1f,0xb8,	0x46,0x65,0x51,0x7f
1323	.byte	0x9d,0x5e,0xea,0x04,	0x01,0x8c,0x35,0x5d
1324	.byte	0xfa,0x87,0x74,0x73,	0xfb,0x0b,0x41,0x2e
1325	.byte	0xb3,0x67,0x1d,0x5a,	0x92,0xdb,0xd2,0x52
1326	.byte	0xe9,0x10,0x56,0x33,	0x6d,0xd6,0x47,0x13
1327	.byte	0x9a,0xd7,0x61,0x8c,	0x37,0xa1,0x0c,0x7a
1328	.byte	0x59,0xf8,0x14,0x8e,	0xeb,0x13,0x3c,0x89
1329	.byte	0xce,0xa9,0x27,0xee,	0xb7,0x61,0xc9,0x35
1330	.byte	0xe1,0x1c,0xe5,0xed,	0x7a,0x47,0xb1,0x3c
1331	.byte	0x9c,0xd2,0xdf,0x59,	0x55,0xf2,0x73,0x3f
1332	.byte	0x18,0x14,0xce,0x79,	0x73,0xc7,0x37,0xbf
1333	.byte	0x53,0xf7,0xcd,0xea,	0x5f,0xfd,0xaa,0x5b
1334	.byte	0xdf,0x3d,0x6f,0x14,	0x78,0x44,0xdb,0x86
1335	.byte	0xca,0xaf,0xf3,0x81,	0xb9,0x68,0xc4,0x3e
1336	.byte	0x38,0x24,0x34,0x2c,	0xc2,0xa3,0x40,0x5f
1337	.byte	0x16,0x1d,0xc3,0x72,	0xbc,0xe2,0x25,0x0c
1338	.byte	0x28,0x3c,0x49,0x8b,	0xff,0x0d,0x95,0x41
1339	.byte	0x39,0xa8,0x01,0x71,	0x08,0x0c,0xb3,0xde
1340	.byte	0xd8,0xb4,0xe4,0x9c,	0x64,0x56,0xc1,0x90
1341	.byte	0x7b,0xcb,0x84,0x61,	0xd5,0x32,0xb6,0x70
1342	.byte	0x48,0x6c,0x5c,0x74,	0xd0,0xb8,0x57,0x42
1343AES_Td4:
1344	.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
1345	.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
1346	.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
1347	.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
1348	.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
1349	.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
1350	.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
1351	.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
1352	.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
1353	.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
1354	.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
1355	.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
1356	.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
1357	.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
1358	.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
1359	.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
1360	.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
1361	.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
1362	.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
1363	.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
1364	.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
1365	.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
1366	.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
1367	.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
1368	.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
1369	.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
1370	.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
1371	.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
1372	.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
1373	.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
1374	.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
1375	.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
1376	.cstring "AES for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
1377	.align	4
1378___
1379
# Write out the assembly accumulated in $code.  A failing close on STDOUT
# is treated as fatal so that an incomplete listing is never taken for a
# successful run.
print($code);
close(STDOUT)
    or die("error closing STDOUT: $!");
1382