xref: /openssl/crypto/aes/asm/aes-mips.pl (revision 33388b44)
1#! /usr/bin/env perl
2# Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the Apache License 2.0 (the "License").  You may not use
5# this file except in compliance with the License.  You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9
10# ====================================================================
11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12# project. The module is, however, dual licensed under OpenSSL and
13# CRYPTOGAMS licenses depending on where you obtain it. For further
14# details see http://www.openssl.org/~appro/cryptogams/.
15# ====================================================================
16
17# AES for MIPS
18
19# October 2010
20#
21# Code uses 1K[+256B] S-box and on single-issue core [such as R5000]
22# spends ~68 cycles per byte processed with 128-bit key. This is ~16%
23# faster than gcc-generated code, which is not very impressive. But
24# recall that compressed S-box requires extra processing, namely
25# additional rotations. Rotations are implemented with lwl/lwr pairs,
# which are normally used for loading unaligned data. Another cool
27# thing about this module is its endian neutrality, which means that
28# it processes data without ever changing byte order...
29
30# September 2012
31#
# Add MIPS32R2 (~10% fewer instructions) and SmartMIPS ASE (a further
# ~25% fewer instructions) code. Note that there is no run-time switch;
# instead, the code path is chosen at preprocessing time: pass -mips32r2
# and/or -msmartmips.
36
37# February 2019
38#
39# Normalize MIPS32R2 AES table address calculation by always using EXT
40# instruction. This reduces the standard codebase by another 10%.
41
42######################################################################
# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
# widely used. Then there is a new contender: NUBI. It appears that if
# one picks the latter, it's possible to arrange code in an ABI-neutral
# manner. Therefore let's stick to the NUBI register layout:
47#
# Build every NUBI register name from one table of the 32 general-purpose
# register operands ("$0" .. "$31"); each name's register number is then
# simply the index it is sliced from.
{
    my @gpr = map { "\$$_" } 0 .. 31;

    ($zero, $at, $t0, $t1, $t2) = @gpr[0 .. 2, 24, 25];
    ($a0, $a1, $a2, $a3, $a4, $a5, $a6, $a7) = @gpr[4 .. 11];
    ($s0, $s1, $s2, $s3, $s4, $s5, $s6, $s7, $s8, $s9, $s10, $s11) =
        @gpr[12 .. 23];
    ($gp, $tp, $sp, $fp, $ra) = @gpr[3, 28 .. 31];
}
52#
# The return value is placed in $a0. The following coding rules
# facilitate interoperability:
55#
56# - never ever touch $tp, "thread pointer", former $gp;
57# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
58#   old code];
59# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
60#
61# For reference here is register layout for N32/64 MIPS ABIs:
62#
63# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
64# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
65# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
66# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
67# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
68
# Command line is "[flavour] [output]":
#  - the last argument is the output file if it ends in ".<extension>";
#  - the first remaining argument is the flavour if it contains no ".".
# An argument is removed from @ARGV only when it matches, output first.
$output  = (@ARGV && $ARGV[-1] =~ m|\.\w+$|) ? pop(@ARGV)   : undef;
$flavour = (@ARGV && $ARGV[0]  !~ m|\.|)     ? shift(@ARGV) : undef;

# Supported flavours are o32,n32,64,nubi32,nubi64; o32 is the default.
$flavour = "o32" unless $flavour;
74
# Select the pointer- and register-width dependent mnemonics in one go.
# Any flavour whose name contains "64" or "n32" gets the 64-bit forms and
# 8-byte register slots; every other flavour gets the 32-bit forms and
# 4-byte slots.  (daddu, dsubu and dsll incidentally work even on n32.)
($PTR_LA, $PTR_ADD, $PTR_SUB, $PTR_INS, $REG_S, $REG_L, $PTR_SLL, $SZREG) =
    ($flavour =~ /64|n32/i)
    ? ("dla", "daddu", "dsubu", "dins", "sd", "ld", "dsll", 8)
    : ("la",  "addu",  "subu",  "ins",  "sw", "lw", "sll",  4);

# Register passed to the .cpload/.cpsetup directives in the function
# prologues: $t0 for NUBI flavours, $t2 for all others.
$pf = ($flavour =~ /nubi/i) ? $t0 : $t2;
95#
96# <appro@openssl.org>
97#
98######################################################################
99
# Decide the byte order of the *target*.  When a compiler is named in
# $ENV{CC}, the bare token MIPSEB is piped through its preprocessor: if the
# token comes back unchanged the target is taken to be little-endian (0),
# otherwise big-endian (1) -- presumably because big-endian MIPS
# toolchains predefine MIPSEB as a macro; confirm against your compiler.
# NOTE(review): $ENV{CC} is interpolated into a shell command line and is
# treated as trusted build-environment input.
$big_endian=(`echo MIPSEB | $ENV{CC} -E -`=~/MIPSEB/)?0:1 if ($ENV{CC});

# No compiler to ask: fall back to the byte order of the machine that is
# running this script.
if (!defined($big_endian))
{    $big_endian=(unpack('L',pack('N',1))==1);   }

# Byte offsets added to the lwl/swl ($MSB) and lwr/swr ($LSB) operands in
# the templates below.
my ($MSB,$LSB)=(0,3);	# automatically converted to little-endian

# Send the generated assembly to the requested file.  The three-argument
# form stops characters in the file name from being read as part of the
# open mode, and a failed open now aborts the run instead of silently
# leaving the output on the original STDOUT.
if ($output) {
    open(STDOUT, '>', $output) or die "can't open $output: $!";
}
108
109$code.=<<___;
110#include "mips_arch.h"
111
112.text
113#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
114.option	pic2
115#endif
116.set	noat
117___
118
119{{{
# Stack frame of the public AES_encrypt/AES_decrypt wrappers: 16
# register-sized slots.  The .mask value differs for NUBI flavours, whose
# prologue saves additional registers.
my $FRAMESIZE = $SZREG * 16;
my $SAVED_REGS_MASK = "0xc0ff0000";
$SAVED_REGS_MASK = "0xc0fff008" if ($flavour =~ /nubi/i);

# Input/output/key/table pointers and the four state words occupy the
# eight argument registers; $s0-$s3 here shadow the file-level $s0-$s3.
my ($inp, $out, $key, $Tbl) = ($a0, $a1, $a2, $a3);
my ($s0, $s1, $s2, $s3)     = ($a4, $a5, $a6, $a7);

# Table-index scratch registers.  This statement must stay ahead of the
# next one: at this point $t0-$t2 still name the file-level registers,
# not the lexicals declared below.
my ($i0, $i1, $i2, $i3) = ($at, $t0, $t1, $t2);

# From here to the end of the scope $t0-$t11 are the round temporaries in
# registers 12-23, hiding the file-level $t0-$t2.
my ($t0, $t1, $t2, $t3, $t4, $t5, $t6, $t7, $t8, $t9, $t10, $t11) =
    map { "\$$_" } 12 .. 23;

# Round-key pointer and round counter.
my ($key0, $cnt) = ($gp, $fp);
127
128# instruction ordering is "stolen" from output from MIPSpro assembler
129# invoked with -mips3 -O3 arguments...
130$code.=<<___;
131.align	5
132.ent	_mips_AES_encrypt
133_mips_AES_encrypt:
134	.frame	$sp,0,$ra
135	.set	reorder
136	lw	$t0,0($key)
137	lw	$t1,4($key)
138	lw	$t2,8($key)
139	lw	$t3,12($key)
140	lw	$cnt,240($key)
141	$PTR_ADD $key0,$key,16
142
143	xor	$s0,$t0
144	xor	$s1,$t1
145	xor	$s2,$t2
146	xor	$s3,$t3
147
148	subu	$cnt,1
149#if defined(__mips_smartmips)
150	ext	$i0,$s1,16,8
151.Loop_enc:
152	ext	$i1,$s2,16,8
153	ext	$i2,$s3,16,8
154	ext	$i3,$s0,16,8
155	lwxs	$t0,$i0($Tbl)		# Te1[s1>>16]
156	ext	$i0,$s2,8,8
157	lwxs	$t1,$i1($Tbl)		# Te1[s2>>16]
158	ext	$i1,$s3,8,8
159	lwxs	$t2,$i2($Tbl)		# Te1[s3>>16]
160	ext	$i2,$s0,8,8
161	lwxs	$t3,$i3($Tbl)		# Te1[s0>>16]
162	ext	$i3,$s1,8,8
163
164	lwxs	$t4,$i0($Tbl)		# Te2[s2>>8]
165	ext	$i0,$s3,0,8
166	lwxs	$t5,$i1($Tbl)		# Te2[s3>>8]
167	ext	$i1,$s0,0,8
168	lwxs	$t6,$i2($Tbl)		# Te2[s0>>8]
169	ext	$i2,$s1,0,8
170	lwxs	$t7,$i3($Tbl)		# Te2[s1>>8]
171	ext	$i3,$s2,0,8
172
173	lwxs	$t8,$i0($Tbl)		# Te3[s3]
174	ext	$i0,$s0,24,8
175	lwxs	$t9,$i1($Tbl)		# Te3[s0]
176	ext	$i1,$s1,24,8
177	lwxs	$t10,$i2($Tbl)		# Te3[s1]
178	ext	$i2,$s2,24,8
179	lwxs	$t11,$i3($Tbl)		# Te3[s2]
180	ext	$i3,$s3,24,8
181
182	rotr	$t0,$t0,8
183	rotr	$t1,$t1,8
184	rotr	$t2,$t2,8
185	rotr	$t3,$t3,8
186
187	rotr	$t4,$t4,16
188	rotr	$t5,$t5,16
189	rotr	$t6,$t6,16
190	rotr	$t7,$t7,16
191
192	xor	$t0,$t4
193	lwxs	$t4,$i0($Tbl)		# Te0[s0>>24]
194	xor	$t1,$t5
195	lwxs	$t5,$i1($Tbl)		# Te0[s1>>24]
196	xor	$t2,$t6
197	lwxs	$t6,$i2($Tbl)		# Te0[s2>>24]
198	xor	$t3,$t7
199	lwxs	$t7,$i3($Tbl)		# Te0[s3>>24]
200
201	rotr	$t8,$t8,24
202	lw	$s0,0($key0)
203	rotr	$t9,$t9,24
204	lw	$s1,4($key0)
205	rotr	$t10,$t10,24
206	lw	$s2,8($key0)
207	rotr	$t11,$t11,24
208	lw	$s3,12($key0)
209
210	xor	$t0,$t8
211	xor	$t1,$t9
212	xor	$t2,$t10
213	xor	$t3,$t11
214
215	xor	$t0,$t4
216	xor	$t1,$t5
217	xor	$t2,$t6
218	xor	$t3,$t7
219
220	subu	$cnt,1
221	$PTR_ADD $key0,16
222	xor	$s0,$t0
223	xor	$s1,$t1
224	xor	$s2,$t2
225	xor	$s3,$t3
226	.set	noreorder
227	bnez	$cnt,.Loop_enc
228	ext	$i0,$s1,16,8
229
230	_xtr	$i0,$s1,16-2
231#else
232#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
233	move	$i0,$Tbl
234	move	$i1,$Tbl
235	move	$i2,$Tbl
236	move	$i3,$Tbl
237	ext	$t0,$s1,16,8
238.Loop_enc:
239	ext	$t1,$s2,16,8
240	ext	$t2,$s3,16,8
241	ext	$t3,$s0,16,8
242	$PTR_INS $i0,$t0,2,8
243	$PTR_INS $i1,$t1,2,8
244	$PTR_INS $i2,$t2,2,8
245	$PTR_INS $i3,$t3,2,8
246	lw	$t0,0($i0)		# Te1[s1>>16]
247	ext	$t4,$s2,8,8
248	lw	$t1,0($i1)		# Te1[s2>>16]
249	ext	$t5,$s3,8,8
250	lw	$t2,0($i2)		# Te1[s3>>16]
251	ext	$t6,$s0,8,8
252	lw	$t3,0($i3)		# Te1[s0>>16]
253	ext	$t7,$s1,8,8
254	$PTR_INS $i0,$t4,2,8
255	$PTR_INS $i1,$t5,2,8
256	$PTR_INS $i2,$t6,2,8
257	$PTR_INS $i3,$t7,2,8
258#else
259	_xtr	$i0,$s1,16-2
260.Loop_enc:
261	_xtr	$i1,$s2,16-2
262	_xtr	$i2,$s3,16-2
263	_xtr	$i3,$s0,16-2
264	and	$i0,0x3fc
265	and	$i1,0x3fc
266	and	$i2,0x3fc
267	and	$i3,0x3fc
268	$PTR_ADD $i0,$Tbl
269	$PTR_ADD $i1,$Tbl
270	$PTR_ADD $i2,$Tbl
271	$PTR_ADD $i3,$Tbl
272	lwl	$t0,3($i0)		# Te1[s1>>16]
273	lwl	$t1,3($i1)		# Te1[s2>>16]
274	lwl	$t2,3($i2)		# Te1[s3>>16]
275	lwl	$t3,3($i3)		# Te1[s0>>16]
276	lwr	$t0,2($i0)		# Te1[s1>>16]
277	_xtr	$i0,$s2,8-2
278	lwr	$t1,2($i1)		# Te1[s2>>16]
279	_xtr	$i1,$s3,8-2
280	lwr	$t2,2($i2)		# Te1[s3>>16]
281	_xtr	$i2,$s0,8-2
282	lwr	$t3,2($i3)		# Te1[s0>>16]
283	_xtr	$i3,$s1,8-2
284	and	$i0,0x3fc
285	and	$i1,0x3fc
286	and	$i2,0x3fc
287	and	$i3,0x3fc
288	$PTR_ADD $i0,$Tbl
289	$PTR_ADD $i1,$Tbl
290	$PTR_ADD $i2,$Tbl
291	$PTR_ADD $i3,$Tbl
292#endif
293#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
294	rotr	$t0,$t0,8
295	rotr	$t1,$t1,8
296	rotr	$t2,$t2,8
297	rotr	$t3,$t3,8
298# if defined(_MIPSEL)
299	lw	$t4,0($i0)		# Te2[s2>>8]
300	ext	$t8,$s3,0,8
301	lw	$t5,0($i1)		# Te2[s3>>8]
302	ext	$t9,$s0,0,8
303	lw	$t6,0($i2)		# Te2[s0>>8]
304	ext	$t10,$s1,0,8
305	lw	$t7,0($i3)		# Te2[s1>>8]
306	ext	$t11,$s2,0,8
307	$PTR_INS $i0,$t8,2,8
308	$PTR_INS $i1,$t9,2,8
309	$PTR_INS $i2,$t10,2,8
310	$PTR_INS $i3,$t11,2,8
311
312	lw	$t8,0($i0)		# Te3[s3]
313	$PTR_INS $i0,$s0,2,8
314	lw	$t9,0($i1)		# Te3[s0]
315	$PTR_INS $i1,$s1,2,8
316	lw	$t10,0($i2)		# Te3[s1]
317	$PTR_INS $i2,$s2,2,8
318	lw	$t11,0($i3)		# Te3[s2]
319	$PTR_INS $i3,$s3,2,8
320# else
321	lw	$t4,0($i0)		# Te2[s2>>8]
322	$PTR_INS $i0,$s3,2,8
323	lw	$t5,0($i1)		# Te2[s3>>8]
324	$PTR_INS $i1,$s0,2,8
325	lw	$t6,0($i2)		# Te2[s0>>8]
326	$PTR_INS $i2,$s1,2,8
327	lw	$t7,0($i3)		# Te2[s1>>8]
328	$PTR_INS $i3,$s2,2,8
329
330	lw	$t8,0($i0)		# Te3[s3]
331	_xtr	$i0,$s0,24-2
332	lw	$t9,0($i1)		# Te3[s0]
333	_xtr	$i1,$s1,24-2
334	lw	$t10,0($i2)		# Te3[s1]
335	_xtr	$i2,$s2,24-2
336	lw	$t11,0($i3)		# Te3[s2]
337	_xtr	$i3,$s3,24-2
338
339	and	$i0,0x3fc
340	and	$i1,0x3fc
341	and	$i2,0x3fc
342	and	$i3,0x3fc
343	$PTR_ADD $i0,$Tbl
344	$PTR_ADD $i1,$Tbl
345	$PTR_ADD $i2,$Tbl
346	$PTR_ADD $i3,$Tbl
347# endif
348	rotr	$t4,$t4,16
349	rotr	$t5,$t5,16
350	rotr	$t6,$t6,16
351	rotr	$t7,$t7,16
352
353	rotr	$t8,$t8,24
354	rotr	$t9,$t9,24
355	rotr	$t10,$t10,24
356	rotr	$t11,$t11,24
357#else
358	lwl	$t4,2($i0)		# Te2[s2>>8]
359	lwl	$t5,2($i1)		# Te2[s3>>8]
360	lwl	$t6,2($i2)		# Te2[s0>>8]
361	lwl	$t7,2($i3)		# Te2[s1>>8]
362	lwr	$t4,1($i0)		# Te2[s2>>8]
363	_xtr	$i0,$s3,0-2
364	lwr	$t5,1($i1)		# Te2[s3>>8]
365	_xtr	$i1,$s0,0-2
366	lwr	$t6,1($i2)		# Te2[s0>>8]
367	_xtr	$i2,$s1,0-2
368	lwr	$t7,1($i3)		# Te2[s1>>8]
369	_xtr	$i3,$s2,0-2
370
371	and	$i0,0x3fc
372	and	$i1,0x3fc
373	and	$i2,0x3fc
374	and	$i3,0x3fc
375	$PTR_ADD $i0,$Tbl
376	$PTR_ADD $i1,$Tbl
377	$PTR_ADD $i2,$Tbl
378	$PTR_ADD $i3,$Tbl
379	lwl	$t8,1($i0)		# Te3[s3]
380	lwl	$t9,1($i1)		# Te3[s0]
381	lwl	$t10,1($i2)		# Te3[s1]
382	lwl	$t11,1($i3)		# Te3[s2]
383	lwr	$t8,0($i0)		# Te3[s3]
384	_xtr	$i0,$s0,24-2
385	lwr	$t9,0($i1)		# Te3[s0]
386	_xtr	$i1,$s1,24-2
387	lwr	$t10,0($i2)		# Te3[s1]
388	_xtr	$i2,$s2,24-2
389	lwr	$t11,0($i3)		# Te3[s2]
390	_xtr	$i3,$s3,24-2
391
392	and	$i0,0x3fc
393	and	$i1,0x3fc
394	and	$i2,0x3fc
395	and	$i3,0x3fc
396	$PTR_ADD $i0,$Tbl
397	$PTR_ADD $i1,$Tbl
398	$PTR_ADD $i2,$Tbl
399	$PTR_ADD $i3,$Tbl
400#endif
401	xor	$t0,$t4
402	lw	$t4,0($i0)		# Te0[s0>>24]
403	xor	$t1,$t5
404	lw	$t5,0($i1)		# Te0[s1>>24]
405	xor	$t2,$t6
406	lw	$t6,0($i2)		# Te0[s2>>24]
407	xor	$t3,$t7
408	lw	$t7,0($i3)		# Te0[s3>>24]
409
410	xor	$t0,$t8
411	lw	$s0,0($key0)
412	xor	$t1,$t9
413	lw	$s1,4($key0)
414	xor	$t2,$t10
415	lw	$s2,8($key0)
416	xor	$t3,$t11
417	lw	$s3,12($key0)
418
419	xor	$t0,$t4
420	xor	$t1,$t5
421	xor	$t2,$t6
422	xor	$t3,$t7
423
424	subu	$cnt,1
425	$PTR_ADD $key0,16
426	xor	$s0,$t0
427	xor	$s1,$t1
428	xor	$s2,$t2
429	xor	$s3,$t3
430	.set	noreorder
431	bnez	$cnt,.Loop_enc
432#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
433	ext	$t0,$s1,16,8
434#endif
435	_xtr	$i0,$s1,16-2
436#endif
437
438	.set	reorder
439	_xtr	$i1,$s2,16-2
440	_xtr	$i2,$s3,16-2
441	_xtr	$i3,$s0,16-2
442	and	$i0,0x3fc
443	and	$i1,0x3fc
444	and	$i2,0x3fc
445	and	$i3,0x3fc
446	$PTR_ADD $i0,$Tbl
447	$PTR_ADD $i1,$Tbl
448	$PTR_ADD $i2,$Tbl
449	$PTR_ADD $i3,$Tbl
450	lbu	$t0,2($i0)		# Te4[s1>>16]
451	_xtr	$i0,$s2,8-2
452	lbu	$t1,2($i1)		# Te4[s2>>16]
453	_xtr	$i1,$s3,8-2
454	lbu	$t2,2($i2)		# Te4[s3>>16]
455	_xtr	$i2,$s0,8-2
456	lbu	$t3,2($i3)		# Te4[s0>>16]
457	_xtr	$i3,$s1,8-2
458
459	and	$i0,0x3fc
460	and	$i1,0x3fc
461	and	$i2,0x3fc
462	and	$i3,0x3fc
463	$PTR_ADD $i0,$Tbl
464	$PTR_ADD $i1,$Tbl
465	$PTR_ADD $i2,$Tbl
466	$PTR_ADD $i3,$Tbl
467#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
468# if defined(_MIPSEL)
469	lbu	$t4,2($i0)		# Te4[s2>>8]
470	$PTR_INS $i0,$s0,2,8
471	lbu	$t5,2($i1)		# Te4[s3>>8]
472	$PTR_INS $i1,$s1,2,8
473	lbu	$t6,2($i2)		# Te4[s0>>8]
474	$PTR_INS $i2,$s2,2,8
475	lbu	$t7,2($i3)		# Te4[s1>>8]
476	$PTR_INS $i3,$s3,2,8
477
478	lbu	$t8,2($i0)		# Te4[s0>>24]
479	_xtr	$i0,$s3,0-2
480	lbu	$t9,2($i1)		# Te4[s1>>24]
481	_xtr	$i1,$s0,0-2
482	lbu	$t10,2($i2)		# Te4[s2>>24]
483	_xtr	$i2,$s1,0-2
484	lbu	$t11,2($i3)		# Te4[s3>>24]
485	_xtr	$i3,$s2,0-2
486
487	and	$i0,0x3fc
488	and	$i1,0x3fc
489	and	$i2,0x3fc
490	and	$i3,0x3fc
491	$PTR_ADD $i0,$Tbl
492	$PTR_ADD $i1,$Tbl
493	$PTR_ADD $i2,$Tbl
494	$PTR_ADD $i3,$Tbl
495# else
496	lbu	$t4,2($i0)		# Te4[s2>>8]
497	_xtr	$i0,$s0,24-2
498	lbu	$t5,2($i1)		# Te4[s3>>8]
499	_xtr	$i1,$s1,24-2
500	lbu	$t6,2($i2)		# Te4[s0>>8]
501	_xtr	$i2,$s2,24-2
502	lbu	$t7,2($i3)		# Te4[s1>>8]
503	_xtr	$i3,$s3,24-2
504
505	and	$i0,0x3fc
506	and	$i1,0x3fc
507	and	$i2,0x3fc
508	and	$i3,0x3fc
509	$PTR_ADD $i0,$Tbl
510	$PTR_ADD $i1,$Tbl
511	$PTR_ADD $i2,$Tbl
512	$PTR_ADD $i3,$Tbl
513	lbu	$t8,2($i0)		# Te4[s0>>24]
514	$PTR_INS $i0,$s3,2,8
515	lbu	$t9,2($i1)		# Te4[s1>>24]
516	$PTR_INS $i1,$s0,2,8
517	lbu	$t10,2($i2)		# Te4[s2>>24]
518	$PTR_INS $i2,$s1,2,8
519	lbu	$t11,2($i3)		# Te4[s3>>24]
520	$PTR_INS $i3,$s2,2,8
521# endif
522	_ins	$t0,16
523	_ins	$t1,16
524	_ins	$t2,16
525	_ins	$t3,16
526
527	_ins2	$t0,$t4,8
528	lbu	$t4,2($i0)		# Te4[s3]
529	_ins2	$t1,$t5,8
530	lbu	$t5,2($i1)		# Te4[s0]
531	_ins2	$t2,$t6,8
532	lbu	$t6,2($i2)		# Te4[s1]
533	_ins2	$t3,$t7,8
534	lbu	$t7,2($i3)		# Te4[s2]
535
536	_ins2	$t0,$t8,24
537	lw	$s0,0($key0)
538	_ins2	$t1,$t9,24
539	lw	$s1,4($key0)
540	_ins2	$t2,$t10,24
541	lw	$s2,8($key0)
542	_ins2	$t3,$t11,24
543	lw	$s3,12($key0)
544
545	_ins2	$t0,$t4,0
546	_ins2	$t1,$t5,0
547	_ins2	$t2,$t6,0
548	_ins2	$t3,$t7,0
549#else
550	lbu	$t4,2($i0)		# Te4[s2>>8]
551	_xtr	$i0,$s0,24-2
552	lbu	$t5,2($i1)		# Te4[s3>>8]
553	_xtr	$i1,$s1,24-2
554	lbu	$t6,2($i2)		# Te4[s0>>8]
555	_xtr	$i2,$s2,24-2
556	lbu	$t7,2($i3)		# Te4[s1>>8]
557	_xtr	$i3,$s3,24-2
558
559	and	$i0,0x3fc
560	and	$i1,0x3fc
561	and	$i2,0x3fc
562	and	$i3,0x3fc
563	$PTR_ADD $i0,$Tbl
564	$PTR_ADD $i1,$Tbl
565	$PTR_ADD $i2,$Tbl
566	$PTR_ADD $i3,$Tbl
567	lbu	$t8,2($i0)		# Te4[s0>>24]
568	_xtr	$i0,$s3,0-2
569	lbu	$t9,2($i1)		# Te4[s1>>24]
570	_xtr	$i1,$s0,0-2
571	lbu	$t10,2($i2)		# Te4[s2>>24]
572	_xtr	$i2,$s1,0-2
573	lbu	$t11,2($i3)		# Te4[s3>>24]
574	_xtr	$i3,$s2,0-2
575
576	and	$i0,0x3fc
577	and	$i1,0x3fc
578	and	$i2,0x3fc
579	and	$i3,0x3fc
580	$PTR_ADD $i0,$Tbl
581	$PTR_ADD $i1,$Tbl
582	$PTR_ADD $i2,$Tbl
583	$PTR_ADD $i3,$Tbl
584
585	_ins	$t0,16
586	_ins	$t1,16
587	_ins	$t2,16
588	_ins	$t3,16
589
590	_ins	$t4,8
591	_ins	$t5,8
592	_ins	$t6,8
593	_ins	$t7,8
594
595	xor	$t0,$t4
596	lbu	$t4,2($i0)		# Te4[s3]
597	xor	$t1,$t5
598	lbu	$t5,2($i1)		# Te4[s0]
599	xor	$t2,$t6
600	lbu	$t6,2($i2)		# Te4[s1]
601	xor	$t3,$t7
602	lbu	$t7,2($i3)		# Te4[s2]
603
604	_ins	$t8,24
605	lw	$s0,0($key0)
606	_ins	$t9,24
607	lw	$s1,4($key0)
608	_ins	$t10,24
609	lw	$s2,8($key0)
610	_ins	$t11,24
611	lw	$s3,12($key0)
612
613	xor	$t0,$t8
614	xor	$t1,$t9
615	xor	$t2,$t10
616	xor	$t3,$t11
617
618	_ins	$t4,0
619	_ins	$t5,0
620	_ins	$t6,0
621	_ins	$t7,0
622
623	xor	$t0,$t4
624	xor	$t1,$t5
625	xor	$t2,$t6
626	xor	$t3,$t7
627#endif
628	xor	$s0,$t0
629	xor	$s1,$t1
630	xor	$s2,$t2
631	xor	$s3,$t3
632
633	jr	$ra
634.end	_mips_AES_encrypt
635
636.align	5
637.globl	AES_encrypt
638.ent	AES_encrypt
639AES_encrypt:
640	.frame	$sp,$FRAMESIZE,$ra
641	.mask	$SAVED_REGS_MASK,-$SZREG
642	.set	noreorder
643___
644$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
645	.cpload	$pf
646___
647$code.=<<___;
648	$PTR_SUB $sp,$FRAMESIZE
649	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
650	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
651	$REG_S	$s11,$FRAMESIZE-3*$SZREG($sp)
652	$REG_S	$s10,$FRAMESIZE-4*$SZREG($sp)
653	$REG_S	$s9,$FRAMESIZE-5*$SZREG($sp)
654	$REG_S	$s8,$FRAMESIZE-6*$SZREG($sp)
655	$REG_S	$s7,$FRAMESIZE-7*$SZREG($sp)
656	$REG_S	$s6,$FRAMESIZE-8*$SZREG($sp)
657	$REG_S	$s5,$FRAMESIZE-9*$SZREG($sp)
658	$REG_S	$s4,$FRAMESIZE-10*$SZREG($sp)
659___
660$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
661	$REG_S	\$15,$FRAMESIZE-11*$SZREG($sp)
662	$REG_S	\$14,$FRAMESIZE-12*$SZREG($sp)
663	$REG_S	\$13,$FRAMESIZE-13*$SZREG($sp)
664	$REG_S	\$12,$FRAMESIZE-14*$SZREG($sp)
665	$REG_S	$gp,$FRAMESIZE-15*$SZREG($sp)
666___
667$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
668	.cplocal	$Tbl
669	.cpsetup	$pf,$zero,AES_encrypt
670___
671$code.=<<___;
672	.set	reorder
673	$PTR_LA	$Tbl,AES_Te		# PIC-ified 'load address'
674
675#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
676	lw	$s0,0($inp)
677	lw	$s1,4($inp)
678	lw	$s2,8($inp)
679	lw	$s3,12($inp)
680#else
681	lwl	$s0,0+$MSB($inp)
682	lwl	$s1,4+$MSB($inp)
683	lwl	$s2,8+$MSB($inp)
684	lwl	$s3,12+$MSB($inp)
685	lwr	$s0,0+$LSB($inp)
686	lwr	$s1,4+$LSB($inp)
687	lwr	$s2,8+$LSB($inp)
688	lwr	$s3,12+$LSB($inp)
689#endif
690
691	bal	_mips_AES_encrypt
692
693#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
694	sw	$s0,0($out)
695	sw	$s1,4($out)
696	sw	$s2,8($out)
697	sw	$s3,12($out)
698#else
699	swr	$s0,0+$LSB($out)
700	swr	$s1,4+$LSB($out)
701	swr	$s2,8+$LSB($out)
702	swr	$s3,12+$LSB($out)
703	swl	$s0,0+$MSB($out)
704	swl	$s1,4+$MSB($out)
705	swl	$s2,8+$MSB($out)
706	swl	$s3,12+$MSB($out)
707#endif
708
709	.set	noreorder
710	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
711	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
712	$REG_L	$s11,$FRAMESIZE-3*$SZREG($sp)
713	$REG_L	$s10,$FRAMESIZE-4*$SZREG($sp)
714	$REG_L	$s9,$FRAMESIZE-5*$SZREG($sp)
715	$REG_L	$s8,$FRAMESIZE-6*$SZREG($sp)
716	$REG_L	$s7,$FRAMESIZE-7*$SZREG($sp)
717	$REG_L	$s6,$FRAMESIZE-8*$SZREG($sp)
718	$REG_L	$s5,$FRAMESIZE-9*$SZREG($sp)
719	$REG_L	$s4,$FRAMESIZE-10*$SZREG($sp)
720___
721$code.=<<___ if ($flavour =~ /nubi/i);
722	$REG_L	\$15,$FRAMESIZE-11*$SZREG($sp)
723	$REG_L	\$14,$FRAMESIZE-12*$SZREG($sp)
724	$REG_L	\$13,$FRAMESIZE-13*$SZREG($sp)
725	$REG_L	\$12,$FRAMESIZE-14*$SZREG($sp)
726	$REG_L	$gp,$FRAMESIZE-15*$SZREG($sp)
727___
728$code.=<<___;
729	jr	$ra
730	$PTR_ADD $sp,$FRAMESIZE
731.end	AES_encrypt
732___
733
734$code.=<<___;
735.align	5
736.ent	_mips_AES_decrypt
737_mips_AES_decrypt:
738	.frame	$sp,0,$ra
739	.set	reorder
740	lw	$t0,0($key)
741	lw	$t1,4($key)
742	lw	$t2,8($key)
743	lw	$t3,12($key)
744	lw	$cnt,240($key)
745	$PTR_ADD $key0,$key,16
746
747	xor	$s0,$t0
748	xor	$s1,$t1
749	xor	$s2,$t2
750	xor	$s3,$t3
751
752	subu	$cnt,1
753#if defined(__mips_smartmips)
754	ext	$i0,$s3,16,8
755.Loop_dec:
756	ext	$i1,$s0,16,8
757	ext	$i2,$s1,16,8
758	ext	$i3,$s2,16,8
759	lwxs	$t0,$i0($Tbl)		# Td1[s3>>16]
760	ext	$i0,$s2,8,8
761	lwxs	$t1,$i1($Tbl)		# Td1[s0>>16]
762	ext	$i1,$s3,8,8
763	lwxs	$t2,$i2($Tbl)		# Td1[s1>>16]
764	ext	$i2,$s0,8,8
765	lwxs	$t3,$i3($Tbl)		# Td1[s2>>16]
766	ext	$i3,$s1,8,8
767
768	lwxs	$t4,$i0($Tbl)		# Td2[s2>>8]
769	ext	$i0,$s1,0,8
770	lwxs	$t5,$i1($Tbl)		# Td2[s3>>8]
771	ext	$i1,$s2,0,8
772	lwxs	$t6,$i2($Tbl)		# Td2[s0>>8]
773	ext	$i2,$s3,0,8
774	lwxs	$t7,$i3($Tbl)		# Td2[s1>>8]
775	ext	$i3,$s0,0,8
776
777	lwxs	$t8,$i0($Tbl)		# Td3[s1]
778	ext	$i0,$s0,24,8
779	lwxs	$t9,$i1($Tbl)		# Td3[s2]
780	ext	$i1,$s1,24,8
781	lwxs	$t10,$i2($Tbl)		# Td3[s3]
782	ext	$i2,$s2,24,8
783	lwxs	$t11,$i3($Tbl)		# Td3[s0]
784	ext	$i3,$s3,24,8
785
786	rotr	$t0,$t0,8
787	rotr	$t1,$t1,8
788	rotr	$t2,$t2,8
789	rotr	$t3,$t3,8
790
791	rotr	$t4,$t4,16
792	rotr	$t5,$t5,16
793	rotr	$t6,$t6,16
794	rotr	$t7,$t7,16
795
796	xor	$t0,$t4
797	lwxs	$t4,$i0($Tbl)		# Td0[s0>>24]
798	xor	$t1,$t5
799	lwxs	$t5,$i1($Tbl)		# Td0[s1>>24]
800	xor	$t2,$t6
801	lwxs	$t6,$i2($Tbl)		# Td0[s2>>24]
802	xor	$t3,$t7
803	lwxs	$t7,$i3($Tbl)		# Td0[s3>>24]
804
805	rotr	$t8,$t8,24
806	lw	$s0,0($key0)
807	rotr	$t9,$t9,24
808	lw	$s1,4($key0)
809	rotr	$t10,$t10,24
810	lw	$s2,8($key0)
811	rotr	$t11,$t11,24
812	lw	$s3,12($key0)
813
814	xor	$t0,$t8
815	xor	$t1,$t9
816	xor	$t2,$t10
817	xor	$t3,$t11
818
819	xor	$t0,$t4
820	xor	$t1,$t5
821	xor	$t2,$t6
822	xor	$t3,$t7
823
824	subu	$cnt,1
825	$PTR_ADD $key0,16
826	xor	$s0,$t0
827	xor	$s1,$t1
828	xor	$s2,$t2
829	xor	$s3,$t3
830	.set	noreorder
831	bnez	$cnt,.Loop_dec
832	ext	$i0,$s3,16,8
833
834	_xtr	$i0,$s3,16-2
835#else
836#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
837	move	$i0,$Tbl
838	move	$i1,$Tbl
839	move	$i2,$Tbl
840	move	$i3,$Tbl
841	ext	$t0,$s3,16,8
842.Loop_dec:
843	ext	$t1,$s0,16,8
844	ext	$t2,$s1,16,8
845	ext	$t3,$s2,16,8
846	$PTR_INS $i0,$t0,2,8
847	$PTR_INS $i1,$t1,2,8
848	$PTR_INS $i2,$t2,2,8
849	$PTR_INS $i3,$t3,2,8
850	lw	$t0,0($i0)		# Td1[s3>>16]
851	ext	$t4,$s2,8,8
852	lw	$t1,0($i1)		# Td1[s0>>16]
853	ext	$t5,$s3,8,8
854	lw	$t2,0($i2)		# Td1[s1>>16]
855	ext	$t6,$s0,8,8
856	lw	$t3,0($i3)		# Td1[s2>>16]
857	ext	$t7,$s1,8,8
858	$PTR_INS $i0,$t4,2,8
859	$PTR_INS $i1,$t5,2,8
860	$PTR_INS $i2,$t6,2,8
861	$PTR_INS $i3,$t7,2,8
862#else
863	_xtr	$i0,$s3,16-2
864.Loop_dec:
865	_xtr	$i1,$s0,16-2
866	_xtr	$i2,$s1,16-2
867	_xtr	$i3,$s2,16-2
868	and	$i0,0x3fc
869	and	$i1,0x3fc
870	and	$i2,0x3fc
871	and	$i3,0x3fc
872	$PTR_ADD $i0,$Tbl
873	$PTR_ADD $i1,$Tbl
874	$PTR_ADD $i2,$Tbl
875	$PTR_ADD $i3,$Tbl
876	lwl	$t0,3($i0)		# Td1[s3>>16]
877	lwl	$t1,3($i1)		# Td1[s0>>16]
878	lwl	$t2,3($i2)		# Td1[s1>>16]
879	lwl	$t3,3($i3)		# Td1[s2>>16]
880	lwr	$t0,2($i0)		# Td1[s3>>16]
881	_xtr	$i0,$s2,8-2
882	lwr	$t1,2($i1)		# Td1[s0>>16]
883	_xtr	$i1,$s3,8-2
884	lwr	$t2,2($i2)		# Td1[s1>>16]
885	_xtr	$i2,$s0,8-2
886	lwr	$t3,2($i3)		# Td1[s2>>16]
887	_xtr	$i3,$s1,8-2
888	and	$i0,0x3fc
889	and	$i1,0x3fc
890	and	$i2,0x3fc
891	and	$i3,0x3fc
892	$PTR_ADD $i0,$Tbl
893	$PTR_ADD $i1,$Tbl
894	$PTR_ADD $i2,$Tbl
895	$PTR_ADD $i3,$Tbl
896#endif
897#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
898	rotr	$t0,$t0,8
899	rotr	$t1,$t1,8
900	rotr	$t2,$t2,8
901	rotr	$t3,$t3,8
902# if defined(_MIPSEL)
903	lw	$t4,0($i0)		# Td2[s2>>8]
904	ext	$t8,$s1,0,8
905	lw	$t5,0($i1)		# Td2[s3>>8]
906	ext	$t9,$s2,0,8
907	lw	$t6,0($i2)		# Td2[s0>>8]
908	ext	$t10,$s3,0,8
909	lw	$t7,0($i3)		# Td2[s1>>8]
910	ext	$t11,$s0,0,8
911	$PTR_INS $i0,$t8,2,8
912	$PTR_INS $i1,$t9,2,8
913	$PTR_INS $i2,$t10,2,8
914	$PTR_INS $i3,$t11,2,8
915	lw	$t8,0($i0)		# Td3[s1]
916	$PTR_INS $i0,$s0,2,8
917	lw	$t9,0($i1)		# Td3[s2]
918	$PTR_INS $i1,$s1,2,8
919	lw	$t10,0($i2)		# Td3[s3]
920	$PTR_INS $i2,$s2,2,8
921	lw	$t11,0($i3)		# Td3[s0]
922	$PTR_INS $i3,$s3,2,8
923#else
924	lw	$t4,0($i0)		# Td2[s2>>8]
925	$PTR_INS $i0,$s1,2,8
926	lw	$t5,0($i1)		# Td2[s3>>8]
927	$PTR_INS $i1,$s2,2,8
928	lw	$t6,0($i2)		# Td2[s0>>8]
929	$PTR_INS $i2,$s3,2,8
930	lw	$t7,0($i3)		# Td2[s1>>8]
931	$PTR_INS $i3,$s0,2,8
932
933	lw	$t8,0($i0)		# Td3[s1]
934	_xtr	$i0,$s0,24-2
935	lw	$t9,0($i1)		# Td3[s2]
936	_xtr	$i1,$s1,24-2
937	lw	$t10,0($i2)		# Td3[s3]
938	_xtr	$i2,$s2,24-2
939	lw	$t11,0($i3)		# Td3[s0]
940	_xtr	$i3,$s3,24-2
941
942	and	$i0,0x3fc
943	and	$i1,0x3fc
944	and	$i2,0x3fc
945	and	$i3,0x3fc
946	$PTR_ADD $i0,$Tbl
947	$PTR_ADD $i1,$Tbl
948	$PTR_ADD $i2,$Tbl
949	$PTR_ADD $i3,$Tbl
950#endif
951	rotr	$t4,$t4,16
952	rotr	$t5,$t5,16
953	rotr	$t6,$t6,16
954	rotr	$t7,$t7,16
955
956	rotr	$t8,$t8,24
957	rotr	$t9,$t9,24
958	rotr	$t10,$t10,24
959	rotr	$t11,$t11,24
960#else
961	lwl	$t4,2($i0)		# Td2[s2>>8]
962	lwl	$t5,2($i1)		# Td2[s3>>8]
963	lwl	$t6,2($i2)		# Td2[s0>>8]
964	lwl	$t7,2($i3)		# Td2[s1>>8]
965	lwr	$t4,1($i0)		# Td2[s2>>8]
966	_xtr	$i0,$s1,0-2
967	lwr	$t5,1($i1)		# Td2[s3>>8]
968	_xtr	$i1,$s2,0-2
969	lwr	$t6,1($i2)		# Td2[s0>>8]
970	_xtr	$i2,$s3,0-2
971	lwr	$t7,1($i3)		# Td2[s1>>8]
972	_xtr	$i3,$s0,0-2
973
974	and	$i0,0x3fc
975	and	$i1,0x3fc
976	and	$i2,0x3fc
977	and	$i3,0x3fc
978	$PTR_ADD $i0,$Tbl
979	$PTR_ADD $i1,$Tbl
980	$PTR_ADD $i2,$Tbl
981	$PTR_ADD $i3,$Tbl
982	lwl	$t8,1($i0)		# Td3[s1]
983	lwl	$t9,1($i1)		# Td3[s2]
984	lwl	$t10,1($i2)		# Td3[s3]
985	lwl	$t11,1($i3)		# Td3[s0]
986	lwr	$t8,0($i0)		# Td3[s1]
987	_xtr	$i0,$s0,24-2
988	lwr	$t9,0($i1)		# Td3[s2]
989	_xtr	$i1,$s1,24-2
990	lwr	$t10,0($i2)		# Td3[s3]
991	_xtr	$i2,$s2,24-2
992	lwr	$t11,0($i3)		# Td3[s0]
993	_xtr	$i3,$s3,24-2
994
995	and	$i0,0x3fc
996	and	$i1,0x3fc
997	and	$i2,0x3fc
998	and	$i3,0x3fc
999	$PTR_ADD $i0,$Tbl
1000	$PTR_ADD $i1,$Tbl
1001	$PTR_ADD $i2,$Tbl
1002	$PTR_ADD $i3,$Tbl
1003#endif
1004
1005	xor	$t0,$t4
1006	lw	$t4,0($i0)		# Td0[s0>>24]
1007	xor	$t1,$t5
1008	lw	$t5,0($i1)		# Td0[s1>>24]
1009	xor	$t2,$t6
1010	lw	$t6,0($i2)		# Td0[s2>>24]
1011	xor	$t3,$t7
1012	lw	$t7,0($i3)		# Td0[s3>>24]
1013
1014	xor	$t0,$t8
1015	lw	$s0,0($key0)
1016	xor	$t1,$t9
1017	lw	$s1,4($key0)
1018	xor	$t2,$t10
1019	lw	$s2,8($key0)
1020	xor	$t3,$t11
1021	lw	$s3,12($key0)
1022
1023	xor	$t0,$t4
1024	xor	$t1,$t5
1025	xor	$t2,$t6
1026	xor	$t3,$t7
1027
1028	subu	$cnt,1
1029	$PTR_ADD $key0,16
1030	xor	$s0,$t0
1031	xor	$s1,$t1
1032	xor	$s2,$t2
1033	xor	$s3,$t3
1034	.set	noreorder
1035	bnez	$cnt,.Loop_dec
1036#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1037	ext	$t0,$s3,16,8
1038#endif
1039
1040	_xtr	$i0,$s3,16-2
1041#endif
1042
1043	.set	reorder
1044	lw	$t4,1024($Tbl)		# prefetch Td4
1045	_xtr	$i0,$s3,16
1046	lw	$t5,1024+32($Tbl)
1047	_xtr	$i1,$s0,16
1048	lw	$t6,1024+64($Tbl)
1049	_xtr	$i2,$s1,16
1050	lw	$t7,1024+96($Tbl)
1051	_xtr	$i3,$s2,16
1052	lw	$t8,1024+128($Tbl)
1053	and	$i0,0xff
1054	lw	$t9,1024+160($Tbl)
1055	and	$i1,0xff
1056	lw	$t10,1024+192($Tbl)
1057	and	$i2,0xff
1058	lw	$t11,1024+224($Tbl)
1059	and	$i3,0xff
1060
1061	$PTR_ADD $i0,$Tbl
1062	$PTR_ADD $i1,$Tbl
1063	$PTR_ADD $i2,$Tbl
1064	$PTR_ADD $i3,$Tbl
1065	lbu	$t0,1024($i0)		# Td4[s3>>16]
1066	_xtr	$i0,$s2,8
1067	lbu	$t1,1024($i1)		# Td4[s0>>16]
1068	_xtr	$i1,$s3,8
1069	lbu	$t2,1024($i2)		# Td4[s1>>16]
1070	_xtr	$i2,$s0,8
1071	lbu	$t3,1024($i3)		# Td4[s2>>16]
1072	_xtr	$i3,$s1,8
1073
1074	and	$i0,0xff
1075	and	$i1,0xff
1076	and	$i2,0xff
1077	and	$i3,0xff
1078	$PTR_ADD $i0,$Tbl
1079	$PTR_ADD $i1,$Tbl
1080	$PTR_ADD $i2,$Tbl
1081	$PTR_ADD $i3,$Tbl
1082#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1083# if defined(_MIPSEL)
1084	lbu	$t4,1024($i0)		# Td4[s2>>8]
1085	$PTR_INS $i0,$s0,0,8
1086	lbu	$t5,1024($i1)		# Td4[s3>>8]
1087	$PTR_INS $i1,$s1,0,8
1088	lbu	$t6,1024($i2)		# Td4[s0>>8]
1089	$PTR_INS $i2,$s2,0,8
1090	lbu	$t7,1024($i3)		# Td4[s1>>8]
1091	$PTR_INS $i3,$s3,0,8
1092
1093	lbu	$t8,1024($i0)		# Td4[s0>>24]
1094	_xtr	$i0,$s1,0
1095	lbu	$t9,1024($i1)		# Td4[s1>>24]
1096	_xtr	$i1,$s2,0
1097	lbu	$t10,1024($i2)		# Td4[s2>>24]
1098	_xtr	$i2,$s3,0
1099	lbu	$t11,1024($i3)		# Td4[s3>>24]
1100	_xtr	$i3,$s0,0
1101
1102	$PTR_ADD $i0,$Tbl
1103	$PTR_ADD $i1,$Tbl
1104	$PTR_ADD $i2,$Tbl
1105	$PTR_ADD $i3,$Tbl
1106# else
1107	lbu	$t4,1024($i0)		# Td4[s2>>8]
1108	_xtr	$i0,$s0,24
1109	lbu	$t5,1024($i1)		# Td4[s3>>8]
1110	_xtr	$i1,$s1,24
1111	lbu	$t6,1024($i2)		# Td4[s0>>8]
1112	_xtr	$i2,$s2,24
1113	lbu	$t7,1024($i3)		# Td4[s1>>8]
1114	_xtr	$i3,$s3,24
1115
1116	$PTR_ADD $i0,$Tbl
1117	$PTR_ADD $i1,$Tbl
1118	$PTR_ADD $i2,$Tbl
1119	$PTR_ADD $i3,$Tbl
1120	lbu	$t8,1024($i0)		# Td4[s0>>24]
1121	$PTR_INS $i0,$s1,0,8
1122	lbu	$t9,1024($i1)		# Td4[s1>>24]
1123	$PTR_INS $i1,$s2,0,8
1124	lbu	$t10,1024($i2)		# Td4[s2>>24]
1125	$PTR_INS $i2,$s3,0,8
1126	lbu	$t11,1024($i3)		# Td4[s3>>24]
1127	$PTR_INS $i3,$s0,0,8
1128# endif
1129	_ins	$t0,16
1130	_ins	$t1,16
1131	_ins	$t2,16
1132	_ins	$t3,16
1133
1134	_ins2	$t0,$t4,8
1135	lbu	$t4,1024($i0)		# Td4[s1]
1136	_ins2	$t1,$t5,8
1137	lbu	$t5,1024($i1)		# Td4[s2]
1138	_ins2	$t2,$t6,8
1139	lbu	$t6,1024($i2)		# Td4[s3]
1140	_ins2	$t3,$t7,8
1141	lbu	$t7,1024($i3)		# Td4[s0]
1142
1143	_ins2	$t0,$t8,24
1144	lw	$s0,0($key0)
1145	_ins2	$t1,$t9,24
1146	lw	$s1,4($key0)
1147	_ins2	$t2,$t10,24
1148	lw	$s2,8($key0)
1149	_ins2	$t3,$t11,24
1150	lw	$s3,12($key0)
1151
1152	_ins2	$t0,$t4,0
1153	_ins2	$t1,$t5,0
1154	_ins2	$t2,$t6,0
1155	_ins2	$t3,$t7,0
1156#else
1157	lbu	$t4,1024($i0)		# Td4[s2>>8]
1158	_xtr	$i0,$s0,24
1159	lbu	$t5,1024($i1)		# Td4[s3>>8]
1160	_xtr	$i1,$s1,24
1161	lbu	$t6,1024($i2)		# Td4[s0>>8]
1162	_xtr	$i2,$s2,24
1163	lbu	$t7,1024($i3)		# Td4[s1>>8]
1164	_xtr	$i3,$s3,24
1165
1166	$PTR_ADD $i0,$Tbl
1167	$PTR_ADD $i1,$Tbl
1168	$PTR_ADD $i2,$Tbl
1169	$PTR_ADD $i3,$Tbl
1170	lbu	$t8,1024($i0)		# Td4[s0>>24]
1171	_xtr	$i0,$s1,0
1172	lbu	$t9,1024($i1)		# Td4[s1>>24]
1173	_xtr	$i1,$s2,0
1174	lbu	$t10,1024($i2)		# Td4[s2>>24]
1175	_xtr	$i2,$s3,0
1176	lbu	$t11,1024($i3)		# Td4[s3>>24]
1177	_xtr	$i3,$s0,0
1178
1179	$PTR_ADD $i0,$Tbl
1180	$PTR_ADD $i1,$Tbl
1181	$PTR_ADD $i2,$Tbl
1182	$PTR_ADD $i3,$Tbl
1183
1184	_ins	$t0,16
1185	_ins	$t1,16
1186	_ins	$t2,16
1187	_ins	$t3,16
1188
1189	_ins	$t4,8
1190	_ins	$t5,8
1191	_ins	$t6,8
1192	_ins	$t7,8
1193
1194	xor	$t0,$t4
1195	lbu	$t4,1024($i0)		# Td4[s1]
1196	xor	$t1,$t5
1197	lbu	$t5,1024($i1)		# Td4[s2]
1198	xor	$t2,$t6
1199	lbu	$t6,1024($i2)		# Td4[s3]
1200	xor	$t3,$t7
1201	lbu	$t7,1024($i3)		# Td4[s0]
1202
1203	_ins	$t8,24
1204	lw	$s0,0($key0)
1205	_ins	$t9,24
1206	lw	$s1,4($key0)
1207	_ins	$t10,24
1208	lw	$s2,8($key0)
1209	_ins	$t11,24
1210	lw	$s3,12($key0)
1211
1212	xor	$t0,$t8
1213	xor	$t1,$t9
1214	xor	$t2,$t10
1215	xor	$t3,$t11
1216
1217	_ins	$t4,0
1218	_ins	$t5,0
1219	_ins	$t6,0
1220	_ins	$t7,0
1221
1222	xor	$t0,$t4
1223	xor	$t1,$t5
1224	xor	$t2,$t6
1225	xor	$t3,$t7
1226#endif
1227
1228	xor	$s0,$t0
1229	xor	$s1,$t1
1230	xor	$s2,$t2
1231	xor	$s3,$t3
1232
1233	jr	$ra
1234.end	_mips_AES_decrypt
1235
1236.align	5
1237.globl	AES_decrypt
1238.ent	AES_decrypt
1239AES_decrypt:
1240	.frame	$sp,$FRAMESIZE,$ra
1241	.mask	$SAVED_REGS_MASK,-$SZREG
1242	.set	noreorder
1243___
1244$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
1245	.cpload	$pf
1246___
1247$code.=<<___;
1248	$PTR_SUB $sp,$FRAMESIZE
1249	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
1250	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
1251	$REG_S	$s11,$FRAMESIZE-3*$SZREG($sp)
1252	$REG_S	$s10,$FRAMESIZE-4*$SZREG($sp)
1253	$REG_S	$s9,$FRAMESIZE-5*$SZREG($sp)
1254	$REG_S	$s8,$FRAMESIZE-6*$SZREG($sp)
1255	$REG_S	$s7,$FRAMESIZE-7*$SZREG($sp)
1256	$REG_S	$s6,$FRAMESIZE-8*$SZREG($sp)
1257	$REG_S	$s5,$FRAMESIZE-9*$SZREG($sp)
1258	$REG_S	$s4,$FRAMESIZE-10*$SZREG($sp)
1259___
1260$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
1261	$REG_S	\$15,$FRAMESIZE-11*$SZREG($sp)
1262	$REG_S	\$14,$FRAMESIZE-12*$SZREG($sp)
1263	$REG_S	\$13,$FRAMESIZE-13*$SZREG($sp)
1264	$REG_S	\$12,$FRAMESIZE-14*$SZREG($sp)
1265	$REG_S	$gp,$FRAMESIZE-15*$SZREG($sp)
1266___
1267$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
1268	.cplocal	$Tbl
1269	.cpsetup	$pf,$zero,AES_decrypt
1270___
1271$code.=<<___;
1272	.set	reorder
1273	$PTR_LA	$Tbl,AES_Td		# PIC-ified 'load address'
1274
1275#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
1276	lw	$s0,0($inp)
1277	lw	$s1,4($inp)
1278	lw	$s2,8($inp)
1279	lw	$s3,12($inp)
1280#else
1281	lwl	$s0,0+$MSB($inp)
1282	lwl	$s1,4+$MSB($inp)
1283	lwl	$s2,8+$MSB($inp)
1284	lwl	$s3,12+$MSB($inp)
1285	lwr	$s0,0+$LSB($inp)
1286	lwr	$s1,4+$LSB($inp)
1287	lwr	$s2,8+$LSB($inp)
1288	lwr	$s3,12+$LSB($inp)
1289#endif
1290
1291	bal	_mips_AES_decrypt
1292
1293#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
1294	sw	$s0,0($out)
1295	sw	$s1,4($out)
1296	sw	$s2,8($out)
1297	sw	$s3,12($out)
1298#else
1299	swr	$s0,0+$LSB($out)
1300	swr	$s1,4+$LSB($out)
1301	swr	$s2,8+$LSB($out)
1302	swr	$s3,12+$LSB($out)
1303	swl	$s0,0+$MSB($out)
1304	swl	$s1,4+$MSB($out)
1305	swl	$s2,8+$MSB($out)
1306	swl	$s3,12+$MSB($out)
1307#endif
1308
1309	.set	noreorder
1310	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
1311	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
1312	$REG_L	$s11,$FRAMESIZE-3*$SZREG($sp)
1313	$REG_L	$s10,$FRAMESIZE-4*$SZREG($sp)
1314	$REG_L	$s9,$FRAMESIZE-5*$SZREG($sp)
1315	$REG_L	$s8,$FRAMESIZE-6*$SZREG($sp)
1316	$REG_L	$s7,$FRAMESIZE-7*$SZREG($sp)
1317	$REG_L	$s6,$FRAMESIZE-8*$SZREG($sp)
1318	$REG_L	$s5,$FRAMESIZE-9*$SZREG($sp)
1319	$REG_L	$s4,$FRAMESIZE-10*$SZREG($sp)
1320___
1321$code.=<<___ if ($flavour =~ /nubi/i);
1322	$REG_L	\$15,$FRAMESIZE-11*$SZREG($sp)
1323	$REG_L	\$14,$FRAMESIZE-12*$SZREG($sp)
1324	$REG_L	\$13,$FRAMESIZE-13*$SZREG($sp)
1325	$REG_L	\$12,$FRAMESIZE-14*$SZREG($sp)
1326	$REG_L	$gp,$FRAMESIZE-15*$SZREG($sp)
1327___
1328$code.=<<___;
1329	jr	$ra
1330	$PTR_ADD $sp,$FRAMESIZE
1331.end	AES_decrypt
1332___
1333}}}
1334
1335{{{
# Frame layout and register aliases shared by the three key-schedule
# routines generated below (_mips_AES_set_encrypt_key,
# AES_set_encrypt_key and AES_set_decrypt_key).
1336my $FRAMESIZE=8*$SZREG;	# slots 1-2 hold $ra/$fp; slots 3-7 hold $s3-$s0,$gp under NUBI
1337my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc000f008" : "0xc0000000";	# operand of the .mask directives
1338
1339my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3);	# user key, key size in bits, schedule buffer, AES_Te4 address
1340my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);	# key words being expanded
1341my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);	# S-box lookup temporaries; $i1 shares $t0 with the status value
1342my ($rcon,$cnt)=($gp,$fp);	# cursor into the rcon words at $Tbl+256; loop counter, later the round count
1343
# _mips_AES_set_encrypt_key: internal key-expansion routine.  It is
# entered with $inp = user key, $bits = key size, $key = schedule buffer
# and $Tbl = address of AES_Te4; round constants are read through $rcon,
# which starts at $Tbl+256.  The status is left in $t0: -1 when $inp or
# $key is zero, -2 when $bits is not 128, 192 or 256, and 0 on success.
# On success the round count (10, 12 or 14) is stored at byte offset 240
# of the schedule and remains in $cnt, and $key is rewound to the start
# of the schedule.  The same heredoc also opens the public
# AES_set_encrypt_key wrapper (.globl/.ent/.frame/.mask).
1344$code.=<<___;
1345.align	5
1346.ent	_mips_AES_set_encrypt_key
1347_mips_AES_set_encrypt_key:
1348	.frame	$sp,0,$ra
1349	.set	noreorder
1350	beqz	$inp,.Lekey_done
1351	li	$t0,-1
1352	beqz	$key,.Lekey_done
1353	$PTR_ADD $rcon,$Tbl,256
1354
1355	.set	reorder
1356#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
1357	lw	$rk0,0($inp)		# load 128 bits
1358	lw	$rk1,4($inp)
1359	lw	$rk2,8($inp)
1360	lw	$rk3,12($inp)
1361#else
1362	lwl	$rk0,0+$MSB($inp)	# load 128 bits
1363	lwl	$rk1,4+$MSB($inp)
1364	lwl	$rk2,8+$MSB($inp)
1365	lwl	$rk3,12+$MSB($inp)
1366	lwr	$rk0,0+$LSB($inp)
1367	lwr	$rk1,4+$LSB($inp)
1368	lwr	$rk2,8+$LSB($inp)
1369	lwr	$rk3,12+$LSB($inp)
1370#endif
1371	li	$at,128
1372	.set	noreorder
1373	beq	$bits,$at,.L128bits
1374	li	$cnt,10
1375
1376	.set	reorder
1377#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
1378	lw	$rk4,16($inp)		# load 192 bits
1379	lw	$rk5,20($inp)
1380#else
1381	lwl	$rk4,16+$MSB($inp)	# load 192 bits
1382	lwl	$rk5,20+$MSB($inp)
1383	lwr	$rk4,16+$LSB($inp)
1384	lwr	$rk5,20+$LSB($inp)
1385#endif
1386	li	$at,192
1387	.set	noreorder
1388	beq	$bits,$at,.L192bits
1389	li	$cnt,8
1390
1391	.set	reorder
1392#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
1393	lw	$rk6,24($inp)		# load 256 bits
1394	lw	$rk7,28($inp)
1395#else
1396	lwl	$rk6,24+$MSB($inp)	# load 256 bits
1397	lwl	$rk7,28+$MSB($inp)
1398	lwr	$rk6,24+$LSB($inp)
1399	lwr	$rk7,28+$LSB($inp)
1400#endif
1401	li	$at,256
1402	.set	noreorder
1403	beq	$bits,$at,.L256bits
1404	li	$cnt,7
1405
1406	b	.Lekey_done
1407	li	$t0,-2
1408
1409.align	4
1410.L128bits:
1411	.set	reorder
1412	srl	$i0,$rk3,16
1413	srl	$i1,$rk3,8
1414	and	$i0,0xff
1415	and	$i1,0xff
1416	and	$i2,$rk3,0xff
1417	srl	$i3,$rk3,24
1418	$PTR_ADD $i0,$Tbl
1419	$PTR_ADD $i1,$Tbl
1420	$PTR_ADD $i2,$Tbl
1421	$PTR_ADD $i3,$Tbl
1422	lbu	$i0,0($i0)
1423	lbu	$i1,0($i1)
1424	lbu	$i2,0($i2)
1425	lbu	$i3,0($i3)
1426
1427	sw	$rk0,0($key)
1428	sw	$rk1,4($key)
1429	sw	$rk2,8($key)
1430	sw	$rk3,12($key)
1431	subu	$cnt,1
1432	$PTR_ADD $key,16
1433
1434	_bias	$i0,24
1435	_bias	$i1,16
1436	_bias	$i2,8
1437	_bias	$i3,0
1438
1439	xor	$rk0,$i0
1440	lw	$i0,0($rcon)
1441	xor	$rk0,$i1
1442	xor	$rk0,$i2
1443	xor	$rk0,$i3
1444	xor	$rk0,$i0
1445
1446	xor	$rk1,$rk0
1447	xor	$rk2,$rk1
1448	xor	$rk3,$rk2
1449
1450	.set	noreorder
1451	bnez	$cnt,.L128bits
1452	$PTR_ADD $rcon,4
1453
1454	sw	$rk0,0($key)
1455	sw	$rk1,4($key)
1456	sw	$rk2,8($key)
1457	li	$cnt,10
1458	sw	$rk3,12($key)
1459	li	$t0,0
1460	sw	$cnt,80($key)
1461	b	.Lekey_done
1462	$PTR_SUB $key,10*16
1463
1464.align	4
1465.L192bits:
1466	.set	reorder
1467	srl	$i0,$rk5,16
1468	srl	$i1,$rk5,8
1469	and	$i0,0xff
1470	and	$i1,0xff
1471	and	$i2,$rk5,0xff
1472	srl	$i3,$rk5,24
1473	$PTR_ADD $i0,$Tbl
1474	$PTR_ADD $i1,$Tbl
1475	$PTR_ADD $i2,$Tbl
1476	$PTR_ADD $i3,$Tbl
1477	lbu	$i0,0($i0)
1478	lbu	$i1,0($i1)
1479	lbu	$i2,0($i2)
1480	lbu	$i3,0($i3)
1481
1482	sw	$rk0,0($key)
1483	sw	$rk1,4($key)
1484	sw	$rk2,8($key)
1485	sw	$rk3,12($key)
1486	sw	$rk4,16($key)
1487	sw	$rk5,20($key)
1488	subu	$cnt,1
1489	$PTR_ADD $key,24
1490
1491	_bias	$i0,24
1492	_bias	$i1,16
1493	_bias	$i2,8
1494	_bias	$i3,0
1495
1496	xor	$rk0,$i0
1497	lw	$i0,0($rcon)
1498	xor	$rk0,$i1
1499	xor	$rk0,$i2
1500	xor	$rk0,$i3
1501	xor	$rk0,$i0
1502
1503	xor	$rk1,$rk0
1504	xor	$rk2,$rk1
1505	xor	$rk3,$rk2
1506	xor	$rk4,$rk3
1507	xor	$rk5,$rk4
1508
1509	.set	noreorder
1510	bnez	$cnt,.L192bits
1511	$PTR_ADD $rcon,4
1512
1513	sw	$rk0,0($key)
1514	sw	$rk1,4($key)
1515	sw	$rk2,8($key)
1516	li	$cnt,12
1517	sw	$rk3,12($key)
1518	li	$t0,0
1519	sw	$cnt,48($key)
1520	b	.Lekey_done
1521	$PTR_SUB $key,12*16
1522
1523.align	4
1524.L256bits:
1525	.set	reorder
1526	srl	$i0,$rk7,16
1527	srl	$i1,$rk7,8
1528	and	$i0,0xff
1529	and	$i1,0xff
1530	and	$i2,$rk7,0xff
1531	srl	$i3,$rk7,24
1532	$PTR_ADD $i0,$Tbl
1533	$PTR_ADD $i1,$Tbl
1534	$PTR_ADD $i2,$Tbl
1535	$PTR_ADD $i3,$Tbl
1536	lbu	$i0,0($i0)
1537	lbu	$i1,0($i1)
1538	lbu	$i2,0($i2)
1539	lbu	$i3,0($i3)
1540
1541	sw	$rk0,0($key)
1542	sw	$rk1,4($key)
1543	sw	$rk2,8($key)
1544	sw	$rk3,12($key)
1545	sw	$rk4,16($key)
1546	sw	$rk5,20($key)
1547	sw	$rk6,24($key)
1548	sw	$rk7,28($key)
1549	subu	$cnt,1
1550
1551	_bias	$i0,24
1552	_bias	$i1,16
1553	_bias	$i2,8
1554	_bias	$i3,0
1555
1556	xor	$rk0,$i0
1557	lw	$i0,0($rcon)
1558	xor	$rk0,$i1
1559	xor	$rk0,$i2
1560	xor	$rk0,$i3
1561	xor	$rk0,$i0
1562
1563	xor	$rk1,$rk0
1564	xor	$rk2,$rk1
1565	xor	$rk3,$rk2
1566	beqz	$cnt,.L256bits_done
1567
1568	srl	$i0,$rk3,24
1569	srl	$i1,$rk3,16
1570	srl	$i2,$rk3,8
1571	and	$i3,$rk3,0xff
1572	and	$i1,0xff
1573	and	$i2,0xff
1574	$PTR_ADD $i0,$Tbl
1575	$PTR_ADD $i1,$Tbl
1576	$PTR_ADD $i2,$Tbl
1577	$PTR_ADD $i3,$Tbl
1578	lbu	$i0,0($i0)
1579	lbu	$i1,0($i1)
1580	lbu	$i2,0($i2)
1581	lbu	$i3,0($i3)
1582	sll	$i0,24
1583	sll	$i1,16
1584	sll	$i2,8
1585
1586	xor	$rk4,$i0
1587	xor	$rk4,$i1
1588	xor	$rk4,$i2
1589	xor	$rk4,$i3
1590
1591	xor	$rk5,$rk4
1592	xor	$rk6,$rk5
1593	xor	$rk7,$rk6
1594
1595	$PTR_ADD $key,32
1596	.set	noreorder
1597	b	.L256bits
1598	$PTR_ADD $rcon,4
1599
1600.L256bits_done:
1601	sw	$rk0,32($key)
1602	sw	$rk1,36($key)
1603	sw	$rk2,40($key)
1604	li	$cnt,14
1605	sw	$rk3,44($key)
1606	li	$t0,0
1607	sw	$cnt,48($key)
1608	$PTR_SUB $key,12*16
1609
1610.Lekey_done:
1611	jr	$ra
1612	nop
1613.end	_mips_AES_set_encrypt_key
1614
1615.globl	AES_set_encrypt_key
1616.ent	AES_set_encrypt_key
1617AES_set_encrypt_key:
1618	.frame	$sp,$FRAMESIZE,$ra
1619	.mask	$SAVED_REGS_MASK,-$SZREG
1620	.set	noreorder
1621___
# AES_set_encrypt_key wrapper: build the frame, load the AES_Te4 address
# into $Tbl, call _mips_AES_set_encrypt_key, copy its status from $t0 to
# $a0 and return.  Under NUBI the prologue additionally saves $s3-$s0 and
# $gp in frame slots 3..7, and the epilogue must reload them from those
# same slots: the frame is only 8 slots deep, so any slot index above 8
# would address memory below $sp.
1622$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
1623	.cpload	$pf
1624___
1625$code.=<<___;	# allocate the frame; $ra and $fp are saved for every flavour
1626	$PTR_SUB $sp,$FRAMESIZE
1627	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
1628	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
1629___
1630$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
1631	$REG_S	$s3,$FRAMESIZE-3*$SZREG($sp)
1632	$REG_S	$s2,$FRAMESIZE-4*$SZREG($sp)
1633	$REG_S	$s1,$FRAMESIZE-5*$SZREG($sp)
1634	$REG_S	$s0,$FRAMESIZE-6*$SZREG($sp)
1635	$REG_S	$gp,$FRAMESIZE-7*$SZREG($sp)
1636___
1637$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
1638	.cplocal	$Tbl
1639	.cpsetup	$pf,$zero,AES_set_encrypt_key
1640___
1641$code.=<<___;	# call the worker, then start the epilogue
1642	.set	reorder
1643	$PTR_LA	$Tbl,AES_Te4		# PIC-ified 'load address'
1644
1645	bal	_mips_AES_set_encrypt_key
1646
1647	.set	noreorder
1648	move	$a0,$t0
1649	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
1650	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
1651___
1652$code.=<<___ if ($flavour =~ /nubi/i);	# reload from the slots the NUBI prologue wrote
1653	$REG_L	$s3,$FRAMESIZE-3*$SZREG($sp)
1654	$REG_L	$s2,$FRAMESIZE-4*$SZREG($sp)
1655	$REG_L	$s1,$FRAMESIZE-5*$SZREG($sp)
1656	$REG_L	$s0,$FRAMESIZE-6*$SZREG($sp)
1657	$REG_L	$gp,$FRAMESIZE-7*$SZREG($sp)
1658___
1659$code.=<<___;	# return; the frame is released in the jr delay slot
1660	jr	$ra
1661	$PTR_ADD $sp,$FRAMESIZE
1662.end	AES_set_encrypt_key
1663___
1664
# AES_set_decrypt_key: builds the encryption schedule through
# _mips_AES_set_encrypt_key and, unless that reported a negative status
# in $t0, reverses the order of the 16-byte round keys in place (.Lswap)
# and then rewrites every word of the round keys between the first and
# the last one (.Lmix).  The status is copied from $t0 to $a0 on exit.
# Under NUBI the prologue saves $s3-$s0 and $gp in frame slots 3..7 and
# the epilogue must reload them from those same slots: the frame is only
# 8 slots deep, so any slot index above 8 would address memory below $sp.
1665my ($head,$tail)=($inp,$bits);	# argument registers reused as the two cursors of .Lswap
1666my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);	# $tpN: current word scaled by N (2/4/8 by packed doubling, 9/b/d/e by xor)
1667my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);	# scratch mask and the three constants used by the doubling steps
1668$code.=<<___;
1669.align	5
1670.globl	AES_set_decrypt_key
1671.ent	AES_set_decrypt_key
1672AES_set_decrypt_key:
1673	.frame	$sp,$FRAMESIZE,$ra
1674	.mask	$SAVED_REGS_MASK,-$SZREG
1675	.set	noreorder
1676___
1677$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
1678	.cpload	$pf
1679___
1680$code.=<<___;	# allocate the frame; $ra and $fp are saved for every flavour
1681	$PTR_SUB $sp,$FRAMESIZE
1682	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
1683	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
1684___
1685$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
1686	$REG_S	$s3,$FRAMESIZE-3*$SZREG($sp)
1687	$REG_S	$s2,$FRAMESIZE-4*$SZREG($sp)
1688	$REG_S	$s1,$FRAMESIZE-5*$SZREG($sp)
1689	$REG_S	$s0,$FRAMESIZE-6*$SZREG($sp)
1690	$REG_S	$gp,$FRAMESIZE-7*$SZREG($sp)
1691___
1692$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
1693	.cplocal	$Tbl
1694	.cpsetup	$pf,$zero,AES_set_decrypt_key
1695___
1696$code.=<<___;	# expand the key, reverse the round keys, transform the inner ones
1697	.set	reorder
1698	$PTR_LA	$Tbl,AES_Te4		# PIC-ified 'load address'
1699
1700	bal	_mips_AES_set_encrypt_key
1701
1702	bltz	$t0,.Ldkey_done
1703
1704	sll	$at,$cnt,4
1705	$PTR_ADD $head,$key,0
1706	$PTR_ADD $tail,$key,$at
1707.align	4
1708.Lswap:
1709	lw	$rk0,0($head)
1710	lw	$rk1,4($head)
1711	lw	$rk2,8($head)
1712	lw	$rk3,12($head)
1713	lw	$rk4,0($tail)
1714	lw	$rk5,4($tail)
1715	lw	$rk6,8($tail)
1716	lw	$rk7,12($tail)
1717	sw	$rk0,0($tail)
1718	sw	$rk1,4($tail)
1719	sw	$rk2,8($tail)
1720	sw	$rk3,12($tail)
1721	$PTR_ADD $head,16
1722	$PTR_SUB $tail,16
1723	sw	$rk4,-16($head)
1724	sw	$rk5,-12($head)
1725	sw	$rk6,-8($head)
1726	sw	$rk7,-4($head)
1727	bne	$head,$tail,.Lswap
1728
1729	lw	$tp1,16($key)		# modulo-scheduled
1730	lui	$x80808080,0x8080
1731	subu	$cnt,1
1732	or	$x80808080,0x8080
1733	sll	$cnt,2
1734	$PTR_ADD $key,16
1735	lui	$x1b1b1b1b,0x1b1b
1736	nor	$x7f7f7f7f,$zero,$x80808080
1737	or	$x1b1b1b1b,0x1b1b
1738.align	4
1739.Lmix:
1740	and	$m,$tp1,$x80808080
1741	and	$tp2,$tp1,$x7f7f7f7f
1742	srl	$tp4,$m,7
1743	addu	$tp2,$tp2		# tp2<<1
1744	subu	$m,$tp4
1745	and	$m,$x1b1b1b1b
1746	xor	$tp2,$m
1747
1748	and	$m,$tp2,$x80808080
1749	and	$tp4,$tp2,$x7f7f7f7f
1750	srl	$tp8,$m,7
1751	addu	$tp4,$tp4		# tp4<<1
1752	subu	$m,$tp8
1753	and	$m,$x1b1b1b1b
1754	xor	$tp4,$m
1755
1756	and	$m,$tp4,$x80808080
1757	and	$tp8,$tp4,$x7f7f7f7f
1758	srl	$tp9,$m,7
1759	addu	$tp8,$tp8		# tp8<<1
1760	subu	$m,$tp9
1761	and	$m,$x1b1b1b1b
1762	xor	$tp8,$m
1763
1764	xor	$tp9,$tp8,$tp1
1765	xor	$tpe,$tp8,$tp4
1766	xor	$tpb,$tp9,$tp2
1767	xor	$tpd,$tp9,$tp4
1768
1769#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1770	rotr	$tp1,$tpd,16
1771	 xor	$tpe,$tp2
1772	rotr	$tp2,$tp9,8
1773	xor	$tpe,$tp1
1774	rotr	$tp4,$tpb,24
1775	xor	$tpe,$tp2
1776	lw	$tp1,4($key)		# modulo-scheduled
1777	xor	$tpe,$tp4
1778#else
1779	_ror	$tp1,$tpd,16
1780	 xor	$tpe,$tp2
1781	_ror	$tp2,$tpd,-16
1782	xor	$tpe,$tp1
1783	_ror	$tp1,$tp9,8
1784	xor	$tpe,$tp2
1785	_ror	$tp2,$tp9,-24
1786	xor	$tpe,$tp1
1787	_ror	$tp1,$tpb,24
1788	xor	$tpe,$tp2
1789	_ror	$tp2,$tpb,-8
1790	xor	$tpe,$tp1
1791	lw	$tp1,4($key)		# modulo-scheduled
1792	xor	$tpe,$tp2
1793#endif
1794	subu	$cnt,1
1795	sw	$tpe,0($key)
1796	$PTR_ADD $key,4
1797	bnez	$cnt,.Lmix
1798
1799	li	$t0,0
1800.Ldkey_done:
1801	.set	noreorder
1802	move	$a0,$t0
1803	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
1804	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
1805___
1806$code.=<<___ if ($flavour =~ /nubi/i);	# reload from the slots the NUBI prologue wrote
1807	$REG_L	$s3,$FRAMESIZE-3*$SZREG($sp)
1808	$REG_L	$s2,$FRAMESIZE-4*$SZREG($sp)
1809	$REG_L	$s1,$FRAMESIZE-5*$SZREG($sp)
1810	$REG_L	$s0,$FRAMESIZE-6*$SZREG($sp)
1811	$REG_L	$gp,$FRAMESIZE-7*$SZREG($sp)
1812___
1813$code.=<<___;	# return; the frame is released in the jr delay slot
1814	jr	$ra
1815	$PTR_ADD $sp,$FRAMESIZE
1816.end	AES_set_decrypt_key
1817___
1818}}}
1819
1820######################################################################
1821# Tables are emitted byte by byte (.byte), which keeps them endian-neutral
1822$code.=<<___;
1823.rdata
1824.align	10
1825AES_Te:
1826.byte	0xc6,0x63,0x63,0xa5,	0xf8,0x7c,0x7c,0x84	# Te0
1827.byte	0xee,0x77,0x77,0x99,	0xf6,0x7b,0x7b,0x8d
1828.byte	0xff,0xf2,0xf2,0x0d,	0xd6,0x6b,0x6b,0xbd
1829.byte	0xde,0x6f,0x6f,0xb1,	0x91,0xc5,0xc5,0x54
1830.byte	0x60,0x30,0x30,0x50,	0x02,0x01,0x01,0x03
1831.byte	0xce,0x67,0x67,0xa9,	0x56,0x2b,0x2b,0x7d
1832.byte	0xe7,0xfe,0xfe,0x19,	0xb5,0xd7,0xd7,0x62
1833.byte	0x4d,0xab,0xab,0xe6,	0xec,0x76,0x76,0x9a
1834.byte	0x8f,0xca,0xca,0x45,	0x1f,0x82,0x82,0x9d
1835.byte	0x89,0xc9,0xc9,0x40,	0xfa,0x7d,0x7d,0x87
1836.byte	0xef,0xfa,0xfa,0x15,	0xb2,0x59,0x59,0xeb
1837.byte	0x8e,0x47,0x47,0xc9,	0xfb,0xf0,0xf0,0x0b
1838.byte	0x41,0xad,0xad,0xec,	0xb3,0xd4,0xd4,0x67
1839.byte	0x5f,0xa2,0xa2,0xfd,	0x45,0xaf,0xaf,0xea
1840.byte	0x23,0x9c,0x9c,0xbf,	0x53,0xa4,0xa4,0xf7
1841.byte	0xe4,0x72,0x72,0x96,	0x9b,0xc0,0xc0,0x5b
1842.byte	0x75,0xb7,0xb7,0xc2,	0xe1,0xfd,0xfd,0x1c
1843.byte	0x3d,0x93,0x93,0xae,	0x4c,0x26,0x26,0x6a
1844.byte	0x6c,0x36,0x36,0x5a,	0x7e,0x3f,0x3f,0x41
1845.byte	0xf5,0xf7,0xf7,0x02,	0x83,0xcc,0xcc,0x4f
1846.byte	0x68,0x34,0x34,0x5c,	0x51,0xa5,0xa5,0xf4
1847.byte	0xd1,0xe5,0xe5,0x34,	0xf9,0xf1,0xf1,0x08
1848.byte	0xe2,0x71,0x71,0x93,	0xab,0xd8,0xd8,0x73
1849.byte	0x62,0x31,0x31,0x53,	0x2a,0x15,0x15,0x3f
1850.byte	0x08,0x04,0x04,0x0c,	0x95,0xc7,0xc7,0x52
1851.byte	0x46,0x23,0x23,0x65,	0x9d,0xc3,0xc3,0x5e
1852.byte	0x30,0x18,0x18,0x28,	0x37,0x96,0x96,0xa1
1853.byte	0x0a,0x05,0x05,0x0f,	0x2f,0x9a,0x9a,0xb5
1854.byte	0x0e,0x07,0x07,0x09,	0x24,0x12,0x12,0x36
1855.byte	0x1b,0x80,0x80,0x9b,	0xdf,0xe2,0xe2,0x3d
1856.byte	0xcd,0xeb,0xeb,0x26,	0x4e,0x27,0x27,0x69
1857.byte	0x7f,0xb2,0xb2,0xcd,	0xea,0x75,0x75,0x9f
1858.byte	0x12,0x09,0x09,0x1b,	0x1d,0x83,0x83,0x9e
1859.byte	0x58,0x2c,0x2c,0x74,	0x34,0x1a,0x1a,0x2e
1860.byte	0x36,0x1b,0x1b,0x2d,	0xdc,0x6e,0x6e,0xb2
1861.byte	0xb4,0x5a,0x5a,0xee,	0x5b,0xa0,0xa0,0xfb
1862.byte	0xa4,0x52,0x52,0xf6,	0x76,0x3b,0x3b,0x4d
1863.byte	0xb7,0xd6,0xd6,0x61,	0x7d,0xb3,0xb3,0xce
1864.byte	0x52,0x29,0x29,0x7b,	0xdd,0xe3,0xe3,0x3e
1865.byte	0x5e,0x2f,0x2f,0x71,	0x13,0x84,0x84,0x97
1866.byte	0xa6,0x53,0x53,0xf5,	0xb9,0xd1,0xd1,0x68
1867.byte	0x00,0x00,0x00,0x00,	0xc1,0xed,0xed,0x2c
1868.byte	0x40,0x20,0x20,0x60,	0xe3,0xfc,0xfc,0x1f
1869.byte	0x79,0xb1,0xb1,0xc8,	0xb6,0x5b,0x5b,0xed
1870.byte	0xd4,0x6a,0x6a,0xbe,	0x8d,0xcb,0xcb,0x46
1871.byte	0x67,0xbe,0xbe,0xd9,	0x72,0x39,0x39,0x4b
1872.byte	0x94,0x4a,0x4a,0xde,	0x98,0x4c,0x4c,0xd4
1873.byte	0xb0,0x58,0x58,0xe8,	0x85,0xcf,0xcf,0x4a
1874.byte	0xbb,0xd0,0xd0,0x6b,	0xc5,0xef,0xef,0x2a
1875.byte	0x4f,0xaa,0xaa,0xe5,	0xed,0xfb,0xfb,0x16
1876.byte	0x86,0x43,0x43,0xc5,	0x9a,0x4d,0x4d,0xd7
1877.byte	0x66,0x33,0x33,0x55,	0x11,0x85,0x85,0x94
1878.byte	0x8a,0x45,0x45,0xcf,	0xe9,0xf9,0xf9,0x10
1879.byte	0x04,0x02,0x02,0x06,	0xfe,0x7f,0x7f,0x81
1880.byte	0xa0,0x50,0x50,0xf0,	0x78,0x3c,0x3c,0x44
1881.byte	0x25,0x9f,0x9f,0xba,	0x4b,0xa8,0xa8,0xe3
1882.byte	0xa2,0x51,0x51,0xf3,	0x5d,0xa3,0xa3,0xfe
1883.byte	0x80,0x40,0x40,0xc0,	0x05,0x8f,0x8f,0x8a
1884.byte	0x3f,0x92,0x92,0xad,	0x21,0x9d,0x9d,0xbc
1885.byte	0x70,0x38,0x38,0x48,	0xf1,0xf5,0xf5,0x04
1886.byte	0x63,0xbc,0xbc,0xdf,	0x77,0xb6,0xb6,0xc1
1887.byte	0xaf,0xda,0xda,0x75,	0x42,0x21,0x21,0x63
1888.byte	0x20,0x10,0x10,0x30,	0xe5,0xff,0xff,0x1a
1889.byte	0xfd,0xf3,0xf3,0x0e,	0xbf,0xd2,0xd2,0x6d
1890.byte	0x81,0xcd,0xcd,0x4c,	0x18,0x0c,0x0c,0x14
1891.byte	0x26,0x13,0x13,0x35,	0xc3,0xec,0xec,0x2f
1892.byte	0xbe,0x5f,0x5f,0xe1,	0x35,0x97,0x97,0xa2
1893.byte	0x88,0x44,0x44,0xcc,	0x2e,0x17,0x17,0x39
1894.byte	0x93,0xc4,0xc4,0x57,	0x55,0xa7,0xa7,0xf2
1895.byte	0xfc,0x7e,0x7e,0x82,	0x7a,0x3d,0x3d,0x47
1896.byte	0xc8,0x64,0x64,0xac,	0xba,0x5d,0x5d,0xe7
1897.byte	0x32,0x19,0x19,0x2b,	0xe6,0x73,0x73,0x95
1898.byte	0xc0,0x60,0x60,0xa0,	0x19,0x81,0x81,0x98
1899.byte	0x9e,0x4f,0x4f,0xd1,	0xa3,0xdc,0xdc,0x7f
1900.byte	0x44,0x22,0x22,0x66,	0x54,0x2a,0x2a,0x7e
1901.byte	0x3b,0x90,0x90,0xab,	0x0b,0x88,0x88,0x83
1902.byte	0x8c,0x46,0x46,0xca,	0xc7,0xee,0xee,0x29
1903.byte	0x6b,0xb8,0xb8,0xd3,	0x28,0x14,0x14,0x3c
1904.byte	0xa7,0xde,0xde,0x79,	0xbc,0x5e,0x5e,0xe2
1905.byte	0x16,0x0b,0x0b,0x1d,	0xad,0xdb,0xdb,0x76
1906.byte	0xdb,0xe0,0xe0,0x3b,	0x64,0x32,0x32,0x56
1907.byte	0x74,0x3a,0x3a,0x4e,	0x14,0x0a,0x0a,0x1e
1908.byte	0x92,0x49,0x49,0xdb,	0x0c,0x06,0x06,0x0a
1909.byte	0x48,0x24,0x24,0x6c,	0xb8,0x5c,0x5c,0xe4
1910.byte	0x9f,0xc2,0xc2,0x5d,	0xbd,0xd3,0xd3,0x6e
1911.byte	0x43,0xac,0xac,0xef,	0xc4,0x62,0x62,0xa6
1912.byte	0x39,0x91,0x91,0xa8,	0x31,0x95,0x95,0xa4
1913.byte	0xd3,0xe4,0xe4,0x37,	0xf2,0x79,0x79,0x8b
1914.byte	0xd5,0xe7,0xe7,0x32,	0x8b,0xc8,0xc8,0x43
1915.byte	0x6e,0x37,0x37,0x59,	0xda,0x6d,0x6d,0xb7
1916.byte	0x01,0x8d,0x8d,0x8c,	0xb1,0xd5,0xd5,0x64
1917.byte	0x9c,0x4e,0x4e,0xd2,	0x49,0xa9,0xa9,0xe0
1918.byte	0xd8,0x6c,0x6c,0xb4,	0xac,0x56,0x56,0xfa
1919.byte	0xf3,0xf4,0xf4,0x07,	0xcf,0xea,0xea,0x25
1920.byte	0xca,0x65,0x65,0xaf,	0xf4,0x7a,0x7a,0x8e
1921.byte	0x47,0xae,0xae,0xe9,	0x10,0x08,0x08,0x18
1922.byte	0x6f,0xba,0xba,0xd5,	0xf0,0x78,0x78,0x88
1923.byte	0x4a,0x25,0x25,0x6f,	0x5c,0x2e,0x2e,0x72
1924.byte	0x38,0x1c,0x1c,0x24,	0x57,0xa6,0xa6,0xf1
1925.byte	0x73,0xb4,0xb4,0xc7,	0x97,0xc6,0xc6,0x51
1926.byte	0xcb,0xe8,0xe8,0x23,	0xa1,0xdd,0xdd,0x7c
1927.byte	0xe8,0x74,0x74,0x9c,	0x3e,0x1f,0x1f,0x21
1928.byte	0x96,0x4b,0x4b,0xdd,	0x61,0xbd,0xbd,0xdc
1929.byte	0x0d,0x8b,0x8b,0x86,	0x0f,0x8a,0x8a,0x85
1930.byte	0xe0,0x70,0x70,0x90,	0x7c,0x3e,0x3e,0x42
1931.byte	0x71,0xb5,0xb5,0xc4,	0xcc,0x66,0x66,0xaa
1932.byte	0x90,0x48,0x48,0xd8,	0x06,0x03,0x03,0x05
1933.byte	0xf7,0xf6,0xf6,0x01,	0x1c,0x0e,0x0e,0x12
1934.byte	0xc2,0x61,0x61,0xa3,	0x6a,0x35,0x35,0x5f
1935.byte	0xae,0x57,0x57,0xf9,	0x69,0xb9,0xb9,0xd0
1936.byte	0x17,0x86,0x86,0x91,	0x99,0xc1,0xc1,0x58
1937.byte	0x3a,0x1d,0x1d,0x27,	0x27,0x9e,0x9e,0xb9
1938.byte	0xd9,0xe1,0xe1,0x38,	0xeb,0xf8,0xf8,0x13
1939.byte	0x2b,0x98,0x98,0xb3,	0x22,0x11,0x11,0x33
1940.byte	0xd2,0x69,0x69,0xbb,	0xa9,0xd9,0xd9,0x70
1941.byte	0x07,0x8e,0x8e,0x89,	0x33,0x94,0x94,0xa7
1942.byte	0x2d,0x9b,0x9b,0xb6,	0x3c,0x1e,0x1e,0x22
1943.byte	0x15,0x87,0x87,0x92,	0xc9,0xe9,0xe9,0x20
1944.byte	0x87,0xce,0xce,0x49,	0xaa,0x55,0x55,0xff
1945.byte	0x50,0x28,0x28,0x78,	0xa5,0xdf,0xdf,0x7a
1946.byte	0x03,0x8c,0x8c,0x8f,	0x59,0xa1,0xa1,0xf8
1947.byte	0x09,0x89,0x89,0x80,	0x1a,0x0d,0x0d,0x17
1948.byte	0x65,0xbf,0xbf,0xda,	0xd7,0xe6,0xe6,0x31
1949.byte	0x84,0x42,0x42,0xc6,	0xd0,0x68,0x68,0xb8
1950.byte	0x82,0x41,0x41,0xc3,	0x29,0x99,0x99,0xb0
1951.byte	0x5a,0x2d,0x2d,0x77,	0x1e,0x0f,0x0f,0x11
1952.byte	0x7b,0xb0,0xb0,0xcb,	0xa8,0x54,0x54,0xfc
1953.byte	0x6d,0xbb,0xbb,0xd6,	0x2c,0x16,0x16,0x3a
1954
1955AES_Td:
1956.byte	0x51,0xf4,0xa7,0x50,	0x7e,0x41,0x65,0x53	# Td0
1957.byte	0x1a,0x17,0xa4,0xc3,	0x3a,0x27,0x5e,0x96
1958.byte	0x3b,0xab,0x6b,0xcb,	0x1f,0x9d,0x45,0xf1
1959.byte	0xac,0xfa,0x58,0xab,	0x4b,0xe3,0x03,0x93
1960.byte	0x20,0x30,0xfa,0x55,	0xad,0x76,0x6d,0xf6
1961.byte	0x88,0xcc,0x76,0x91,	0xf5,0x02,0x4c,0x25
1962.byte	0x4f,0xe5,0xd7,0xfc,	0xc5,0x2a,0xcb,0xd7
1963.byte	0x26,0x35,0x44,0x80,	0xb5,0x62,0xa3,0x8f
1964.byte	0xde,0xb1,0x5a,0x49,	0x25,0xba,0x1b,0x67
1965.byte	0x45,0xea,0x0e,0x98,	0x5d,0xfe,0xc0,0xe1
1966.byte	0xc3,0x2f,0x75,0x02,	0x81,0x4c,0xf0,0x12
1967.byte	0x8d,0x46,0x97,0xa3,	0x6b,0xd3,0xf9,0xc6
1968.byte	0x03,0x8f,0x5f,0xe7,	0x15,0x92,0x9c,0x95
1969.byte	0xbf,0x6d,0x7a,0xeb,	0x95,0x52,0x59,0xda
1970.byte	0xd4,0xbe,0x83,0x2d,	0x58,0x74,0x21,0xd3
1971.byte	0x49,0xe0,0x69,0x29,	0x8e,0xc9,0xc8,0x44
1972.byte	0x75,0xc2,0x89,0x6a,	0xf4,0x8e,0x79,0x78
1973.byte	0x99,0x58,0x3e,0x6b,	0x27,0xb9,0x71,0xdd
1974.byte	0xbe,0xe1,0x4f,0xb6,	0xf0,0x88,0xad,0x17
1975.byte	0xc9,0x20,0xac,0x66,	0x7d,0xce,0x3a,0xb4
1976.byte	0x63,0xdf,0x4a,0x18,	0xe5,0x1a,0x31,0x82
1977.byte	0x97,0x51,0x33,0x60,	0x62,0x53,0x7f,0x45
1978.byte	0xb1,0x64,0x77,0xe0,	0xbb,0x6b,0xae,0x84
1979.byte	0xfe,0x81,0xa0,0x1c,	0xf9,0x08,0x2b,0x94
1980.byte	0x70,0x48,0x68,0x58,	0x8f,0x45,0xfd,0x19
1981.byte	0x94,0xde,0x6c,0x87,	0x52,0x7b,0xf8,0xb7
1982.byte	0xab,0x73,0xd3,0x23,	0x72,0x4b,0x02,0xe2
1983.byte	0xe3,0x1f,0x8f,0x57,	0x66,0x55,0xab,0x2a
1984.byte	0xb2,0xeb,0x28,0x07,	0x2f,0xb5,0xc2,0x03
1985.byte	0x86,0xc5,0x7b,0x9a,	0xd3,0x37,0x08,0xa5
1986.byte	0x30,0x28,0x87,0xf2,	0x23,0xbf,0xa5,0xb2
1987.byte	0x02,0x03,0x6a,0xba,	0xed,0x16,0x82,0x5c
1988.byte	0x8a,0xcf,0x1c,0x2b,	0xa7,0x79,0xb4,0x92
1989.byte	0xf3,0x07,0xf2,0xf0,	0x4e,0x69,0xe2,0xa1
1990.byte	0x65,0xda,0xf4,0xcd,	0x06,0x05,0xbe,0xd5
1991.byte	0xd1,0x34,0x62,0x1f,	0xc4,0xa6,0xfe,0x8a
1992.byte	0x34,0x2e,0x53,0x9d,	0xa2,0xf3,0x55,0xa0
1993.byte	0x05,0x8a,0xe1,0x32,	0xa4,0xf6,0xeb,0x75
1994.byte	0x0b,0x83,0xec,0x39,	0x40,0x60,0xef,0xaa
1995.byte	0x5e,0x71,0x9f,0x06,	0xbd,0x6e,0x10,0x51
1996.byte	0x3e,0x21,0x8a,0xf9,	0x96,0xdd,0x06,0x3d
1997.byte	0xdd,0x3e,0x05,0xae,	0x4d,0xe6,0xbd,0x46
1998.byte	0x91,0x54,0x8d,0xb5,	0x71,0xc4,0x5d,0x05
1999.byte	0x04,0x06,0xd4,0x6f,	0x60,0x50,0x15,0xff
2000.byte	0x19,0x98,0xfb,0x24,	0xd6,0xbd,0xe9,0x97
2001.byte	0x89,0x40,0x43,0xcc,	0x67,0xd9,0x9e,0x77
2002.byte	0xb0,0xe8,0x42,0xbd,	0x07,0x89,0x8b,0x88
2003.byte	0xe7,0x19,0x5b,0x38,	0x79,0xc8,0xee,0xdb
2004.byte	0xa1,0x7c,0x0a,0x47,	0x7c,0x42,0x0f,0xe9
2005.byte	0xf8,0x84,0x1e,0xc9,	0x00,0x00,0x00,0x00
2006.byte	0x09,0x80,0x86,0x83,	0x32,0x2b,0xed,0x48
2007.byte	0x1e,0x11,0x70,0xac,	0x6c,0x5a,0x72,0x4e
2008.byte	0xfd,0x0e,0xff,0xfb,	0x0f,0x85,0x38,0x56
2009.byte	0x3d,0xae,0xd5,0x1e,	0x36,0x2d,0x39,0x27
2010.byte	0x0a,0x0f,0xd9,0x64,	0x68,0x5c,0xa6,0x21
2011.byte	0x9b,0x5b,0x54,0xd1,	0x24,0x36,0x2e,0x3a
2012.byte	0x0c,0x0a,0x67,0xb1,	0x93,0x57,0xe7,0x0f
2013.byte	0xb4,0xee,0x96,0xd2,	0x1b,0x9b,0x91,0x9e
2014.byte	0x80,0xc0,0xc5,0x4f,	0x61,0xdc,0x20,0xa2
2015.byte	0x5a,0x77,0x4b,0x69,	0x1c,0x12,0x1a,0x16
2016.byte	0xe2,0x93,0xba,0x0a,	0xc0,0xa0,0x2a,0xe5
2017.byte	0x3c,0x22,0xe0,0x43,	0x12,0x1b,0x17,0x1d
2018.byte	0x0e,0x09,0x0d,0x0b,	0xf2,0x8b,0xc7,0xad
2019.byte	0x2d,0xb6,0xa8,0xb9,	0x14,0x1e,0xa9,0xc8
2020.byte	0x57,0xf1,0x19,0x85,	0xaf,0x75,0x07,0x4c
2021.byte	0xee,0x99,0xdd,0xbb,	0xa3,0x7f,0x60,0xfd
2022.byte	0xf7,0x01,0x26,0x9f,	0x5c,0x72,0xf5,0xbc
2023.byte	0x44,0x66,0x3b,0xc5,	0x5b,0xfb,0x7e,0x34
2024.byte	0x8b,0x43,0x29,0x76,	0xcb,0x23,0xc6,0xdc
2025.byte	0xb6,0xed,0xfc,0x68,	0xb8,0xe4,0xf1,0x63
2026.byte	0xd7,0x31,0xdc,0xca,	0x42,0x63,0x85,0x10
2027.byte	0x13,0x97,0x22,0x40,	0x84,0xc6,0x11,0x20
2028.byte	0x85,0x4a,0x24,0x7d,	0xd2,0xbb,0x3d,0xf8
2029.byte	0xae,0xf9,0x32,0x11,	0xc7,0x29,0xa1,0x6d
2030.byte	0x1d,0x9e,0x2f,0x4b,	0xdc,0xb2,0x30,0xf3
2031.byte	0x0d,0x86,0x52,0xec,	0x77,0xc1,0xe3,0xd0
2032.byte	0x2b,0xb3,0x16,0x6c,	0xa9,0x70,0xb9,0x99
2033.byte	0x11,0x94,0x48,0xfa,	0x47,0xe9,0x64,0x22
2034.byte	0xa8,0xfc,0x8c,0xc4,	0xa0,0xf0,0x3f,0x1a
2035.byte	0x56,0x7d,0x2c,0xd8,	0x22,0x33,0x90,0xef
2036.byte	0x87,0x49,0x4e,0xc7,	0xd9,0x38,0xd1,0xc1
2037.byte	0x8c,0xca,0xa2,0xfe,	0x98,0xd4,0x0b,0x36
2038.byte	0xa6,0xf5,0x81,0xcf,	0xa5,0x7a,0xde,0x28
2039.byte	0xda,0xb7,0x8e,0x26,	0x3f,0xad,0xbf,0xa4
2040.byte	0x2c,0x3a,0x9d,0xe4,	0x50,0x78,0x92,0x0d
2041.byte	0x6a,0x5f,0xcc,0x9b,	0x54,0x7e,0x46,0x62
2042.byte	0xf6,0x8d,0x13,0xc2,	0x90,0xd8,0xb8,0xe8
2043.byte	0x2e,0x39,0xf7,0x5e,	0x82,0xc3,0xaf,0xf5
2044.byte	0x9f,0x5d,0x80,0xbe,	0x69,0xd0,0x93,0x7c
2045.byte	0x6f,0xd5,0x2d,0xa9,	0xcf,0x25,0x12,0xb3
2046.byte	0xc8,0xac,0x99,0x3b,	0x10,0x18,0x7d,0xa7
2047.byte	0xe8,0x9c,0x63,0x6e,	0xdb,0x3b,0xbb,0x7b
2048.byte	0xcd,0x26,0x78,0x09,	0x6e,0x59,0x18,0xf4
2049.byte	0xec,0x9a,0xb7,0x01,	0x83,0x4f,0x9a,0xa8
2050.byte	0xe6,0x95,0x6e,0x65,	0xaa,0xff,0xe6,0x7e
2051.byte	0x21,0xbc,0xcf,0x08,	0xef,0x15,0xe8,0xe6
2052.byte	0xba,0xe7,0x9b,0xd9,	0x4a,0x6f,0x36,0xce
2053.byte	0xea,0x9f,0x09,0xd4,	0x29,0xb0,0x7c,0xd6
2054.byte	0x31,0xa4,0xb2,0xaf,	0x2a,0x3f,0x23,0x31
2055.byte	0xc6,0xa5,0x94,0x30,	0x35,0xa2,0x66,0xc0
2056.byte	0x74,0x4e,0xbc,0x37,	0xfc,0x82,0xca,0xa6
2057.byte	0xe0,0x90,0xd0,0xb0,	0x33,0xa7,0xd8,0x15
2058.byte	0xf1,0x04,0x98,0x4a,	0x41,0xec,0xda,0xf7
2059.byte	0x7f,0xcd,0x50,0x0e,	0x17,0x91,0xf6,0x2f
2060.byte	0x76,0x4d,0xd6,0x8d,	0x43,0xef,0xb0,0x4d
2061.byte	0xcc,0xaa,0x4d,0x54,	0xe4,0x96,0x04,0xdf
2062.byte	0x9e,0xd1,0xb5,0xe3,	0x4c,0x6a,0x88,0x1b
2063.byte	0xc1,0x2c,0x1f,0xb8,	0x46,0x65,0x51,0x7f
2064.byte	0x9d,0x5e,0xea,0x04,	0x01,0x8c,0x35,0x5d
2065.byte	0xfa,0x87,0x74,0x73,	0xfb,0x0b,0x41,0x2e
2066.byte	0xb3,0x67,0x1d,0x5a,	0x92,0xdb,0xd2,0x52
2067.byte	0xe9,0x10,0x56,0x33,	0x6d,0xd6,0x47,0x13
2068.byte	0x9a,0xd7,0x61,0x8c,	0x37,0xa1,0x0c,0x7a
2069.byte	0x59,0xf8,0x14,0x8e,	0xeb,0x13,0x3c,0x89
2070.byte	0xce,0xa9,0x27,0xee,	0xb7,0x61,0xc9,0x35
2071.byte	0xe1,0x1c,0xe5,0xed,	0x7a,0x47,0xb1,0x3c
2072.byte	0x9c,0xd2,0xdf,0x59,	0x55,0xf2,0x73,0x3f
2073.byte	0x18,0x14,0xce,0x79,	0x73,0xc7,0x37,0xbf
2074.byte	0x53,0xf7,0xcd,0xea,	0x5f,0xfd,0xaa,0x5b
2075.byte	0xdf,0x3d,0x6f,0x14,	0x78,0x44,0xdb,0x86
2076.byte	0xca,0xaf,0xf3,0x81,	0xb9,0x68,0xc4,0x3e
2077.byte	0x38,0x24,0x34,0x2c,	0xc2,0xa3,0x40,0x5f
2078.byte	0x16,0x1d,0xc3,0x72,	0xbc,0xe2,0x25,0x0c
2079.byte	0x28,0x3c,0x49,0x8b,	0xff,0x0d,0x95,0x41
2080.byte	0x39,0xa8,0x01,0x71,	0x08,0x0c,0xb3,0xde
2081.byte	0xd8,0xb4,0xe4,0x9c,	0x64,0x56,0xc1,0x90
2082.byte	0x7b,0xcb,0x84,0x61,	0xd5,0x32,0xb6,0x70
2083.byte	0x48,0x6c,0x5c,0x74,	0xd0,0xb8,0x57,0x42
2084
2085.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38	# Td4
2086.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
2087.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
2088.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
2089.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
2090.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
2091.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
2092.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
2093.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
2094.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
2095.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
2096.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
2097.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
2098.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
2099.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
2100.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
2101.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
2102.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
2103.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
2104.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
2105.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
2106.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
2107.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
2108.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
2109.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
2110.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
2111.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
2112.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
2113.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
2114.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
2115.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
2116.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
2117
2118AES_Te4:
2119.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5	# Te4
2120.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
2121.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
2122.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
2123.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
2124.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
2125.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
2126.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
2127.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
2128.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
2129.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
2130.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
2131.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
2132.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
2133.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
2134.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
2135.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
2136.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
2137.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
2138.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
2139.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
2140.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
2141.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
2142.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
2143.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
2144.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
2145.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
2146.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
2147.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
2148.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
2149.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
2150.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
2151
2152.byte	0x01,0x00,0x00,0x00,	0x02,0x00,0x00,0x00	# rcon
2153.byte	0x04,0x00,0x00,0x00,	0x08,0x00,0x00,0x00
2154.byte	0x10,0x00,0x00,0x00,	0x20,0x00,0x00,0x00
2155.byte	0x40,0x00,0x00,0x00,	0x80,0x00,0x00,0x00
2156.byte	0x1B,0x00,0x00,0x00,	0x36,0x00,0x00,0x00
2157___
2158
# Post-process the accumulated assembly one line at a time: evaluate
# back-quoted Perl expressions, expand the made-up byte-order-dependent
# pseudo-instructions, adjust lwl/lwr/swl/swr, rotr and ext operands when
# $big_endian is false, and print every resulting line to STDOUT.
2159foreach (split("\n",$code)) {
2160	s/\`([^\`]*)\`/eval $1/ge;	# replace each `expr` with the value of evaluating expr
2161
2162	# made-up _instructions, _xtr, _ins, _ror and _bias, cope
2163	# with byte order dependencies...
2164	if (/^\s+_/) {
2165	    s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/;	# two-operand form: repeat the destination as first source
2166
	    # Only the first substitution that matches is applied.  _xtr and
	    # _ror become srl, _ins and _bias become sll, _ins2 becomes an
	    # 8-bit-wide ins.  The amount is used as written when $big_endian
	    # is true; otherwise it becomes 24-n (_xtr, _ins, _ins2), -n
	    # (_ror) or (n-16)&31 (_bias).
2167	    s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/
2168		sprintf("srl\t$1,$2,%d",$big_endian ?	eval($3)
2169					:		eval("24-$3"))/e or
2170	    s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
2171		sprintf("sll\t$1,$2,%d",$big_endian ?	eval($3)
2172					:		eval("24-$3"))/e or
2173	    s/_ins2\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
2174		sprintf("ins\t$1,$2,%d,8",$big_endian ?	eval($3)
2175					:		eval("24-$3"))/e or
2176	    s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
2177		sprintf("srl\t$1,$2,%d",$big_endian ?	eval($3)
2178					:		eval("$3*-1"))/e or
2179	    s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
2180		sprintf("sll\t$1,$2,%d",$big_endian ?	eval($3)
2181					:		eval("($3-16)&31"))/e;
2182
	    # Tidy up what the expansion produced (again, first match only):
	    # srl by a negative amount becomes sll by the positive amount,
	    # "srl x,y,0" becomes "and x,y,0xff", and "sll x,y,0" is turned
	    # into a comment.
2183	    s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/
2184		sprintf("sll\t$1,$2,$3")/e				or
2185	    s/srl\s+(\$[0-9]+),(\$[0-9]+),0/
2186		sprintf("and\t$1,$2,0xff")/e				or
2187	    s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/;
2188	}
2189
2190	# convert lwl/lwr and swr/swl to little-endian order
	# Because .* is greedy, $2 captures only the last digit d before the
	# parenthesised base register.  + and - bind tighter than &, so the
	# evaluated expression reduces to (d-1)&3 for [sl]wl and (d+1)&3 for
	# [sl]wr, and that value replaces the digit.
2191	if (!$big_endian && /^\s+[sl]w[lr]\s+/) {
2192	    s/([sl]wl.*)([0-9]+)\((\$[0-9]+)\)/
2193		sprintf("$1%d($3)",eval("$2-$2%4+($2%4-1)&3"))/e	or
2194	    s/([sl]wr.*)([0-9]+)\((\$[0-9]+)\)/
2195		sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
2196	}
2197
	# When $big_endian is false, a rotr amount n becomes 32-n and the bit
	# position p of an 8-bit ext becomes 24-p.
2198	if (!$big_endian) {
2199	    s/(rotr\s+\$[0-9]+,\$[0-9]+),([0-9]+)/sprintf("$1,%d",32-$2)/e;
2200	    s/(ext\s+\$[0-9]+,\$[0-9]+),([0-9]+),8/sprintf("$1,%d,8",24-$2)/e;
2201	}
2202
2203	print $_,"\n";
2204}
2205
2206close STDOUT or die "error closing STDOUT: $!";	# a failed close means the output may be incomplete
2207