xref: /openssl/crypto/aes/asm/aes-ppc.pl (revision 33388b44)
1#! /usr/bin/env perl
2# Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the Apache License 2.0 (the "License").  You may not use
5# this file except in compliance with the License.  You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9
10# ====================================================================
11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12# project. The module is, however, dual licensed under OpenSSL and
13# CRYPTOGAMS licenses depending on where you obtain it. For further
14# details see http://www.openssl.org/~appro/cryptogams/.
15# ====================================================================
16
17# Needs more work: key setup, CBC routine...
18#
19# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
20# 128-bit key, which is ~40% better than 64-bit code generated by gcc
21# 4.0. But these are not the ones currently used! Their "compact"
22# counterparts are, for security reasons. ppc_AES_encrypt_compact runs
23# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
24# at 1/3 of ppc_AES_decrypt.
25
26# February 2010
27#
28# Rescheduling instructions to favour Power6 pipeline gave 10%
29# performance improvement on the platform in question (and marginal
30# improvement even on others). It should be noted that Power6 fails
31# to process byte in 18 cycles, only in 23, because it fails to issue
32# 4 load instructions in two cycles, only in 3. As a result, non-compact
33# block subroutines are 25% slower than one would expect. Compact
34# functions scale better, because they have pure computational part,
35# which scales perfectly with clock frequency. To be specific
36# ppc_AES_encrypt_compact operates at 42 cycles per byte, while
37# ppc_AES_decrypt_compact - at 55 (in 64-bit build).
38
39# $output is the last argument if it looks like a file (it has an extension)
40# $flavour is the first argument if it doesn't look like a file
# Both arguments are optional; whichever is absent is left undefined.
$output = undef;
$output = pop @ARGV if (@ARGV && $ARGV[-1] =~ m|\.\w+$|);
$flavour = undef;
$flavour = shift @ARGV if (@ARGV && $ARGV[0] !~ m|\.|);
43
# Pick the pointer size, the offset of the link-register save slot and the
# pointer-sized store-with-update/load/store mnemonics from the flavour name.
# A flavour mentioning neither 64 nor 32 is rejected.
if ($flavour =~ /64/) {
	($SIZE_T, $STU, $POP, $PUSH) = (8, "stdu", "ld", "std");
	$LRSAVE = 2*$SIZE_T;
}
elsif ($flavour =~ /32/) {
	($SIZE_T, $STU, $POP, $PUSH) = (4, "stwu", "lwz", "stw");
	$LRSAVE = $SIZE_T;
}
else {
	die "nonsense $flavour";
}

# True (equal to $SIZE_T) only for flavours whose name ends in "le".
$LITTLE_ENDIAN = 0;
$LITTLE_ENDIAN = $SIZE_T if ($flavour =~ /le$/);
59
# Directory part of this script's path (including the trailing separator).
$0 =~ m/(.*[\/\\])[^\/\\]+$/;
$dir = $1;

# Look for the translator next to this script first, then in ../../perlasm/.
$xlate = "${dir}ppc-xlate.pl";
$xlate = "${dir}../../perlasm/ppc-xlate.pl" unless (-f $xlate);
die "can't locate ppc-xlate.pl" unless (-f $xlate);

# Re-open STDOUT as a pipe into the translator, run with the current perl.
open(STDOUT, "| $^X $xlate $flavour \"$output\"")
    || die "can't call $xlate: $!";

# Stack frame size: 32 pointer-sized slots.
$FRAME = 32*$SIZE_T;
69
# _data_word(LIST) - append one table row per 32-bit value in LIST to $code.
#
# Every value is emitted twice, as "\t.long\t0xXXXXXXXX,0xXXXXXXXX\n", so
# each table entry occupies 8 bytes.  Emission stops at the first undefined
# argument, if there is one.
#
# The sub is declared without a prototype on purpose: it takes a
# variable-length list, and an empty "()" prototype would turn any call not
# written as &_data_word(...) into a compile-time "Too many arguments" error.
sub _data_word
{
    foreach my $word (@_) {
	last unless defined($word);
	$code .= sprintf("\t.long\t0x%08x,0x%08x\n", $word, $word);
    }
}
74
# Register allocation.  Every name is a plain string that is interpolated
# into the assembly templates further down.

# Stack pointer, TOC pointer and the three incoming arguments.
($sp, $toc)        = ("r1", "r2");
($inp, $out, $key) = ("r3", "r4", "r5");

# Table pointers.  $Tbl0 shares r3 with $inp and $Tbl3 shares r4 with $out.
($Tbl0, $Tbl1, $Tbl2) = ("r3", "r6", "r7");
$Tbl3 = $out;	# stay away from "r2"; $out is offloaded to stack

# Cipher state words.
($s0, $s1, $s2, $s3) = map { "r$_" } (8 .. 11);

# Temporaries.
($t0, $t1) = ("r12", "r0");	# stay away from "r13";
($t2, $t3) = ("r14", "r15");

# Sixteen scratch registers, r16 through r31.
($acc00, $acc01, $acc02, $acc03,
 $acc04, $acc05, $acc06, $acc07,
 $acc08, $acc09, $acc10, $acc11,
 $acc12, $acc13, $acc14, $acc15) = map { "r$_" } (16 .. 31);

# The two mask constants reuse table-pointer registers.
$mask80 = $Tbl2;
$mask1b = $Tbl3;
118
# LAES_Te and LAES_Td leave the address of the encryption resp. decryption
# tables in $Tbl0 without needing a relocation: "bcl 20,31,$+4" deposits the
# address of the next instruction in LR, that address is copied to $Tbl0 and
# then advanced by the constant distance to the table data.  The caller's LR
# is parked in r0 across the sequence and restored before returning.
#
# Layout: each stub is 9 words (6 instructions plus 3 words of trailer) and
# is padded by .space to 64 bytes, so the first table starts 128 bytes after
# LAES_Te.  The extra 2048+256 in LAES_Td skips the encryption tables that
# follow: 256 entries of 8 bytes (see _data_word) plus 256 single bytes.
119$code.=<<___;
120.machine	"any"
121.text
122
123.align	7
124LAES_Te:
125	mflr	r0
126	bcl	20,31,\$+4
127	mflr	$Tbl0	;    vvvvv "distance" between . and 1st data entry
128	addi	$Tbl0,$Tbl0,`128-8`
129	mtlr	r0
130	blr
131	.long	0
132	.byte	0,12,0x14,0,0,0,0,0
133	.space	`64-9*4`
134LAES_Td:
135	mflr	r0
136	bcl	20,31,\$+4
137	mflr	$Tbl0	;    vvvvvvvv "distance" between . and 1st data entry
138	addi	$Tbl0,$Tbl0,`128-64-8+2048+256`
139	mtlr	r0
140	blr
141	.long	0
142	.byte	0,12,0x14,0,0,0,0,0
143	.space	`128-64-9*4`
144___
145&_data_word(
146	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
147	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
148	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
149	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
150	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
151	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
152	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
153	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
154	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
155	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
156	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
157	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
158	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
159	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
160	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
161	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
162	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
163	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
164	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
165	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
166	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
167	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
168	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
169	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
170	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
171	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
172	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
173	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
174	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
175	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
176	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
177	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
178	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
179	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
180	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
181	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
182	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
183	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
184	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
185	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
186	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
187	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
188	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
189	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
190	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
191	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
192	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
193	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
194	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
195	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
196	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
197	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
198	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
199	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
200	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
201	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
202	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
203	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
204	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
205	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
206	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
207	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
208	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
209	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
210$code.=<<___;
211.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
212.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
213.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
214.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
215.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
216.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
217.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
218.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
219.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
220.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
221.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
222.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
223.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
224.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
225.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
226.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
227.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
228.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
229.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
230.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
231.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
232.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
233.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
234.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
235.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
236.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
237.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
238.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
239.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
240.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
241.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
242.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
243___
244&_data_word(
245	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
246	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
247	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
248	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
249	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
250	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
251	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
252	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
253	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
254	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
255	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
256	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
257	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
258	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
259	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
260	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
261	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
262	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
263	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
264	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
265	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
266	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
267	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
268	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
269	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
270	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
271	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
272	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
273	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
274	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
275	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
276	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
277	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
278	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
279	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
280	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
281	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
282	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
283	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
284	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
285	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
286	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
287	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
288	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
289	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
290	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
291	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
292	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
293	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
294	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
295	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
296	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
297	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
298	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
299	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
300	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
301	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
302	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
303	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
304	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
305	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
306	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
307	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
308	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
# 256-byte table that closes the decryption data (it sits 2048 bytes past the
# address returned by LAES_Td), followed by the .AES_encrypt entry point.
#
# .AES_encrypt prologue: allocate a $FRAME-byte stack frame, save $out
# (its register doubles as $Tbl3/$mask1b), r14-r31 and the link register,
# then branch to Lenc_unaligned if either $inp or $out is not 4-byte aligned.
# Execution otherwise continues at Lenc_unaligned_ok with the code appended
# by the statements that follow.
309$code.=<<___;
310.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
311.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
312.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
313.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
314.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
315.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
316.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
317.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
318.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
319.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
320.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
321.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
322.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
323.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
324.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
325.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
326.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
327.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
328.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
329.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
330.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
331.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
332.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
333.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
334.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
335.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
336.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
337.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
338.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
339.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
340.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
341.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
342
343
344.globl	.AES_encrypt
345.align	7
346.AES_encrypt:
347	$STU	$sp,-$FRAME($sp)
348	mflr	r0
349
350	$PUSH	$out,`$FRAME-$SIZE_T*19`($sp)
351	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
352	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
353	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
354	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
355	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
356	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
357	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
358	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
359	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
360	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
361	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
362	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
363	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
364	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
365	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
366	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
367	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
368	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
369	$PUSH	r0,`$FRAME+$LRSAVE`($sp)
370
371	andi.	$t0,$inp,3
372	andi.	$t1,$out,3
373	or.	$t0,$t0,$t1
374	bne	Lenc_unaligned
375
376Lenc_unaligned_ok:
377___
# Word-wise path of .AES_encrypt (label Lenc_unaligned_ok): fetch the 16-byte
# input block as four 32-bit words.
# Big-endian flavours load straight into the state registers $s0-$s3.
378$code.=<<___ if (!$LITTLE_ENDIAN);
379	lwz	$s0,0($inp)
380	lwz	$s1,4($inp)
381	lwz	$s2,8($inp)
382	lwz	$s3,12($inp)
383___
# Little-endian flavours load into $t0-$t3 and byte-reverse every word into
# $s0-$s3: the rotate by 8 already places two of the four bytes, the two
# rlwimi insert the remaining two (bits 0-7 and 16-23) from a rotate by 24.
384$code.=<<___ if ($LITTLE_ENDIAN);
385	lwz	$t0,0($inp)
386	lwz	$t1,4($inp)
387	lwz	$t2,8($inp)
388	lwz	$t3,12($inp)
389	rotlwi	$s0,$t0,8
390	rotlwi	$s1,$t1,8
391	rotlwi	$s2,$t2,8
392	rotlwi	$s3,$t3,8
393	rlwimi	$s0,$t0,24,0,7
394	rlwimi	$s1,$t1,24,0,7
395	rlwimi	$s2,$t2,24,0,7
396	rlwimi	$s3,$t3,24,0,7
397	rlwimi	$s0,$t0,24,16,23
398	rlwimi	$s1,$t1,24,16,23
399	rlwimi	$s2,$t2,24,16,23
400	rlwimi	$s3,$t3,24,16,23
401___
# Point $Tbl0 at the encryption tables (this overwrites $inp, which shares
# r3), run the compact cipher on $s0-$s3, then reload $out from the slot the
# prologue saved it in.
402$code.=<<___;
403	bl	LAES_Te
404	bl	Lppc_AES_encrypt_compact
405	$POP	$out,`$FRAME-$SIZE_T*19`($sp)
406___
# Little-endian flavours byte-reverse the result into $t0-$t3 (same
# rotlwi/rlwimi sequence as on input) before storing it.
407$code.=<<___ if ($LITTLE_ENDIAN);
408	rotlwi	$t0,$s0,8
409	rotlwi	$t1,$s1,8
410	rotlwi	$t2,$s2,8
411	rotlwi	$t3,$s3,8
412	rlwimi	$t0,$s0,24,0,7
413	rlwimi	$t1,$s1,24,0,7
414	rlwimi	$t2,$s2,24,0,7
415	rlwimi	$t3,$s3,24,0,7
416	rlwimi	$t0,$s0,24,16,23
417	rlwimi	$t1,$s1,24,16,23
418	rlwimi	$t2,$s2,24,16,23
419	rlwimi	$t3,$s3,24,16,23
420	stw	$t0,0($out)
421	stw	$t1,4($out)
422	stw	$t2,8($out)
423	stw	$t3,12($out)
424___
# Big-endian flavours store the state words as they are.
425$code.=<<___ if (!$LITTLE_ENDIAN);
426	stw	$s0,0($out)
427	stw	$s1,4($out)
428	stw	$s2,8($out)
429	stw	$s3,12($out)
430___
# Remainder of .AES_encrypt, its two cipher cores, and the .AES_decrypt
# prologue.  The instruction order inside the template is deliberate
# (loads are interleaved with ALU operations); do not reorder it.
#
# Lenc_unaligned
#	Reached when $inp or $out is not 4-byte aligned.  Jumps back to the
#	word-wise path at Lenc_unaligned_ok unless (4096-ptr)&(4096-16) is
#	zero for either pointer, in which case it falls into Lenc_xpage
#	(presumably: the 16-byte block would straddle a 4K page boundary).
# Lenc_xpage
#	Builds $s0-$s3 from 16 single-byte loads, runs the compact cipher and
#	writes the result back with 16 single-byte stores.
# Lenc_done
#	Common epilogue: restores the link register and r14-r31, releases the
#	frame and returns.
# Lppc_AES_encrypt
#	Table-driven core.  The loop count is the word at 240($key) minus one.
#	Each loop iteration does 16 word lookups through $Tbl0-$Tbl3, which
#	all address the same 8-byte-per-entry table at byte offsets 0, 3, 2
#	and 1; the code after the loop uses byte lookups in the 256-byte table
#	at $Tbl0+2048.  No visible entry point calls this routine.
# Lppc_AES_encrypt_compact
#	Core used by .AES_encrypt.  Only the 256-byte table at $Tbl0+2048 is
#	read; the mixing of each state word is computed arithmetically with
#	$mask80/$mask1b and rotations (see the r0/r1/r2 annotations).  The
#	loop count is the word at 240($key); the loop exits to
#	Lenc_compact_done, which applies the final key words.
# .AES_decrypt
#	Prologue identical in shape to .AES_encrypt; branches to
#	Ldec_unaligned when $inp or $out is not 4-byte aligned.
431$code.=<<___;
432	b	Lenc_done
433
434Lenc_unaligned:
435	subfic	$t0,$inp,4096
436	subfic	$t1,$out,4096
437	andi.	$t0,$t0,4096-16
438	beq	Lenc_xpage
439	andi.	$t1,$t1,4096-16
440	bne	Lenc_unaligned_ok
441
442Lenc_xpage:
443	lbz	$acc00,0($inp)
444	lbz	$acc01,1($inp)
445	lbz	$acc02,2($inp)
446	lbz	$s0,3($inp)
447	lbz	$acc04,4($inp)
448	lbz	$acc05,5($inp)
449	lbz	$acc06,6($inp)
450	lbz	$s1,7($inp)
451	lbz	$acc08,8($inp)
452	lbz	$acc09,9($inp)
453	lbz	$acc10,10($inp)
454	insrwi	$s0,$acc00,8,0
455	lbz	$s2,11($inp)
456	insrwi	$s1,$acc04,8,0
457	lbz	$acc12,12($inp)
458	insrwi	$s0,$acc01,8,8
459	lbz	$acc13,13($inp)
460	insrwi	$s1,$acc05,8,8
461	lbz	$acc14,14($inp)
462	insrwi	$s0,$acc02,8,16
463	lbz	$s3,15($inp)
464	insrwi	$s1,$acc06,8,16
465	insrwi	$s2,$acc08,8,0
466	insrwi	$s3,$acc12,8,0
467	insrwi	$s2,$acc09,8,8
468	insrwi	$s3,$acc13,8,8
469	insrwi	$s2,$acc10,8,16
470	insrwi	$s3,$acc14,8,16
471
472	bl	LAES_Te
473	bl	Lppc_AES_encrypt_compact
474	$POP	$out,`$FRAME-$SIZE_T*19`($sp)
475
476	extrwi	$acc00,$s0,8,0
477	extrwi	$acc01,$s0,8,8
478	stb	$acc00,0($out)
479	extrwi	$acc02,$s0,8,16
480	stb	$acc01,1($out)
481	stb	$acc02,2($out)
482	extrwi	$acc04,$s1,8,0
483	stb	$s0,3($out)
484	extrwi	$acc05,$s1,8,8
485	stb	$acc04,4($out)
486	extrwi	$acc06,$s1,8,16
487	stb	$acc05,5($out)
488	stb	$acc06,6($out)
489	extrwi	$acc08,$s2,8,0
490	stb	$s1,7($out)
491	extrwi	$acc09,$s2,8,8
492	stb	$acc08,8($out)
493	extrwi	$acc10,$s2,8,16
494	stb	$acc09,9($out)
495	stb	$acc10,10($out)
496	extrwi	$acc12,$s3,8,0
497	stb	$s2,11($out)
498	extrwi	$acc13,$s3,8,8
499	stb	$acc12,12($out)
500	extrwi	$acc14,$s3,8,16
501	stb	$acc13,13($out)
502	stb	$acc14,14($out)
503	stb	$s3,15($out)
504
505Lenc_done:
506	$POP	r0,`$FRAME+$LRSAVE`($sp)
507	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
508	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
509	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
510	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
511	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
512	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
513	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
514	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
515	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
516	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
517	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
518	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
519	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
520	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
521	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
522	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
523	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
524	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
525	mtlr	r0
526	addi	$sp,$sp,$FRAME
527	blr
528	.long	0
529	.byte	0,12,4,1,0x80,18,3,0
530	.long	0
531
532.align	5
533Lppc_AES_encrypt:
534	lwz	$acc00,240($key)
535	addi	$Tbl1,$Tbl0,3
536	lwz	$t0,0($key)
537	addi	$Tbl2,$Tbl0,2
538	lwz	$t1,4($key)
539	addi	$Tbl3,$Tbl0,1
540	lwz	$t2,8($key)
541	addi	$acc00,$acc00,-1
542	lwz	$t3,12($key)
543	addi	$key,$key,16
544	xor	$s0,$s0,$t0
545	xor	$s1,$s1,$t1
546	xor	$s2,$s2,$t2
547	xor	$s3,$s3,$t3
548	mtctr	$acc00
549.align	4
550Lenc_loop:
551	rlwinm	$acc00,$s0,`32-24+3`,21,28
552	rlwinm	$acc01,$s1,`32-24+3`,21,28
553	rlwinm	$acc02,$s2,`32-24+3`,21,28
554	rlwinm	$acc03,$s3,`32-24+3`,21,28
555	lwz	$t0,0($key)
556	rlwinm	$acc04,$s1,`32-16+3`,21,28
557	lwz	$t1,4($key)
558	rlwinm	$acc05,$s2,`32-16+3`,21,28
559	lwz	$t2,8($key)
560	rlwinm	$acc06,$s3,`32-16+3`,21,28
561	lwz	$t3,12($key)
562	rlwinm	$acc07,$s0,`32-16+3`,21,28
563	lwzx	$acc00,$Tbl0,$acc00
564	rlwinm	$acc08,$s2,`32-8+3`,21,28
565	lwzx	$acc01,$Tbl0,$acc01
566	rlwinm	$acc09,$s3,`32-8+3`,21,28
567	lwzx	$acc02,$Tbl0,$acc02
568	rlwinm	$acc10,$s0,`32-8+3`,21,28
569	lwzx	$acc03,$Tbl0,$acc03
570	rlwinm	$acc11,$s1,`32-8+3`,21,28
571	lwzx	$acc04,$Tbl1,$acc04
572	rlwinm	$acc12,$s3,`0+3`,21,28
573	lwzx	$acc05,$Tbl1,$acc05
574	rlwinm	$acc13,$s0,`0+3`,21,28
575	lwzx	$acc06,$Tbl1,$acc06
576	rlwinm	$acc14,$s1,`0+3`,21,28
577	lwzx	$acc07,$Tbl1,$acc07
578	rlwinm	$acc15,$s2,`0+3`,21,28
579	lwzx	$acc08,$Tbl2,$acc08
580	xor	$t0,$t0,$acc00
581	lwzx	$acc09,$Tbl2,$acc09
582	xor	$t1,$t1,$acc01
583	lwzx	$acc10,$Tbl2,$acc10
584	xor	$t2,$t2,$acc02
585	lwzx	$acc11,$Tbl2,$acc11
586	xor	$t3,$t3,$acc03
587	lwzx	$acc12,$Tbl3,$acc12
588	xor	$t0,$t0,$acc04
589	lwzx	$acc13,$Tbl3,$acc13
590	xor	$t1,$t1,$acc05
591	lwzx	$acc14,$Tbl3,$acc14
592	xor	$t2,$t2,$acc06
593	lwzx	$acc15,$Tbl3,$acc15
594	xor	$t3,$t3,$acc07
595	xor	$t0,$t0,$acc08
596	xor	$t1,$t1,$acc09
597	xor	$t2,$t2,$acc10
598	xor	$t3,$t3,$acc11
599	xor	$s0,$t0,$acc12
600	xor	$s1,$t1,$acc13
601	xor	$s2,$t2,$acc14
602	xor	$s3,$t3,$acc15
603	addi	$key,$key,16
604	bdnz	Lenc_loop
605
606	addi	$Tbl2,$Tbl0,2048
607	nop
608	lwz	$t0,0($key)
609	rlwinm	$acc00,$s0,`32-24`,24,31
610	lwz	$t1,4($key)
611	rlwinm	$acc01,$s1,`32-24`,24,31
612	lwz	$t2,8($key)
613	rlwinm	$acc02,$s2,`32-24`,24,31
614	lwz	$t3,12($key)
615	rlwinm	$acc03,$s3,`32-24`,24,31
616	lwz	$acc08,`2048+0`($Tbl0)	! prefetch Te4
617	rlwinm	$acc04,$s1,`32-16`,24,31
618	lwz	$acc09,`2048+32`($Tbl0)
619	rlwinm	$acc05,$s2,`32-16`,24,31
620	lwz	$acc10,`2048+64`($Tbl0)
621	rlwinm	$acc06,$s3,`32-16`,24,31
622	lwz	$acc11,`2048+96`($Tbl0)
623	rlwinm	$acc07,$s0,`32-16`,24,31
624	lwz	$acc12,`2048+128`($Tbl0)
625	rlwinm	$acc08,$s2,`32-8`,24,31
626	lwz	$acc13,`2048+160`($Tbl0)
627	rlwinm	$acc09,$s3,`32-8`,24,31
628	lwz	$acc14,`2048+192`($Tbl0)
629	rlwinm	$acc10,$s0,`32-8`,24,31
630	lwz	$acc15,`2048+224`($Tbl0)
631	rlwinm	$acc11,$s1,`32-8`,24,31
632	lbzx	$acc00,$Tbl2,$acc00
633	rlwinm	$acc12,$s3,`0`,24,31
634	lbzx	$acc01,$Tbl2,$acc01
635	rlwinm	$acc13,$s0,`0`,24,31
636	lbzx	$acc02,$Tbl2,$acc02
637	rlwinm	$acc14,$s1,`0`,24,31
638	lbzx	$acc03,$Tbl2,$acc03
639	rlwinm	$acc15,$s2,`0`,24,31
640	lbzx	$acc04,$Tbl2,$acc04
641	rlwinm	$s0,$acc00,24,0,7
642	lbzx	$acc05,$Tbl2,$acc05
643	rlwinm	$s1,$acc01,24,0,7
644	lbzx	$acc06,$Tbl2,$acc06
645	rlwinm	$s2,$acc02,24,0,7
646	lbzx	$acc07,$Tbl2,$acc07
647	rlwinm	$s3,$acc03,24,0,7
648	lbzx	$acc08,$Tbl2,$acc08
649	rlwimi	$s0,$acc04,16,8,15
650	lbzx	$acc09,$Tbl2,$acc09
651	rlwimi	$s1,$acc05,16,8,15
652	lbzx	$acc10,$Tbl2,$acc10
653	rlwimi	$s2,$acc06,16,8,15
654	lbzx	$acc11,$Tbl2,$acc11
655	rlwimi	$s3,$acc07,16,8,15
656	lbzx	$acc12,$Tbl2,$acc12
657	rlwimi	$s0,$acc08,8,16,23
658	lbzx	$acc13,$Tbl2,$acc13
659	rlwimi	$s1,$acc09,8,16,23
660	lbzx	$acc14,$Tbl2,$acc14
661	rlwimi	$s2,$acc10,8,16,23
662	lbzx	$acc15,$Tbl2,$acc15
663	rlwimi	$s3,$acc11,8,16,23
664	or	$s0,$s0,$acc12
665	or	$s1,$s1,$acc13
666	or	$s2,$s2,$acc14
667	or	$s3,$s3,$acc15
668	xor	$s0,$s0,$t0
669	xor	$s1,$s1,$t1
670	xor	$s2,$s2,$t2
671	xor	$s3,$s3,$t3
672	blr
673	.long	0
674	.byte	0,12,0x14,0,0,0,0,0
675
676.align	4
677Lppc_AES_encrypt_compact:
678	lwz	$acc00,240($key)
679	addi	$Tbl1,$Tbl0,2048
680	lwz	$t0,0($key)
681	lis	$mask80,0x8080
682	lwz	$t1,4($key)
683	lis	$mask1b,0x1b1b
684	lwz	$t2,8($key)
685	ori	$mask80,$mask80,0x8080
686	lwz	$t3,12($key)
687	ori	$mask1b,$mask1b,0x1b1b
688	addi	$key,$key,16
689	mtctr	$acc00
690.align	4
691Lenc_compact_loop:
692	xor	$s0,$s0,$t0
693	xor	$s1,$s1,$t1
694	rlwinm	$acc00,$s0,`32-24`,24,31
695	xor	$s2,$s2,$t2
696	rlwinm	$acc01,$s1,`32-24`,24,31
697	xor	$s3,$s3,$t3
698	rlwinm	$acc02,$s2,`32-24`,24,31
699	rlwinm	$acc03,$s3,`32-24`,24,31
700	rlwinm	$acc04,$s1,`32-16`,24,31
701	rlwinm	$acc05,$s2,`32-16`,24,31
702	rlwinm	$acc06,$s3,`32-16`,24,31
703	rlwinm	$acc07,$s0,`32-16`,24,31
704	lbzx	$acc00,$Tbl1,$acc00
705	rlwinm	$acc08,$s2,`32-8`,24,31
706	lbzx	$acc01,$Tbl1,$acc01
707	rlwinm	$acc09,$s3,`32-8`,24,31
708	lbzx	$acc02,$Tbl1,$acc02
709	rlwinm	$acc10,$s0,`32-8`,24,31
710	lbzx	$acc03,$Tbl1,$acc03
711	rlwinm	$acc11,$s1,`32-8`,24,31
712	lbzx	$acc04,$Tbl1,$acc04
713	rlwinm	$acc12,$s3,`0`,24,31
714	lbzx	$acc05,$Tbl1,$acc05
715	rlwinm	$acc13,$s0,`0`,24,31
716	lbzx	$acc06,$Tbl1,$acc06
717	rlwinm	$acc14,$s1,`0`,24,31
718	lbzx	$acc07,$Tbl1,$acc07
719	rlwinm	$acc15,$s2,`0`,24,31
720	lbzx	$acc08,$Tbl1,$acc08
721	rlwinm	$s0,$acc00,24,0,7
722	lbzx	$acc09,$Tbl1,$acc09
723	rlwinm	$s1,$acc01,24,0,7
724	lbzx	$acc10,$Tbl1,$acc10
725	rlwinm	$s2,$acc02,24,0,7
726	lbzx	$acc11,$Tbl1,$acc11
727	rlwinm	$s3,$acc03,24,0,7
728	lbzx	$acc12,$Tbl1,$acc12
729	rlwimi	$s0,$acc04,16,8,15
730	lbzx	$acc13,$Tbl1,$acc13
731	rlwimi	$s1,$acc05,16,8,15
732	lbzx	$acc14,$Tbl1,$acc14
733	rlwimi	$s2,$acc06,16,8,15
734	lbzx	$acc15,$Tbl1,$acc15
735	rlwimi	$s3,$acc07,16,8,15
736	rlwimi	$s0,$acc08,8,16,23
737	rlwimi	$s1,$acc09,8,16,23
738	rlwimi	$s2,$acc10,8,16,23
739	rlwimi	$s3,$acc11,8,16,23
740	lwz	$t0,0($key)
741	or	$s0,$s0,$acc12
742	lwz	$t1,4($key)
743	or	$s1,$s1,$acc13
744	lwz	$t2,8($key)
745	or	$s2,$s2,$acc14
746	lwz	$t3,12($key)
747	or	$s3,$s3,$acc15
748
749	addi	$key,$key,16
750	bdz	Lenc_compact_done
751
752	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
753	and	$acc01,$s1,$mask80
754	and	$acc02,$s2,$mask80
755	and	$acc03,$s3,$mask80
756	srwi	$acc04,$acc00,7		# r1>>7
757	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
758	srwi	$acc05,$acc01,7
759	andc	$acc09,$s1,$mask80
760	srwi	$acc06,$acc02,7
761	andc	$acc10,$s2,$mask80
762	srwi	$acc07,$acc03,7
763	andc	$acc11,$s3,$mask80
764	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
765	sub	$acc01,$acc01,$acc05
766	sub	$acc02,$acc02,$acc06
767	sub	$acc03,$acc03,$acc07
768	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
769	add	$acc09,$acc09,$acc09
770	add	$acc10,$acc10,$acc10
771	add	$acc11,$acc11,$acc11
772	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
773	and	$acc01,$acc01,$mask1b
774	and	$acc02,$acc02,$mask1b
775	and	$acc03,$acc03,$mask1b
776	xor	$acc00,$acc00,$acc08	# r2
777	xor	$acc01,$acc01,$acc09
778	 rotlwi	$acc12,$s0,16		# ROTATE(r0,16)
779	xor	$acc02,$acc02,$acc10
780	 rotlwi	$acc13,$s1,16
781	xor	$acc03,$acc03,$acc11
782	 rotlwi	$acc14,$s2,16
783
784	xor	$s0,$s0,$acc00		# r0^r2
785	rotlwi	$acc15,$s3,16
786	xor	$s1,$s1,$acc01
787	rotrwi	$s0,$s0,24		# ROTATE(r2^r0,24)
788	xor	$s2,$s2,$acc02
789	rotrwi	$s1,$s1,24
790	xor	$s3,$s3,$acc03
791	rotrwi	$s2,$s2,24
792	xor	$s0,$s0,$acc00		# ROTATE(r2^r0,24)^r2
793	rotrwi	$s3,$s3,24
794	xor	$s1,$s1,$acc01
795	xor	$s2,$s2,$acc02
796	xor	$s3,$s3,$acc03
797	rotlwi	$acc08,$acc12,8		# ROTATE(r0,24)
798	xor	$s0,$s0,$acc12		#
799	rotlwi	$acc09,$acc13,8
800	xor	$s1,$s1,$acc13
801	rotlwi	$acc10,$acc14,8
802	xor	$s2,$s2,$acc14
803	rotlwi	$acc11,$acc15,8
804	xor	$s3,$s3,$acc15
805	xor	$s0,$s0,$acc08		#
806	xor	$s1,$s1,$acc09
807	xor	$s2,$s2,$acc10
808	xor	$s3,$s3,$acc11
809
810	b	Lenc_compact_loop
811.align	4
812Lenc_compact_done:
813	xor	$s0,$s0,$t0
814	xor	$s1,$s1,$t1
815	xor	$s2,$s2,$t2
816	xor	$s3,$s3,$t3
817	blr
818	.long	0
819	.byte	0,12,0x14,0,0,0,0,0
820.size	.AES_encrypt,.-.AES_encrypt
821
822.globl	.AES_decrypt
823.align	7
824.AES_decrypt:
825	$STU	$sp,-$FRAME($sp)
826	mflr	r0
827
828	$PUSH	$out,`$FRAME-$SIZE_T*19`($sp)
829	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
830	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
831	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
832	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
833	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
834	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
835	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
836	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
837	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
838	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
839	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
840	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
841	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
842	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
843	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
844	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
845	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
846	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
847	$PUSH	r0,`$FRAME+$LRSAVE`($sp)
848
849	andi.	$t0,$inp,3
850	andi.	$t1,$out,3
851	or.	$t0,$t0,$t1
852	bne	Ldec_unaligned
853
854Ldec_unaligned_ok:
855___
# Word-wise path of .AES_decrypt (label Ldec_unaligned_ok): fetch the 16-byte
# input block as four 32-bit words.
# Big-endian flavours load straight into the state registers $s0-$s3.
856$code.=<<___ if (!$LITTLE_ENDIAN);
857	lwz	$s0,0($inp)
858	lwz	$s1,4($inp)
859	lwz	$s2,8($inp)
860	lwz	$s3,12($inp)
861___
# Little-endian flavours load into $t0-$t3 and byte-reverse every word into
# $s0-$s3: the rotate by 8 already places two of the four bytes, the two
# rlwimi insert the remaining two (bits 0-7 and 16-23) from a rotate by 24.
862$code.=<<___ if ($LITTLE_ENDIAN);
863	lwz	$t0,0($inp)
864	lwz	$t1,4($inp)
865	lwz	$t2,8($inp)
866	lwz	$t3,12($inp)
867	rotlwi	$s0,$t0,8
868	rotlwi	$s1,$t1,8
869	rotlwi	$s2,$t2,8
870	rotlwi	$s3,$t3,8
871	rlwimi	$s0,$t0,24,0,7
872	rlwimi	$s1,$t1,24,0,7
873	rlwimi	$s2,$t2,24,0,7
874	rlwimi	$s3,$t3,24,0,7
875	rlwimi	$s0,$t0,24,16,23
876	rlwimi	$s1,$t1,24,16,23
877	rlwimi	$s2,$t2,24,16,23
878	rlwimi	$s3,$t3,24,16,23
879___
# Point $Tbl0 at the decryption tables (this overwrites $inp, which shares
# r3), run the compact decryption core on $s0-$s3, then reload $out from the
# slot the prologue saved it in.
880$code.=<<___;
881	bl	LAES_Td
882	bl	Lppc_AES_decrypt_compact
883	$POP	$out,`$FRAME-$SIZE_T*19`($sp)
884___
# Little-endian flavours byte-reverse the result into $t0-$t3 (same
# rotlwi/rlwimi sequence as on input) before storing it.
885$code.=<<___ if ($LITTLE_ENDIAN);
886	rotlwi	$t0,$s0,8
887	rotlwi	$t1,$s1,8
888	rotlwi	$t2,$s2,8
889	rotlwi	$t3,$s3,8
890	rlwimi	$t0,$s0,24,0,7
891	rlwimi	$t1,$s1,24,0,7
892	rlwimi	$t2,$s2,24,0,7
893	rlwimi	$t3,$s3,24,0,7
894	rlwimi	$t0,$s0,24,16,23
895	rlwimi	$t1,$s1,24,16,23
896	rlwimi	$t2,$s2,24,16,23
897	rlwimi	$t3,$s3,24,16,23
898	stw	$t0,0($out)
899	stw	$t1,4($out)
900	stw	$t2,8($out)
901	stw	$t3,12($out)
902___
# Big-endian flavours store the state words as they are.
903$code.=<<___ if (!$LITTLE_ENDIAN);
904	stw	$s0,0($out)
905	stw	$s1,4($out)
906	stw	$s2,8($out)
907	stw	$s3,12($out)
908___
909$code.=<<___;
910	b	Ldec_done
911
912Ldec_unaligned:
913	subfic	$t0,$inp,4096
914	subfic	$t1,$out,4096
915	andi.	$t0,$t0,4096-16
916	beq	Ldec_xpage
917	andi.	$t1,$t1,4096-16
918	bne	Ldec_unaligned_ok
919
920Ldec_xpage:
921	lbz	$acc00,0($inp)
922	lbz	$acc01,1($inp)
923	lbz	$acc02,2($inp)
924	lbz	$s0,3($inp)
925	lbz	$acc04,4($inp)
926	lbz	$acc05,5($inp)
927	lbz	$acc06,6($inp)
928	lbz	$s1,7($inp)
929	lbz	$acc08,8($inp)
930	lbz	$acc09,9($inp)
931	lbz	$acc10,10($inp)
932	insrwi	$s0,$acc00,8,0
933	lbz	$s2,11($inp)
934	insrwi	$s1,$acc04,8,0
935	lbz	$acc12,12($inp)
936	insrwi	$s0,$acc01,8,8
937	lbz	$acc13,13($inp)
938	insrwi	$s1,$acc05,8,8
939	lbz	$acc14,14($inp)
940	insrwi	$s0,$acc02,8,16
941	lbz	$s3,15($inp)
942	insrwi	$s1,$acc06,8,16
943	insrwi	$s2,$acc08,8,0
944	insrwi	$s3,$acc12,8,0
945	insrwi	$s2,$acc09,8,8
946	insrwi	$s3,$acc13,8,8
947	insrwi	$s2,$acc10,8,16
948	insrwi	$s3,$acc14,8,16
949
950	bl	LAES_Td
951	bl	Lppc_AES_decrypt_compact
952	$POP	$out,`$FRAME-$SIZE_T*19`($sp)
953
954	extrwi	$acc00,$s0,8,0
955	extrwi	$acc01,$s0,8,8
956	stb	$acc00,0($out)
957	extrwi	$acc02,$s0,8,16
958	stb	$acc01,1($out)
959	stb	$acc02,2($out)
960	extrwi	$acc04,$s1,8,0
961	stb	$s0,3($out)
962	extrwi	$acc05,$s1,8,8
963	stb	$acc04,4($out)
964	extrwi	$acc06,$s1,8,16
965	stb	$acc05,5($out)
966	stb	$acc06,6($out)
967	extrwi	$acc08,$s2,8,0
968	stb	$s1,7($out)
969	extrwi	$acc09,$s2,8,8
970	stb	$acc08,8($out)
971	extrwi	$acc10,$s2,8,16
972	stb	$acc09,9($out)
973	stb	$acc10,10($out)
974	extrwi	$acc12,$s3,8,0
975	stb	$s2,11($out)
976	extrwi	$acc13,$s3,8,8
977	stb	$acc12,12($out)
978	extrwi	$acc14,$s3,8,16
979	stb	$acc13,13($out)
980	stb	$acc14,14($out)
981	stb	$s3,15($out)
982
983Ldec_done:
984	$POP	r0,`$FRAME+$LRSAVE`($sp)
985	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
986	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
987	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
988	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
989	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
990	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
991	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
992	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
993	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
994	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
995	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
996	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
997	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
998	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
999	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
1000	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
1001	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
1002	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
1003	mtlr	r0
1004	addi	$sp,$sp,$FRAME
1005	blr
1006	.long	0
1007	.byte	0,12,4,1,0x80,18,3,0
1008	.long	0
1009
# Lppc_AES_decrypt: internal decryption core driven by the word tables
# reached through Tbl0.  The state is taken from s0..s3 and the result is
# left in s0..s3.  The key pointer is advanced as round keys are consumed,
# CTR serves as the loop counter, and t0..t3, acc00..acc15 and Tbl1..Tbl3
# are used as scratch.  Returns with blr.
1010.align	5
1011Lppc_AES_decrypt:
	# The word at offset 240 of the key schedule, minus one, becomes the
	# iteration count of Ldec_loop.  Tbl1, Tbl2 and Tbl3 point 3, 2 and 1
	# bytes past Tbl0 respectively.
1012	lwz	$acc00,240($key)
1013	addi	$Tbl1,$Tbl0,3
1014	lwz	$t0,0($key)
1015	addi	$Tbl2,$Tbl0,2
1016	lwz	$t1,4($key)
1017	addi	$Tbl3,$Tbl0,1
1018	lwz	$t2,8($key)
1019	addi	$acc00,$acc00,-1
1020	lwz	$t3,12($key)
1021	addi	$key,$key,16
	# xor the state with the first four key words
1022	xor	$s0,$s0,$t0
1023	xor	$s1,$s1,$t1
1024	xor	$s2,$s2,$t2
1025	xor	$s3,$s3,$t3
1026	mtctr	$acc00
1027.align	4
	# One round per iteration.  Every state byte is turned into a table
	# offset scaled by 8 (rotate, then keep bits 21..28) and looked up as a
	# word: the most significant byte through Tbl0, the next through Tbl1,
	# then Tbl2, and the least significant through Tbl3.  Output word i is
	# the xor of the next key word with the lookups for bytes taken, most
	# significant first, from s[i], s[i-1], s[i-2], s[i-3] (indices mod 4).
1028Ldec_loop:
1029	rlwinm	$acc00,$s0,`32-24+3`,21,28
1030	rlwinm	$acc01,$s1,`32-24+3`,21,28
1031	rlwinm	$acc02,$s2,`32-24+3`,21,28
1032	rlwinm	$acc03,$s3,`32-24+3`,21,28
1033	lwz	$t0,0($key)
1034	rlwinm	$acc04,$s3,`32-16+3`,21,28
1035	lwz	$t1,4($key)
1036	rlwinm	$acc05,$s0,`32-16+3`,21,28
1037	lwz	$t2,8($key)
1038	rlwinm	$acc06,$s1,`32-16+3`,21,28
1039	lwz	$t3,12($key)
1040	rlwinm	$acc07,$s2,`32-16+3`,21,28
1041	lwzx	$acc00,$Tbl0,$acc00
1042	rlwinm	$acc08,$s2,`32-8+3`,21,28
1043	lwzx	$acc01,$Tbl0,$acc01
1044	rlwinm	$acc09,$s3,`32-8+3`,21,28
1045	lwzx	$acc02,$Tbl0,$acc02
1046	rlwinm	$acc10,$s0,`32-8+3`,21,28
1047	lwzx	$acc03,$Tbl0,$acc03
1048	rlwinm	$acc11,$s1,`32-8+3`,21,28
1049	lwzx	$acc04,$Tbl1,$acc04
1050	rlwinm	$acc12,$s1,`0+3`,21,28
1051	lwzx	$acc05,$Tbl1,$acc05
1052	rlwinm	$acc13,$s2,`0+3`,21,28
1053	lwzx	$acc06,$Tbl1,$acc06
1054	rlwinm	$acc14,$s3,`0+3`,21,28
1055	lwzx	$acc07,$Tbl1,$acc07
1056	rlwinm	$acc15,$s0,`0+3`,21,28
1057	lwzx	$acc08,$Tbl2,$acc08
1058	xor	$t0,$t0,$acc00
1059	lwzx	$acc09,$Tbl2,$acc09
1060	xor	$t1,$t1,$acc01
1061	lwzx	$acc10,$Tbl2,$acc10
1062	xor	$t2,$t2,$acc02
1063	lwzx	$acc11,$Tbl2,$acc11
1064	xor	$t3,$t3,$acc03
1065	lwzx	$acc12,$Tbl3,$acc12
1066	xor	$t0,$t0,$acc04
1067	lwzx	$acc13,$Tbl3,$acc13
1068	xor	$t1,$t1,$acc05
1069	lwzx	$acc14,$Tbl3,$acc14
1070	xor	$t2,$t2,$acc06
1071	lwzx	$acc15,$Tbl3,$acc15
1072	xor	$t3,$t3,$acc07
1073	xor	$t0,$t0,$acc08
1074	xor	$t1,$t1,$acc09
1075	xor	$t2,$t2,$acc10
1076	xor	$t3,$t3,$acc11
1077	xor	$s0,$t0,$acc12
1078	xor	$s1,$t1,$acc13
1079	xor	$s2,$t2,$acc14
1080	xor	$s3,$t3,$acc15
1081	addi	$key,$key,16
1082	bdnz	Ldec_loop
1083
	# Last round: byte-wide lookups (lbzx) through Tbl2, now repointed to
	# Tbl0+2048, with the same byte selection as in the loop.  The bytes
	# are packed back into s0..s3 and xored with the final four key words.
	# The lwz loads into acc08..acc15 below only touch the byte table at
	# 32-byte steps; each of those registers is overwritten by a later
	# rlwinm before it is ever read.
1084	addi	$Tbl2,$Tbl0,2048
1085	nop
1086	lwz	$t0,0($key)
1087	rlwinm	$acc00,$s0,`32-24`,24,31
1088	lwz	$t1,4($key)
1089	rlwinm	$acc01,$s1,`32-24`,24,31
1090	lwz	$t2,8($key)
1091	rlwinm	$acc02,$s2,`32-24`,24,31
1092	lwz	$t3,12($key)
1093	rlwinm	$acc03,$s3,`32-24`,24,31
1094	lwz	$acc08,`2048+0`($Tbl0)	! prefetch Td4
1095	rlwinm	$acc04,$s3,`32-16`,24,31
1096	lwz	$acc09,`2048+32`($Tbl0)
1097	rlwinm	$acc05,$s0,`32-16`,24,31
1098	lwz	$acc10,`2048+64`($Tbl0)
1099	lbzx	$acc00,$Tbl2,$acc00
1100	lwz	$acc11,`2048+96`($Tbl0)
1101	lbzx	$acc01,$Tbl2,$acc01
1102	lwz	$acc12,`2048+128`($Tbl0)
1103	rlwinm	$acc06,$s1,`32-16`,24,31
1104	lwz	$acc13,`2048+160`($Tbl0)
1105	rlwinm	$acc07,$s2,`32-16`,24,31
1106	lwz	$acc14,`2048+192`($Tbl0)
1107	rlwinm	$acc08,$s2,`32-8`,24,31
1108	lwz	$acc15,`2048+224`($Tbl0)
1109	rlwinm	$acc09,$s3,`32-8`,24,31
1110	lbzx	$acc02,$Tbl2,$acc02
1111	rlwinm	$acc10,$s0,`32-8`,24,31
1112	lbzx	$acc03,$Tbl2,$acc03
1113	rlwinm	$acc11,$s1,`32-8`,24,31
1114	lbzx	$acc04,$Tbl2,$acc04
1115	rlwinm	$acc12,$s1,`0`,24,31
1116	lbzx	$acc05,$Tbl2,$acc05
1117	rlwinm	$acc13,$s2,`0`,24,31
1118	lbzx	$acc06,$Tbl2,$acc06
1119	rlwinm	$acc14,$s3,`0`,24,31
1120	lbzx	$acc07,$Tbl2,$acc07
1121	rlwinm	$acc15,$s0,`0`,24,31
1122	lbzx	$acc08,$Tbl2,$acc08
1123	rlwinm	$s0,$acc00,24,0,7
1124	lbzx	$acc09,$Tbl2,$acc09
1125	rlwinm	$s1,$acc01,24,0,7
1126	lbzx	$acc10,$Tbl2,$acc10
1127	rlwinm	$s2,$acc02,24,0,7
1128	lbzx	$acc11,$Tbl2,$acc11
1129	rlwinm	$s3,$acc03,24,0,7
1130	lbzx	$acc12,$Tbl2,$acc12
1131	rlwimi	$s0,$acc04,16,8,15
1132	lbzx	$acc13,$Tbl2,$acc13
1133	rlwimi	$s1,$acc05,16,8,15
1134	lbzx	$acc14,$Tbl2,$acc14
1135	rlwimi	$s2,$acc06,16,8,15
1136	lbzx	$acc15,$Tbl2,$acc15
1137	rlwimi	$s3,$acc07,16,8,15
1138	rlwimi	$s0,$acc08,8,16,23
1139	rlwimi	$s1,$acc09,8,16,23
1140	rlwimi	$s2,$acc10,8,16,23
1141	rlwimi	$s3,$acc11,8,16,23
1142	or	$s0,$s0,$acc12
1143	or	$s1,$s1,$acc13
1144	or	$s2,$s2,$acc14
1145	or	$s3,$s3,$acc15
1146	xor	$s0,$s0,$t0
1147	xor	$s1,$s1,$t1
1148	xor	$s2,$s2,$t2
1149	xor	$s3,$s3,$t3
1150	blr
1151	.long	0
1152	.byte	0,12,0x14,0,0,0,0,0
1153
# Lppc_AES_decrypt_compact: internal decryption core whose only table
# accesses are byte-wide lookups through Tbl1 = Tbl0+2048; the remainder of
# each round is computed with logical and arithmetic instructions.  The
# state is taken from s0..s3 and the result is left in s0..s3.  The key
# pointer is advanced past the consumed round keys; CTR, Tbl1, t0..t3,
# acc00..acc15, mask80 and mask1b are overwritten.  Returns with blr.
1154.align	4
1155Lppc_AES_decrypt_compact:
	# acc00 receives the word at offset 240 of the key schedule and is
	# later moved to CTR unchanged; mask80 and mask1b are assembled as
	# 0x80808080 and 0x1b1b1b1b; t0..t3 hold the first four key words.
1156	lwz	$acc00,240($key)
1157	addi	$Tbl1,$Tbl0,2048
1158	lwz	$t0,0($key)
1159	lis	$mask80,0x8080
1160	lwz	$t1,4($key)
1161	lis	$mask1b,0x1b1b
1162	lwz	$t2,8($key)
1163	ori	$mask80,$mask80,0x8080
1164	lwz	$t3,12($key)
1165	ori	$mask1b,$mask1b,0x1b1b
1166	addi	$key,$key,16
1167___
1168$code.=<<___ if ($SIZE_T==8);
	# 64-bit build only: copy the low 32 bits of each mask into its upper
	# 32 bits so the masks cover a full doubleword.
1169	insrdi	$mask80,$mask80,32,0
1170	insrdi	$mask1b,$mask1b,32,0
1171___
1172$code.=<<___;
1173	mtctr	$acc00
1174.align	4
	# Each pass: xor the state with the key words in t0..t3, replace every
	# state byte by its lbzx lookup through Tbl1, and pack output word i,
	# most significant byte first, from the substituted bytes of s[i],
	# s[i-1], s[i-2], s[i-3] (indices mod 4).  The next four key words are
	# loaded into t0..t3 while the words are being packed.
1175Ldec_compact_loop:
1176	xor	$s0,$s0,$t0
1177	xor	$s1,$s1,$t1
1178	rlwinm	$acc00,$s0,`32-24`,24,31
1179	xor	$s2,$s2,$t2
1180	rlwinm	$acc01,$s1,`32-24`,24,31
1181	xor	$s3,$s3,$t3
1182	rlwinm	$acc02,$s2,`32-24`,24,31
1183	rlwinm	$acc03,$s3,`32-24`,24,31
1184	rlwinm	$acc04,$s3,`32-16`,24,31
1185	rlwinm	$acc05,$s0,`32-16`,24,31
1186	rlwinm	$acc06,$s1,`32-16`,24,31
1187	rlwinm	$acc07,$s2,`32-16`,24,31
1188	lbzx	$acc00,$Tbl1,$acc00
1189	rlwinm	$acc08,$s2,`32-8`,24,31
1190	lbzx	$acc01,$Tbl1,$acc01
1191	rlwinm	$acc09,$s3,`32-8`,24,31
1192	lbzx	$acc02,$Tbl1,$acc02
1193	rlwinm	$acc10,$s0,`32-8`,24,31
1194	lbzx	$acc03,$Tbl1,$acc03
1195	rlwinm	$acc11,$s1,`32-8`,24,31
1196	lbzx	$acc04,$Tbl1,$acc04
1197	rlwinm	$acc12,$s1,`0`,24,31
1198	lbzx	$acc05,$Tbl1,$acc05
1199	rlwinm	$acc13,$s2,`0`,24,31
1200	lbzx	$acc06,$Tbl1,$acc06
1201	rlwinm	$acc14,$s3,`0`,24,31
1202	lbzx	$acc07,$Tbl1,$acc07
1203	rlwinm	$acc15,$s0,`0`,24,31
1204	lbzx	$acc08,$Tbl1,$acc08
1205	rlwinm	$s0,$acc00,24,0,7
1206	lbzx	$acc09,$Tbl1,$acc09
1207	rlwinm	$s1,$acc01,24,0,7
1208	lbzx	$acc10,$Tbl1,$acc10
1209	rlwinm	$s2,$acc02,24,0,7
1210	lbzx	$acc11,$Tbl1,$acc11
1211	rlwinm	$s3,$acc03,24,0,7
1212	lbzx	$acc12,$Tbl1,$acc12
1213	rlwimi	$s0,$acc04,16,8,15
1214	lbzx	$acc13,$Tbl1,$acc13
1215	rlwimi	$s1,$acc05,16,8,15
1216	lbzx	$acc14,$Tbl1,$acc14
1217	rlwimi	$s2,$acc06,16,8,15
1218	lbzx	$acc15,$Tbl1,$acc15
1219	rlwimi	$s3,$acc07,16,8,15
1220	rlwimi	$s0,$acc08,8,16,23
1221	rlwimi	$s1,$acc09,8,16,23
1222	rlwimi	$s2,$acc10,8,16,23
1223	rlwimi	$s3,$acc11,8,16,23
1224	lwz	$t0,0($key)
1225	or	$s0,$s0,$acc12
1226	lwz	$t1,4($key)
1227	or	$s1,$s1,$acc13
1228	lwz	$t2,8($key)
1229	or	$s2,$s2,$acc14
1230	lwz	$t3,12($key)
1231	or	$s3,$s3,$acc15
1232
	# bdz decrements CTR and leaves the loop when it reaches zero, so the
	# last pass skips the mixing code that follows.
1233	addi	$key,$key,16
1234	bdz	Ldec_compact_done
1235___
# 64-bit variant of the r2/r4/r8 computation: s1 is packed into the upper
# half of s0 and s3 into the upper half of s2, so every step below handles
# two state words at once.  Per byte, each of the three stages doubles the
# low seven bits and xors in 0x1b when the top bit was set.  The closing
# extrdi group copies the upper halves into the odd-numbered acc registers
# so the shared code after this section sees one word per register.
1236$code.=<<___ if ($SIZE_T==8);
1237	# vectorized permutation improves decrypt performance by 10%
1238	insrdi	$s0,$s1,32,0
1239	insrdi	$s2,$s3,32,0
1240
1241	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
1242	and	$acc02,$s2,$mask80
1243	srdi	$acc04,$acc00,7		# r1>>7
1244	srdi	$acc06,$acc02,7
1245	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
1246	andc	$acc10,$s2,$mask80
1247	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
1248	sub	$acc02,$acc02,$acc06
1249	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
1250	add	$acc10,$acc10,$acc10
1251	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1252	and	$acc02,$acc02,$mask1b
1253	xor	$acc00,$acc00,$acc08	# r2
1254	xor	$acc02,$acc02,$acc10
1255
1256	and	$acc04,$acc00,$mask80	# r1=r2&0x80808080
1257	and	$acc06,$acc02,$mask80
1258	srdi	$acc08,$acc04,7		# r1>>7
1259	srdi	$acc10,$acc06,7
1260	andc	$acc12,$acc00,$mask80	# r2&0x7f7f7f7f
1261	andc	$acc14,$acc02,$mask80
1262	sub	$acc04,$acc04,$acc08	# r1-(r1>>7)
1263	sub	$acc06,$acc06,$acc10
1264	add	$acc12,$acc12,$acc12	# (r2&0x7f7f7f7f)<<1
1265	add	$acc14,$acc14,$acc14
1266	and	$acc04,$acc04,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1267	and	$acc06,$acc06,$mask1b
1268	xor	$acc04,$acc04,$acc12	# r4
1269	xor	$acc06,$acc06,$acc14
1270
1271	and	$acc08,$acc04,$mask80	# r1=r4&0x80808080
1272	and	$acc10,$acc06,$mask80
1273	srdi	$acc12,$acc08,7		# r1>>7
1274	srdi	$acc14,$acc10,7
1275	sub	$acc08,$acc08,$acc12	# r1-(r1>>7)
1276	sub	$acc10,$acc10,$acc14
1277	andc	$acc12,$acc04,$mask80	# r4&0x7f7f7f7f
1278	andc	$acc14,$acc06,$mask80
1279	add	$acc12,$acc12,$acc12	# (r4&0x7f7f7f7f)<<1
1280	add	$acc14,$acc14,$acc14
1281	and	$acc08,$acc08,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1282	and	$acc10,$acc10,$mask1b
1283	xor	$acc08,$acc08,$acc12	# r8
1284	xor	$acc10,$acc10,$acc14
1285
1286	xor	$acc00,$acc00,$s0	# r2^r0
1287	xor	$acc02,$acc02,$s2
1288	xor	$acc04,$acc04,$s0	# r4^r0
1289	xor	$acc06,$acc06,$s2
1290
1291	extrdi	$acc01,$acc00,32,0
1292	extrdi	$acc03,$acc02,32,0
1293	extrdi	$acc05,$acc04,32,0
1294	extrdi	$acc07,$acc06,32,0
1295	extrdi	$acc09,$acc08,32,0
1296	extrdi	$acc11,$acc10,32,0
1297___
# 32-bit variant of the same computation, one state word per register:
# acc00..acc03 end up holding r2^r0, acc04..acc07 r4^r0 and acc08..acc11 r8
# for s0..s3 respectively.
1298$code.=<<___ if ($SIZE_T==4);
1299	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
1300	and	$acc01,$s1,$mask80
1301	and	$acc02,$s2,$mask80
1302	and	$acc03,$s3,$mask80
1303	srwi	$acc04,$acc00,7		# r1>>7
1304	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
1305	srwi	$acc05,$acc01,7
1306	andc	$acc09,$s1,$mask80
1307	srwi	$acc06,$acc02,7
1308	andc	$acc10,$s2,$mask80
1309	srwi	$acc07,$acc03,7
1310	andc	$acc11,$s3,$mask80
1311	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
1312	sub	$acc01,$acc01,$acc05
1313	sub	$acc02,$acc02,$acc06
1314	sub	$acc03,$acc03,$acc07
1315	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
1316	add	$acc09,$acc09,$acc09
1317	add	$acc10,$acc10,$acc10
1318	add	$acc11,$acc11,$acc11
1319	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1320	and	$acc01,$acc01,$mask1b
1321	and	$acc02,$acc02,$mask1b
1322	and	$acc03,$acc03,$mask1b
1323	xor	$acc00,$acc00,$acc08	# r2
1324	xor	$acc01,$acc01,$acc09
1325	xor	$acc02,$acc02,$acc10
1326	xor	$acc03,$acc03,$acc11
1327
1328	and	$acc04,$acc00,$mask80	# r1=r2&0x80808080
1329	and	$acc05,$acc01,$mask80
1330	and	$acc06,$acc02,$mask80
1331	and	$acc07,$acc03,$mask80
1332	srwi	$acc08,$acc04,7		# r1>>7
1333	andc	$acc12,$acc00,$mask80	# r2&0x7f7f7f7f
1334	srwi	$acc09,$acc05,7
1335	andc	$acc13,$acc01,$mask80
1336	srwi	$acc10,$acc06,7
1337	andc	$acc14,$acc02,$mask80
1338	srwi	$acc11,$acc07,7
1339	andc	$acc15,$acc03,$mask80
1340	sub	$acc04,$acc04,$acc08	# r1-(r1>>7)
1341	sub	$acc05,$acc05,$acc09
1342	sub	$acc06,$acc06,$acc10
1343	sub	$acc07,$acc07,$acc11
1344	add	$acc12,$acc12,$acc12	# (r2&0x7f7f7f7f)<<1
1345	add	$acc13,$acc13,$acc13
1346	add	$acc14,$acc14,$acc14
1347	add	$acc15,$acc15,$acc15
1348	and	$acc04,$acc04,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1349	and	$acc05,$acc05,$mask1b
1350	and	$acc06,$acc06,$mask1b
1351	and	$acc07,$acc07,$mask1b
1352	xor	$acc04,$acc04,$acc12	# r4
1353	xor	$acc05,$acc05,$acc13
1354	xor	$acc06,$acc06,$acc14
1355	xor	$acc07,$acc07,$acc15
1356
1357	and	$acc08,$acc04,$mask80	# r1=r4&0x80808080
1358	and	$acc09,$acc05,$mask80
1359	srwi	$acc12,$acc08,7		# r1>>7
1360	and	$acc10,$acc06,$mask80
1361	srwi	$acc13,$acc09,7
1362	and	$acc11,$acc07,$mask80
1363	srwi	$acc14,$acc10,7
1364	sub	$acc08,$acc08,$acc12	# r1-(r1>>7)
1365	srwi	$acc15,$acc11,7
1366	sub	$acc09,$acc09,$acc13
1367	sub	$acc10,$acc10,$acc14
1368	sub	$acc11,$acc11,$acc15
1369	andc	$acc12,$acc04,$mask80	# r4&0x7f7f7f7f
1370	andc	$acc13,$acc05,$mask80
1371	andc	$acc14,$acc06,$mask80
1372	andc	$acc15,$acc07,$mask80
1373	add	$acc12,$acc12,$acc12	# (r4&0x7f7f7f7f)<<1
1374	add	$acc13,$acc13,$acc13
1375	add	$acc14,$acc14,$acc14
1376	add	$acc15,$acc15,$acc15
1377	and	$acc08,$acc08,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1378	and	$acc09,$acc09,$mask1b
1379	and	$acc10,$acc10,$mask1b
1380	and	$acc11,$acc11,$mask1b
1381	xor	$acc08,$acc08,$acc12	# r8
1382	xor	$acc09,$acc09,$acc13
1383	xor	$acc10,$acc10,$acc14
1384	xor	$acc11,$acc11,$acc15
1385
1386	xor	$acc00,$acc00,$s0	# r2^r0
1387	xor	$acc01,$acc01,$s1
1388	xor	$acc02,$acc02,$s2
1389	xor	$acc03,$acc03,$s3
1390	xor	$acc04,$acc04,$s0	# r4^r0
1391	xor	$acc05,$acc05,$s1
1392	xor	$acc06,$acc06,$s2
1393	xor	$acc07,$acc07,$s3
1394___
# Code shared by both word sizes: fold the rotated r0 and the r2/r4/r8
# terms prepared above into s0..s3, then start the next pass.
1395$code.=<<___;
1396	rotrwi	$s0,$s0,8		# = ROTATE(r0,8)
1397	rotrwi	$s1,$s1,8
1398	xor	$s0,$s0,$acc00		# ^= r2^r0
1399	rotrwi	$s2,$s2,8
1400	xor	$s1,$s1,$acc01
1401	rotrwi	$s3,$s3,8
1402	xor	$s2,$s2,$acc02
1403	xor	$s3,$s3,$acc03
1404	xor	$acc00,$acc00,$acc08
1405	xor	$acc01,$acc01,$acc09
1406	xor	$acc02,$acc02,$acc10
1407	xor	$acc03,$acc03,$acc11
1408	xor	$s0,$s0,$acc04		# ^= r4^r0
1409	rotrwi	$acc00,$acc00,24
1410	xor	$s1,$s1,$acc05
1411	rotrwi	$acc01,$acc01,24
1412	xor	$s2,$s2,$acc06
1413	rotrwi	$acc02,$acc02,24
1414	xor	$s3,$s3,$acc07
1415	rotrwi	$acc03,$acc03,24
1416	xor	$acc04,$acc04,$acc08
1417	xor	$acc05,$acc05,$acc09
1418	xor	$acc06,$acc06,$acc10
1419	xor	$acc07,$acc07,$acc11
1420	xor	$s0,$s0,$acc08		# ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1421	rotrwi	$acc04,$acc04,16
1422	xor	$s1,$s1,$acc09
1423	rotrwi	$acc05,$acc05,16
1424	xor	$s2,$s2,$acc10
1425	rotrwi	$acc06,$acc06,16
1426	xor	$s3,$s3,$acc11
1427	rotrwi	$acc07,$acc07,16
1428	xor	$s0,$s0,$acc00		# ^= ROTATE(r8^r2^r0,24)
1429	rotrwi	$acc08,$acc08,8
1430	xor	$s1,$s1,$acc01
1431	rotrwi	$acc09,$acc09,8
1432	xor	$s2,$s2,$acc02
1433	rotrwi	$acc10,$acc10,8
1434	xor	$s3,$s3,$acc03
1435	rotrwi	$acc11,$acc11,8
1436	xor	$s0,$s0,$acc04		# ^= ROTATE(r8^r4^r0,16)
1437	xor	$s1,$s1,$acc05
1438	xor	$s2,$s2,$acc06
1439	xor	$s3,$s3,$acc07
1440	xor	$s0,$s0,$acc08		# ^= ROTATE(r8,8)
1441	xor	$s1,$s1,$acc09
1442	xor	$s2,$s2,$acc10
1443	xor	$s3,$s3,$acc11
1444
1445	b	Ldec_compact_loop
1446.align	4
	# Reached from the bdz above: xor the state with the key words that
	# the final pass loaded into t0..t3, then return.
1447Ldec_compact_done:
1448	xor	$s0,$s0,$t0
1449	xor	$s1,$s1,$t1
1450	xor	$s2,$s2,$t2
1451	xor	$s3,$s3,$t3
1452	blr
1453	.long	0
1454	.byte	0,12,0x14,0,0,0,0,0
1455.size	.AES_decrypt,.-.AES_decrypt
1456
1457.asciz	"AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1458.align	7
1459___
1460
# Evaluate every backtick-quoted span of the generated text as a Perl
# expression and substitute its value for the span (this resolves the
# constant arithmetic written inline in the assembly).
1461$code =~ s/\`([^\`]*)\`/eval $1/gem;
# Emit the finished text on STDOUT.
1462print $code;
# Closing STDOUT explicitly turns a failed close into a fatal error
# instead of letting it go unnoticed.
1463close STDOUT or die "error closing STDOUT: $!";
1464