xref: /openssl/crypto/aes/aes_core.c (revision fecb3aae)
1 /*
2  * Copyright 2002-2022 The OpenSSL Project Authors. All Rights Reserved.
3  *
4  * Licensed under the Apache License 2.0 (the "License").  You may not use
5  * this file except in compliance with the License.  You can obtain a copy
6  * in the file LICENSE in the source distribution or at
7  * https://www.openssl.org/source/license.html
8  */
9 
10 /**
11  * rijndael-alg-fst.c
12  *
13  * @version 3.0 (December 2000)
14  *
15  * Optimised ANSI C code for the Rijndael cipher (now AES)
16  *
17  * @author Vincent Rijmen
18  * @author Antoon Bosselaers
19  * @author Paulo Barreto
20  *
21  * This code is hereby placed in the public domain.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
24  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
25  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
27  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
30  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
31  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
32  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
33  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /* Note: rewritten a little bit to provide error control and an OpenSSL-
37    compatible API */
38 
39 /*
40  * AES low level APIs are deprecated for public use, but still ok for internal
41  * use where we're using them to implement the higher level EVP interface, as is
42  * the case here.
43  */
44 #include "internal/deprecated.h"
45 
46 #include <assert.h>
47 
48 #include <stdlib.h>
49 #include <openssl/crypto.h>
50 #include <openssl/aes.h>
51 #include "aes_local.h"
52 
53 #if defined(OPENSSL_AES_CONST_TIME) && !defined(AES_ASM)
54 
/*
 * U64(C) pastes an integer suffix onto the literal C so that it is typed as
 * an unsigned 64-bit constant by the compiler in use.
 */
# if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
#  define U64(C) C##UI64        /* Windows toolchains other than MinGW */
# elif defined(__arch64__)
#  define U64(C) C##UL          /* targets that define __arch64__ */
# else
#  define U64(C) C##ULL         /* every other target */
# endif
62 
63 typedef union {
64     unsigned char b[8];
65     u32 w[2];
66     u64 d;
67 } uni;
68 
69 /*
70  * Compute w := (w * x) mod (x^8 + x^4 + x^3 + x^1 + 1)
71  * Therefore the name "xtime".
72  */
XtimeWord(u32 * w)73 static void XtimeWord(u32 *w)
74 {
75     u32 a, b;
76 
77     a = *w;
78     b = a & 0x80808080u;
79     a ^= b;
80     b -= b >> 7;
81     b &= 0x1B1B1B1Bu;
82     b ^= a << 1;
83     *w = b;
84 }
85 
/*
 * 64-bit variant of XtimeWord(): multiply each of the eight bytes packed in
 * *w by x modulo x^8 + x^4 + x^3 + x + 1 and store the result back in *w.
 */
static void XtimeLong(u64 *w)
{
    const u64 in = *w;
    const u64 top = in & U64(0x8080808080808080);   /* top bit of every byte */
    u64 reduce;

    /* 0x7F in every flagged byte, masked down to the 0x1B reduction term */
    reduce = (top - (top >> 7)) & U64(0x1B1B1B1B1B1B1B1B);

    /* Shift with the top bits cleared so nothing leaks into the next byte. */
    *w = reduce ^ ((in ^ top) << 1);
}
98 
99 /*
100  * This computes w := S * w ^ -1 + c, where c = {01100011}.
101  * Instead of using GF(2^8) mod (x^8+x^4+x^3+x+1} we do the inversion
102  * in GF(GF(GF(2^2)^2)^2) mod (X^2+X+8)
103  * and GF(GF(2^2)^2) mod (X^2+X+2)
104  * and GF(2^2) mod (X^2+X+1)
105  * The first part of the algorithm below transfers the coordinates
106  * {0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80} =>
107  * {1,Y,Y^2,Y^3,Y^4,Y^5,Y^6,Y^7} with Y=0x41:
108  * {0x01,0x41,0x66,0x6c,0x56,0x9a,0x58,0xc4}
109  * The last part undoes the coordinate transfer and the final affine
110  * transformation S:
111  * b[i] = b[i] + b[(i+4)%8] + b[(i+5)%8] + b[(i+6)%8] + b[(i+7)%8] + c[i]
112  * in one step.
113  * The multiplication in GF(2^2^2^2) is done in ordinary coords:
114  * A = (a0*1 + a1*x^4)
115  * B = (b0*1 + b1*x^4)
116  * AB = ((a0*b0 + 8*a1*b1)*1 + (a1*b0 + (a0+a1)*b1)*x^4)
117  * When A = (a0,a1) is given we want to solve AB = 1:
118  * (a) 1 = a0*b0 + 8*a1*b1
119  * (b) 0 = a1*b0 + (a0+a1)*b1
120  * => multiply (a) by a1 and (b) by a0
121  * (c) a1 = a1*a0*b0 + (8*a1*a1)*b1
122  * (d) 0 = a1*a0*b0 + (a0*a0+a1*a0)*b1
123  * => add (c) + (d)
124  * (e) a1 = (a0*a0 + a1*a0 + 8*a1*a1)*b1
125  * => therefore
126  * b1 = (a0*a0 + a1*a0 + 8*a1*a1)^-1 * a1
127  * => and adding (a1*b0) to (b) we get
128  * (f) a1*b0 = (a0+a1)*b1
129  * => therefore
130  * b0 = (a0*a0 + a1*a0 + 8*a1*a1)^-1 * (a0+a1)
131  * Note this formula also works for the case
132  * (a0+a1)*a0 + 8*a1*a1 = 0
133  * if the inverse element for 0^-1 is mapped to 0.
134  * Repeat the same for GF(2^2^2) and GF(2^2).
135  * We get the following algorithm:
136  * inv8(a0,a1):
137  *   x0 = a0^a1
138  *   [y0,y1] = mul4([x0,a1],[a0,a1]); (*)
139  *   y1 = mul4(8,y1);
140  *   t = inv4(y0^y1);
141  *   [b0,b1] = mul4([x0,a1],[t,t]); (*)
142  *   return [b0,b1];
143  * The non-linear multiplies (*) can be done in parallel at no extra cost.
144  */
SubWord(u32 * w)145 static void SubWord(u32 *w)
146 {
147     u32 x, y, a1, a2, a3, a4, a5, a6;
148 
149     x = *w;
150     y = ((x & 0xFEFEFEFEu) >> 1) | ((x & 0x01010101u) << 7);
151     x &= 0xDDDDDDDDu;
152     x ^= y & 0x57575757u;
153     y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
154     x ^= y & 0x1C1C1C1Cu;
155     y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
156     x ^= y & 0x4A4A4A4Au;
157     y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
158     x ^= y & 0x42424242u;
159     y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
160     x ^= y & 0x64646464u;
161     y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
162     x ^= y & 0xE0E0E0E0u;
163     a1 = x;
164     a1 ^= (x & 0xF0F0F0F0u) >> 4;
165     a2 = ((x & 0xCCCCCCCCu) >> 2) | ((x & 0x33333333u) << 2);
166     a3 = x & a1;
167     a3 ^= (a3 & 0xAAAAAAAAu) >> 1;
168     a3 ^= (((x << 1) & a1) ^ ((a1 << 1) & x)) & 0xAAAAAAAAu;
169     a4 = a2 & a1;
170     a4 ^= (a4 & 0xAAAAAAAAu) >> 1;
171     a4 ^= (((a2 << 1) & a1) ^ ((a1 << 1) & a2)) & 0xAAAAAAAAu;
172     a5 = (a3 & 0xCCCCCCCCu) >> 2;
173     a3 ^= ((a4 << 2) ^ a4) & 0xCCCCCCCCu;
174     a4 = a5 & 0x22222222u;
175     a4 |= a4 >> 1;
176     a4 ^= (a5 << 1) & 0x22222222u;
177     a3 ^= a4;
178     a5 = a3 & 0xA0A0A0A0u;
179     a5 |= a5 >> 1;
180     a5 ^= (a3 << 1) & 0xA0A0A0A0u;
181     a4 = a5 & 0xC0C0C0C0u;
182     a6 = a4 >> 2;
183     a4 ^= (a5 << 2) & 0xC0C0C0C0u;
184     a5 = a6 & 0x20202020u;
185     a5 |= a5 >> 1;
186     a5 ^= (a6 << 1) & 0x20202020u;
187     a4 |= a5;
188     a3 ^= a4 >> 4;
189     a3 &= 0x0F0F0F0Fu;
190     a2 = a3;
191     a2 ^= (a3 & 0x0C0C0C0Cu) >> 2;
192     a4 = a3 & a2;
193     a4 ^= (a4 & 0x0A0A0A0A0Au) >> 1;
194     a4 ^= (((a3 << 1) & a2) ^ ((a2 << 1) & a3)) & 0x0A0A0A0Au;
195     a5 = a4 & 0x08080808u;
196     a5 |= a5 >> 1;
197     a5 ^= (a4 << 1) & 0x08080808u;
198     a4 ^= a5 >> 2;
199     a4 &= 0x03030303u;
200     a4 ^= (a4 & 0x02020202u) >> 1;
201     a4 |= a4 << 2;
202     a3 = a2 & a4;
203     a3 ^= (a3 & 0x0A0A0A0Au) >> 1;
204     a3 ^= (((a2 << 1) & a4) ^ ((a4 << 1) & a2)) & 0x0A0A0A0Au;
205     a3 |= a3 << 4;
206     a2 = ((a1 & 0xCCCCCCCCu) >> 2) | ((a1 & 0x33333333u) << 2);
207     x = a1 & a3;
208     x ^= (x & 0xAAAAAAAAu) >> 1;
209     x ^= (((a1 << 1) & a3) ^ ((a3 << 1) & a1)) & 0xAAAAAAAAu;
210     a4 = a2 & a3;
211     a4 ^= (a4 & 0xAAAAAAAAu) >> 1;
212     a4 ^= (((a2 << 1) & a3) ^ ((a3 << 1) & a2)) & 0xAAAAAAAAu;
213     a5 = (x & 0xCCCCCCCCu) >> 2;
214     x ^= ((a4 << 2) ^ a4) & 0xCCCCCCCCu;
215     a4 = a5 & 0x22222222u;
216     a4 |= a4 >> 1;
217     a4 ^= (a5 << 1) & 0x22222222u;
218     x ^= a4;
219     y = ((x & 0xFEFEFEFEu) >> 1) | ((x & 0x01010101u) << 7);
220     x &= 0x39393939u;
221     x ^= y & 0x3F3F3F3Fu;
222     y = ((y & 0xFCFCFCFCu) >> 2) | ((y & 0x03030303u) << 6);
223     x ^= y & 0x97979797u;
224     y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
225     x ^= y & 0x9B9B9B9Bu;
226     y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
227     x ^= y & 0x3C3C3C3Cu;
228     y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
229     x ^= y & 0xDDDDDDDDu;
230     y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
231     x ^= y & 0x72727272u;
232     x ^= 0x63636363u;
233     *w = x;
234 }
235 
/*
 * SubLong: the byte substitution of SubWord(), applied to all eight bytes
 * packed in *w at once. The operation sequence is the same; every mask is
 * simply repeated across eight bytes instead of four.
 */

/* Repeat the two hex digits hh in all eight bytes of a 64-bit constant. */
# define SL_REP(hh) U64(0x##hh##hh##hh##hh##hh##hh##hh##hh)
/* Rotate every byte of v right by one bit. */
# define SL_ROR1(v) ((((v) & SL_REP(FE)) >> 1) | (((v) & SL_REP(01)) << 7))

static void SubLong(u64 *w)
{
    u64 v, rot, t1, t2, t3, t4, t5, t6;

    v = *w;

    /* Input map: XOR of masked, successively byte-rotated copies. */
    rot = SL_ROR1(v);
    v &= SL_REP(DD);
    v ^= rot & SL_REP(57);
    rot = SL_ROR1(rot);
    v ^= rot & SL_REP(1C);
    rot = SL_ROR1(rot);
    v ^= rot & SL_REP(4A);
    rot = SL_ROR1(rot);
    v ^= rot & SL_REP(42);
    rot = SL_ROR1(rot);
    v ^= rot & SL_REP(64);
    rot = SL_ROR1(rot);
    v ^= rot & SL_REP(E0);

    /* t1 folds the high nibble into the low one; t2 swaps 2-bit pairs. */
    t1 = v ^ ((v & SL_REP(F0)) >> 4);
    t2 = ((v & SL_REP(CC)) >> 2) | ((v & SL_REP(33)) << 2);
    t3 = v & t1;
    t3 ^= (t3 & SL_REP(AA)) >> 1;
    t3 ^= (((v << 1) & t1) ^ ((t1 << 1) & v)) & SL_REP(AA);
    t4 = t2 & t1;
    t4 ^= (t4 & SL_REP(AA)) >> 1;
    t4 ^= (((t2 << 1) & t1) ^ ((t1 << 1) & t2)) & SL_REP(AA);
    t5 = (t3 & SL_REP(CC)) >> 2;
    t3 ^= ((t4 << 2) ^ t4) & SL_REP(CC);
    t4 = t5 & SL_REP(22);
    t4 |= t4 >> 1;
    t4 ^= (t5 << 1) & SL_REP(22);
    t3 ^= t4;
    t5 = t3 & SL_REP(A0);
    t5 |= t5 >> 1;
    t5 ^= (t3 << 1) & SL_REP(A0);
    t4 = t5 & SL_REP(C0);
    t6 = t4 >> 2;
    t4 ^= (t5 << 2) & SL_REP(C0);
    t5 = t6 & SL_REP(20);
    t5 |= t5 >> 1;
    t5 ^= (t6 << 1) & SL_REP(20);
    t4 |= t5;
    t3 ^= t4 >> 4;
    t3 &= SL_REP(0F);

    /* Low-nibble stage; its result is then copied into both nibbles. */
    t2 = t3 ^ ((t3 & SL_REP(0C)) >> 2);
    t4 = t3 & t2;
    t4 ^= (t4 & SL_REP(0A)) >> 1;
    t4 ^= (((t3 << 1) & t2) ^ ((t2 << 1) & t3)) & SL_REP(0A);
    t5 = t4 & SL_REP(08);
    t5 |= t5 >> 1;
    t5 ^= (t4 << 1) & SL_REP(08);
    t4 ^= t5 >> 2;
    t4 &= SL_REP(03);
    t4 ^= (t4 & SL_REP(02)) >> 1;
    t4 |= t4 << 2;
    t3 = t2 & t4;
    t3 ^= (t3 & SL_REP(0A)) >> 1;
    t3 ^= (((t2 << 1) & t4) ^ ((t4 << 1) & t2)) & SL_REP(0A);
    t3 |= t3 << 4;

    /* Second full-byte multiply, this time of t1 by the value in t3. */
    t2 = ((t1 & SL_REP(CC)) >> 2) | ((t1 & SL_REP(33)) << 2);
    v = t1 & t3;
    v ^= (v & SL_REP(AA)) >> 1;
    v ^= (((t1 << 1) & t3) ^ ((t3 << 1) & t1)) & SL_REP(AA);
    t4 = t2 & t3;
    t4 ^= (t4 & SL_REP(AA)) >> 1;
    t4 ^= (((t2 << 1) & t3) ^ ((t3 << 1) & t2)) & SL_REP(AA);
    t5 = (v & SL_REP(CC)) >> 2;
    v ^= ((t4 << 2) ^ t4) & SL_REP(CC);
    t4 = t5 & SL_REP(22);
    t4 |= t4 >> 1;
    t4 ^= (t5 << 1) & SL_REP(22);
    v ^= t4;

    /* Output map; the second rotation step moves by two bits, not one. */
    rot = SL_ROR1(v);
    v &= SL_REP(39);
    v ^= rot & SL_REP(3F);
    rot = ((rot & SL_REP(FC)) >> 2) | ((rot & SL_REP(03)) << 6);
    v ^= rot & SL_REP(97);
    rot = SL_ROR1(rot);
    v ^= rot & SL_REP(9B);
    rot = SL_ROR1(rot);
    v ^= rot & SL_REP(3C);
    rot = SL_ROR1(rot);
    v ^= rot & SL_REP(DD);
    rot = SL_ROR1(rot);
    v ^= rot & SL_REP(72);

    /* additive constant 0x63 in every byte */
    v ^= SL_REP(63);
    *w = v;
}

# undef SL_ROR1
# undef SL_REP
326 
327 /*
328  * This computes w := (S^-1 * (w + c))^-1
329  */
InvSubLong(u64 * w)330 static void InvSubLong(u64 *w)
331 {
332     u64 x, y, a1, a2, a3, a4, a5, a6;
333 
334     x = *w;
335     x ^= U64(0x6363636363636363);
336     y = ((x & U64(0xFEFEFEFEFEFEFEFE)) >> 1) | ((x & U64(0x0101010101010101)) << 7);
337     x &= U64(0xFDFDFDFDFDFDFDFD);
338     x ^= y & U64(0x5E5E5E5E5E5E5E5E);
339     y = ((y & U64(0xFEFEFEFEFEFEFEFE)) >> 1) | ((y & U64(0x0101010101010101)) << 7);
340     x ^= y & U64(0xF3F3F3F3F3F3F3F3);
341     y = ((y & U64(0xFEFEFEFEFEFEFEFE)) >> 1) | ((y & U64(0x0101010101010101)) << 7);
342     x ^= y & U64(0xF5F5F5F5F5F5F5F5);
343     y = ((y & U64(0xFEFEFEFEFEFEFEFE)) >> 1) | ((y & U64(0x0101010101010101)) << 7);
344     x ^= y & U64(0x7878787878787878);
345     y = ((y & U64(0xFEFEFEFEFEFEFEFE)) >> 1) | ((y & U64(0x0101010101010101)) << 7);
346     x ^= y & U64(0x7777777777777777);
347     y = ((y & U64(0xFEFEFEFEFEFEFEFE)) >> 1) | ((y & U64(0x0101010101010101)) << 7);
348     x ^= y & U64(0x1515151515151515);
349     y = ((y & U64(0xFEFEFEFEFEFEFEFE)) >> 1) | ((y & U64(0x0101010101010101)) << 7);
350     x ^= y & U64(0xA5A5A5A5A5A5A5A5);
351     a1 = x;
352     a1 ^= (x & U64(0xF0F0F0F0F0F0F0F0)) >> 4;
353     a2 = ((x & U64(0xCCCCCCCCCCCCCCCC)) >> 2) | ((x & U64(0x3333333333333333)) << 2);
354     a3 = x & a1;
355     a3 ^= (a3 & U64(0xAAAAAAAAAAAAAAAA)) >> 1;
356     a3 ^= (((x << 1) & a1) ^ ((a1 << 1) & x)) & U64(0xAAAAAAAAAAAAAAAA);
357     a4 = a2 & a1;
358     a4 ^= (a4 & U64(0xAAAAAAAAAAAAAAAA)) >> 1;
359     a4 ^= (((a2 << 1) & a1) ^ ((a1 << 1) & a2)) & U64(0xAAAAAAAAAAAAAAAA);
360     a5 = (a3 & U64(0xCCCCCCCCCCCCCCCC)) >> 2;
361     a3 ^= ((a4 << 2) ^ a4) & U64(0xCCCCCCCCCCCCCCCC);
362     a4 = a5 & U64(0x2222222222222222);
363     a4 |= a4 >> 1;
364     a4 ^= (a5 << 1) & U64(0x2222222222222222);
365     a3 ^= a4;
366     a5 = a3 & U64(0xA0A0A0A0A0A0A0A0);
367     a5 |= a5 >> 1;
368     a5 ^= (a3 << 1) & U64(0xA0A0A0A0A0A0A0A0);
369     a4 = a5 & U64(0xC0C0C0C0C0C0C0C0);
370     a6 = a4 >> 2;
371     a4 ^= (a5 << 2) & U64(0xC0C0C0C0C0C0C0C0);
372     a5 = a6 & U64(0x2020202020202020);
373     a5 |= a5 >> 1;
374     a5 ^= (a6 << 1) & U64(0x2020202020202020);
375     a4 |= a5;
376     a3 ^= a4 >> 4;
377     a3 &= U64(0x0F0F0F0F0F0F0F0F);
378     a2 = a3;
379     a2 ^= (a3 & U64(0x0C0C0C0C0C0C0C0C)) >> 2;
380     a4 = a3 & a2;
381     a4 ^= (a4 & U64(0x0A0A0A0A0A0A0A0A)) >> 1;
382     a4 ^= (((a3 << 1) & a2) ^ ((a2 << 1) & a3)) & U64(0x0A0A0A0A0A0A0A0A);
383     a5 = a4 & U64(0x0808080808080808);
384     a5 |= a5 >> 1;
385     a5 ^= (a4 << 1) & U64(0x0808080808080808);
386     a4 ^= a5 >> 2;
387     a4 &= U64(0x0303030303030303);
388     a4 ^= (a4 & U64(0x0202020202020202)) >> 1;
389     a4 |= a4 << 2;
390     a3 = a2 & a4;
391     a3 ^= (a3 & U64(0x0A0A0A0A0A0A0A0A)) >> 1;
392     a3 ^= (((a2 << 1) & a4) ^ ((a4 << 1) & a2)) & U64(0x0A0A0A0A0A0A0A0A);
393     a3 |= a3 << 4;
394     a2 = ((a1 & U64(0xCCCCCCCCCCCCCCCC)) >> 2) | ((a1 & U64(0x3333333333333333)) << 2);
395     x = a1 & a3;
396     x ^= (x & U64(0xAAAAAAAAAAAAAAAA)) >> 1;
397     x ^= (((a1 << 1) & a3) ^ ((a3 << 1) & a1)) & U64(0xAAAAAAAAAAAAAAAA);
398     a4 = a2 & a3;
399     a4 ^= (a4 & U64(0xAAAAAAAAAAAAAAAA)) >> 1;
400     a4 ^= (((a2 << 1) & a3) ^ ((a3 << 1) & a2)) & U64(0xAAAAAAAAAAAAAAAA);
401     a5 = (x & U64(0xCCCCCCCCCCCCCCCC)) >> 2;
402     x ^= ((a4 << 2) ^ a4) & U64(0xCCCCCCCCCCCCCCCC);
403     a4 = a5 & U64(0x2222222222222222);
404     a4 |= a4 >> 1;
405     a4 ^= (a5 << 1) & U64(0x2222222222222222);
406     x ^= a4;
407     y = ((x & U64(0xFEFEFEFEFEFEFEFE)) >> 1) | ((x & U64(0x0101010101010101)) << 7);
408     x &= U64(0xB5B5B5B5B5B5B5B5);
409     x ^= y & U64(0x4040404040404040);
410     y = ((y & U64(0xFEFEFEFEFEFEFEFE)) >> 1) | ((y & U64(0x0101010101010101)) << 7);
411     x ^= y & U64(0x8080808080808080);
412     y = ((y & U64(0xFEFEFEFEFEFEFEFE)) >> 1) | ((y & U64(0x0101010101010101)) << 7);
413     x ^= y & U64(0x1616161616161616);
414     y = ((y & U64(0xFEFEFEFEFEFEFEFE)) >> 1) | ((y & U64(0x0101010101010101)) << 7);
415     x ^= y & U64(0xEBEBEBEBEBEBEBEB);
416     y = ((y & U64(0xFEFEFEFEFEFEFEFE)) >> 1) | ((y & U64(0x0101010101010101)) << 7);
417     x ^= y & U64(0x9797979797979797);
418     y = ((y & U64(0xFEFEFEFEFEFEFEFE)) >> 1) | ((y & U64(0x0101010101010101)) << 7);
419     x ^= y & U64(0xFBFBFBFBFBFBFBFB);
420     y = ((y & U64(0xFEFEFEFEFEFEFEFE)) >> 1) | ((y & U64(0x0101010101010101)) << 7);
421     x ^= y & U64(0x7D7D7D7D7D7D7D7D);
422     *w = x;
423 }
424 
/*
 * ShiftRows on the 16 state bytes addressed through state. The byte for
 * row r of column c lives at offset 4*c + r; row r is rotated left by r
 * columns (row 0 is left unchanged).
 */
static void ShiftRows(u64 *state)
{
    unsigned char *bytes = (unsigned char *)state;
    unsigned char row[4];
    int r, c;

    for (r = 0; r < 4; r++) {
        /* take a copy of the row first so the rewrite cannot clobber it */
        for (c = 0; c < 4; c++)
            row[c] = bytes[4 * c + r];
        for (c = 0; c < 4; c++)
            bytes[4 * c + r] = row[(c + r) % 4];
    }
}
443 
/*
 * Inverse of ShiftRows(): with the byte for row r of column c at offset
 * 4*c + r, row r is rotated right by r columns (row 0 is left unchanged).
 */
static void InvShiftRows(u64 *state)
{
    unsigned char *bytes = (unsigned char *)state;
    unsigned char row[4];
    int r, c;

    for (r = 0; r < 4; r++) {
        /* take a copy of the row first so the rewrite cannot clobber it */
        for (c = 0; c < 4; c++)
            row[c] = bytes[4 * c + r];
        /* 4 is added so the index stays non-negative before the modulo */
        for (c = 0; c < 4; c++)
            bytes[4 * c + r] = row[(4 + c - r) % 4];
    }
}
462 
/*
 * MixColumns on the state held in state[0] and state[1]. Each u64 carries
 * two four-byte columns, so both columns of a half are transformed by the
 * same 64-bit operations.
 *
 * For column bytes a[0..3] the output byte i is
 *   2*a[i] ^ 3*a[i+1] ^ a[i+2] ^ a[i+3]     (indices modulo 4),
 * assembled here as (XOR of the other three bytes) ^ 2*a[i] ^ 2*a[i+1].
 */
static void MixColumns(u64 *state)
{
    uni col;                    /* input pair; xtime(input) after the call */
    uni mix;                    /* output pair being assembled */
    u64 sum;
    int half, i;

    for (half = 0; half < 2; half++) {
        col.d = state[half];

        /* After both folds every byte holds the XOR of its whole column. */
        sum = col.d;
        sum ^= ((sum & U64(0xFFFF0000FFFF0000)) >> 16)
               | ((sum & U64(0x0000FFFF0000FFFF)) << 16);
        sum ^= ((sum & U64(0xFF00FF00FF00FF00)) >> 8)
               | ((sum & U64(0x00FF00FF00FF00FF)) << 8);
        /* Remove the byte itself: XOR of the other three column bytes. */
        sum ^= col.d;

        XtimeLong(&col.d);
        mix.d = sum ^ col.d;

        /* Add xtime of the next byte, wrapping inside each 4-byte column. */
        for (i = 0; i < 4; i++) {
            mix.b[i] ^= col.b[(i + 1) % 4];
            mix.b[4 + i] ^= col.b[4 + (i + 1) % 4];
        }

        state[half] = mix.d;
    }
}
490 
/*
 * InvMixColumns on the state held in state[0] and state[1]; each u64
 * carries two four-byte columns.
 *
 * The first part repeats the MixColumns() computation. Two further
 * XtimeLong() steps then add 4*(a[i] ^ a[i+2]) and 8*(a[0]^a[1]^a[2]^a[3])
 * to every byte of a column a[0..3].
 */
static void InvMixColumns(u64 *state)
{
    uni col;                    /* input pair, then successive xtime values */
    uni mix;                    /* output pair being assembled */
    u64 sum;
    int half, i;

    for (half = 0; half < 2; half++) {
        col.d = state[half];

        /* After both folds every byte holds the XOR of its whole column. */
        sum = col.d;
        sum ^= ((sum & U64(0xFFFF0000FFFF0000)) >> 16)
               | ((sum & U64(0x0000FFFF0000FFFF)) << 16);
        sum ^= ((sum & U64(0xFF00FF00FF00FF00)) >> 8)
               | ((sum & U64(0x00FF00FF00FF00FF)) << 8);
        /* Remove the byte itself: XOR of the other three column bytes. */
        sum ^= col.d;

        XtimeLong(&col.d);      /* col = 2*a */
        mix.d = sum ^ col.d;

        /* Add xtime of the next byte, wrapping inside each 4-byte column. */
        for (i = 0; i < 4; i++) {
            mix.b[i] ^= col.b[(i + 1) % 4];
            mix.b[4 + i] ^= col.b[4 + (i + 1) % 4];
        }

        XtimeLong(&col.d);      /* col = 4*a */
        /* pair every byte with the one two positions away in its column */
        col.d ^= ((col.d & U64(0xFFFF0000FFFF0000)) >> 16)
                 | ((col.d & U64(0x0000FFFF0000FFFF)) << 16);
        mix.d ^= col.d;

        XtimeLong(&col.d);      /* col = 8*(a[i] ^ a[i+2]) */
        /* fold in the neighbouring byte: 8 * XOR of the whole column */
        col.d ^= ((col.d & U64(0xFF00FF00FF00FF00)) >> 8)
                 | ((col.d & U64(0x00FF00FF00FF00FF)) << 8);
        mix.d ^= col.d;

        state[half] = mix.d;
    }
}
526 
/*
 * XOR one round key (two u64 values starting at w) into the state
 * (two u64 values starting at state).
 */
static void AddRoundKey(u64 *state, const u64 *w)
{
    int i;

    for (i = 0; i < 2; i++)
        state[i] ^= w[i];
}
532 
/*
 * Encrypt one 16-byte block.
 *
 * in   16 input bytes
 * out  receives 16 output bytes; the input is copied into a local buffer
 *      before out is written, so in and out may be the same buffer
 * w    key schedule, two u64 values per round key, (nr + 1) round keys
 * nr   number of rounds
 */
static void Cipher(const unsigned char *in, unsigned char *out,
                   const u64 *w, int nr)
{
    u64 block[2];
    int round;

    memcpy(block, in, 16);
    AddRoundKey(block, w);

    for (round = 1; ; round++) {
        SubLong(&block[0]);
        SubLong(&block[1]);
        ShiftRows(block);
        /* the last round has no MixColumns and uses the final round key */
        if (round >= nr)
            break;
        MixColumns(block);
        AddRoundKey(block, w + 2 * round);
    }
    AddRoundKey(block, w + 2 * nr);

    memcpy(out, block, 16);
}
558 
/*
 * Decrypt one 16-byte block using the same key schedule as Cipher(),
 * consumed from the last round key back to the first.
 *
 * in   16 input bytes
 * out  receives 16 output bytes; the input is copied into a local buffer
 *      before out is written, so in and out may be the same buffer
 * w    key schedule, two u64 values per round key, (nr + 1) round keys
 * nr   number of rounds
 */
static void InvCipher(const unsigned char *in, unsigned char *out,
                      const u64 *w, int nr)
{
    u64 block[2];
    int round;

    memcpy(block, in, 16);
    AddRoundKey(block, w + 2 * nr);

    for (round = nr - 1; ; round--) {
        InvShiftRows(block);
        InvSubLong(&block[0]);
        InvSubLong(&block[1]);
        /* the last step has no InvMixColumns and uses the first round key */
        if (round <= 0)
            break;
        AddRoundKey(block, w + 2 * round);
        InvMixColumns(block);
    }
    AddRoundKey(block, w);

    memcpy(out, block, 16);
}
585 
/*
 * Rotate the first four bytes of *x by one position in memory order:
 * byte 0 moves to position 3 and bytes 1..3 each move down by one.
 */
static void RotWord(u32 *x)
{
    unsigned char *p = (unsigned char *)x;
    const unsigned char first = p[0];
    int i;

    for (i = 0; i < 3; i++)
        p[i] = p[i + 1];
    p[3] = first;
}
598 
/*
 * Expand a user key into the round-key schedule.
 *
 * key  nk*4 bytes of key material
 * w    schedule to fill: (nr + 1) * 2 u64 values
 * nr   number of rounds
 * nk   key length in 4-byte words
 *
 * The schedule is produced one u64 (two words) per iteration. The first
 * word of a pair may need the RotWord/SubWord/rcon step (start of a
 * key-length period) or, for keys longer than six words, a plain SubWord
 * in the middle of the period.
 */
static void KeyExpansion(const unsigned char *key, u64 *w,
                         int nr, int nk)
{
    const int stride = nk / 2;          /* key length in u64 units */
    const int total = (nr + 1) * 2;     /* schedule length in u64 units */
    u32 rcon;
    u32 t;
    uni last;
    int idx, phase;

    memcpy(w, key, nk*4);
    /* round constant starts as byte value 1 in the first byte in memory */
    memcpy(&rcon, "\1\0\0\0", 4);

    last.d = w[stride - 1];
    for (idx = stride; idx < total; idx++) {
        phase = idx % stride;
        t = last.w[1];                  /* most recently generated word */

        if (phase == 0) {
            RotWord(&t);
            SubWord(&t);
            t ^= rcon;
            XtimeWord(&rcon);           /* next round constant */
        } else if (nk > 6 && phase == 2) {
            SubWord(&t);
        }

        /* both new words are built on the pair one key-length earlier */
        last.d = w[idx - stride];
        last.w[0] ^= t;
        last.w[1] ^= last.w[0];
        w[idx] = last.d;
    }
}
627 
628 /**
629  * Expand the cipher key into the encryption key schedule.
630  */
AES_set_encrypt_key(const unsigned char * userKey,const int bits,AES_KEY * key)631 int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
632                         AES_KEY *key)
633 {
634     u64 *rk;
635 
636     if (!userKey || !key)
637         return -1;
638     if (bits != 128 && bits != 192 && bits != 256)
639         return -2;
640 
641     rk = (u64*)key->rd_key;
642 
643     if (bits == 128)
644         key->rounds = 10;
645     else if (bits == 192)
646         key->rounds = 12;
647     else
648         key->rounds = 14;
649 
650     KeyExpansion(userKey, rk, key->rounds, bits/32);
651     return 0;
652 }
653 
654 /**
655  * Expand the cipher key into the decryption key schedule.
656  */
AES_set_decrypt_key(const unsigned char * userKey,const int bits,AES_KEY * key)657 int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
658                         AES_KEY *key)
659 {
660     return AES_set_encrypt_key(userKey, bits, key);
661 }
662 
663 /*
664  * Encrypt a single block
665  * in and out can overlap
666  */
AES_encrypt(const unsigned char * in,unsigned char * out,const AES_KEY * key)667 void AES_encrypt(const unsigned char *in, unsigned char *out,
668                  const AES_KEY *key)
669 {
670     const u64 *rk;
671 
672     assert(in && out && key);
673     rk = (u64*)key->rd_key;
674 
675     Cipher(in, out, rk, key->rounds);
676 }
677 
678 /*
679  * Decrypt a single block
680  * in and out can overlap
681  */
AES_decrypt(const unsigned char * in,unsigned char * out,const AES_KEY * key)682 void AES_decrypt(const unsigned char *in, unsigned char *out,
683                  const AES_KEY *key)
684 {
685     const u64 *rk;
686 
687     assert(in && out && key);
688     rk = (u64*)key->rd_key;
689 
690     InvCipher(in, out, rk, key->rounds);
691 }
692 #elif !defined(AES_ASM)
693 /*-
694 Te0[x] = S [x].[02, 01, 01, 03];
695 Te1[x] = S [x].[03, 02, 01, 01];
696 Te2[x] = S [x].[01, 03, 02, 01];
697 Te3[x] = S [x].[01, 01, 03, 02];
698 
699 Td0[x] = Si[x].[0e, 09, 0d, 0b];
700 Td1[x] = Si[x].[0b, 0e, 09, 0d];
701 Td2[x] = Si[x].[0d, 0b, 0e, 09];
702 Td3[x] = Si[x].[09, 0d, 0b, 0e];
703 Td4[x] = Si[x].[01];
704 */
705 
706 static const u32 Te0[256] = {
707     0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
708     0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
709     0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
710     0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
711     0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
712     0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
713     0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
714     0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
715     0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
716     0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
717     0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
718     0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
719     0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
720     0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
721     0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
722     0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
723     0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
724     0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
725     0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
726     0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
727     0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
728     0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
729     0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
730     0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
731     0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
732     0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
733     0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
734     0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
735     0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
736     0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
737     0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
738     0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
739     0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
740     0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
741     0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
742     0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
743     0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
744     0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
745     0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
746     0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
747     0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
748     0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
749     0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
750     0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
751     0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
752     0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
753     0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
754     0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
755     0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
756     0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
757     0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
758     0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
759     0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
760     0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
761     0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
762     0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
763     0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
764     0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
765     0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
766     0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
767     0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
768     0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
769     0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
770     0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
771 };
772 static const u32 Te1[256] = {
773     0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
774     0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
775     0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
776     0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
777     0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
778     0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
779     0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
780     0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
781     0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
782     0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
783     0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
784     0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
785     0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
786     0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
787     0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
788     0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
789     0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
790     0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
791     0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
792     0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
793     0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
794     0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
795     0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
796     0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
797     0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
798     0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
799     0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
800     0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
801     0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
802     0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
803     0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
804     0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
805     0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
806     0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
807     0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
808     0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
809     0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
810     0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
811     0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
812     0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
813     0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
814     0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
815     0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
816     0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
817     0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
818     0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
819     0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
820     0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
821     0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
822     0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
823     0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
824     0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
825     0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
826     0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
827     0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
828     0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
829     0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
830     0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
831     0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
832     0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
833     0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
834     0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
835     0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
836     0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
837 };
/*
 * Te2: 256-entry encryption lookup table, one 32-bit word per input byte.
 * Te2[x] = S[x].[01, 03, 02, 01]: reading a word from its most significant
 * byte down, the bytes are S[x], 3*S[x], 2*S[x], S[x], where S is the AES
 * S-box and the multiples are taken in GF(2^8) (e.g. Te2[0] = 0x63a5c663).
 * Each entry equals the matching Te1 entry rotated right by 8 bits.
 * AES_set_encrypt_key() masks entries with 0xff000000 to obtain S[x] alone
 * in the top byte; AES_encrypt() uses the full word.
 */
838 static const u32 Te2[256] = {
839     0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
840     0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
841     0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
842     0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
843     0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
844     0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
845     0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
846     0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
847     0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
848     0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
849     0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
850     0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
851     0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
852     0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
853     0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
854     0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
855     0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
856     0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
857     0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
858     0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
859     0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
860     0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
861     0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
862     0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
863     0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
864     0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
865     0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
866     0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
867     0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
868     0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
869     0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
870     0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
871     0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
872     0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
873     0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
874     0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
875     0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
876     0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
877     0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
878     0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
879     0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
880     0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
881     0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
882     0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
883     0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
884     0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
885     0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
886     0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
887     0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
888     0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
889     0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
890     0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
891     0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
892     0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
893     0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
894     0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
895     0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
896     0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
897     0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
898     0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
899     0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
900     0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
901     0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
902     0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
903 };
/*
 * Te3: 256-entry encryption lookup table, one 32-bit word per input byte.
 * Te3[x] = S[x].[01, 01, 03, 02]: from the most significant byte down the
 * bytes are S[x], S[x], 3*S[x], 2*S[x], where S is the AES S-box and the
 * multiples are taken in GF(2^8) (e.g. Te3[0] = 0x6363a5c6).
 * Each entry equals the matching Te2 entry rotated right by 8 bits.
 * AES_set_encrypt_key() masks entries with 0x00ff0000 to obtain S[x] alone
 * in the second byte; AES_encrypt() uses the full word.
 */
904 static const u32 Te3[256] = {
905     0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
906     0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
907     0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
908     0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
909     0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
910     0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
911     0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
912     0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
913     0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
914     0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
915     0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
916     0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
917     0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
918     0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
919     0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
920     0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
921     0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
922     0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
923     0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
924     0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
925     0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
926     0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
927     0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
928     0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
929     0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
930     0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
931     0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
932     0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
933     0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
934     0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
935     0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
936     0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
937     0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
938     0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
939     0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
940     0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
941     0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
942     0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
943     0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
944     0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
945     0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
946     0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
947     0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
948     0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
949     0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
950     0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
951     0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
952     0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
953     0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
954     0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
955     0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
956     0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
957     0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
958     0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
959     0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
960     0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
961     0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
962     0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
963     0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
964     0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
965     0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
966     0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
967     0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
968     0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
969 };
970 
/*
 * Td0: 256-entry decryption lookup table, one 32-bit word per input byte.
 * Td0[x] = Si[x].[0e, 09, 0d, 0b]: from the most significant byte down the
 * bytes are 0e*Si[x], 09*Si[x], 0d*Si[x], 0b*Si[x], with the products taken
 * in GF(2^8) and Si[x] being the byte stored in Td4[x]
 * (e.g. Td4[0] = 0x52 and Td0[0] = 0x51f4a750).
 * AES_set_decrypt_key() indexes it with an S-box output (Te1[b] & 0xff), so
 * the lookup yields b multiplied by the inverse MixColumn coefficients.
 */
971 static const u32 Td0[256] = {
972     0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
973     0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
974     0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
975     0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
976     0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
977     0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
978     0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
979     0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
980     0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
981     0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
982     0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
983     0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
984     0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
985     0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
986     0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
987     0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
988     0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
989     0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
990     0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
991     0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
992     0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
993     0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
994     0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
995     0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
996     0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
997     0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
998     0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
999     0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
1000     0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
1001     0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
1002     0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
1003     0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
1004     0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
1005     0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
1006     0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
1007     0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
1008     0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
1009     0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
1010     0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
1011     0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
1012     0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
1013     0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
1014     0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
1015     0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
1016     0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
1017     0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
1018     0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
1019     0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
1020     0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
1021     0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
1022     0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
1023     0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
1024     0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
1025     0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
1026     0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
1027     0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
1028     0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
1029     0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
1030     0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
1031     0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
1032     0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
1033     0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
1034     0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
1035     0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
1036 };
/*
 * Td1: 256-entry decryption lookup table, one 32-bit word per input byte.
 * Td1[x] = Si[x].[0b, 0e, 09, 0d]: from the most significant byte down the
 * bytes are 0b*Si[x], 0e*Si[x], 09*Si[x], 0d*Si[x] (products in GF(2^8),
 * Si[x] being the byte stored in Td4[x]); e.g. Td1[0] = 0x5051f4a7.
 * Each entry equals the matching Td0 entry rotated right by 8 bits.
 * AES_set_decrypt_key() indexes it with Te1[b] & 0xff for the second byte
 * of each round-key word.
 */
1037 static const u32 Td1[256] = {
1038     0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
1039     0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
1040     0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
1041     0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
1042     0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
1043     0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
1044     0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
1045     0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
1046     0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
1047     0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
1048     0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
1049     0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
1050     0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
1051     0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
1052     0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
1053     0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
1054     0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
1055     0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
1056     0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
1057     0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
1058     0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
1059     0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
1060     0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
1061     0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
1062     0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
1063     0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
1064     0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
1065     0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
1066     0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
1067     0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
1068     0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
1069     0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
1070     0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
1071     0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
1072     0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
1073     0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
1074     0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
1075     0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
1076     0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
1077     0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
1078     0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
1079     0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
1080     0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
1081     0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
1082     0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
1083     0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
1084     0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
1085     0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
1086     0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
1087     0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
1088     0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
1089     0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
1090     0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
1091     0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
1092     0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
1093     0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
1094     0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
1095     0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
1096     0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
1097     0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
1098     0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
1099     0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
1100     0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
1101     0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
1102 };
/*
 * Td2: 256-entry decryption lookup table, one 32-bit word per input byte.
 * Td2[x] = Si[x].[0d, 0b, 0e, 09]: from the most significant byte down the
 * bytes are 0d*Si[x], 0b*Si[x], 0e*Si[x], 09*Si[x] (products in GF(2^8),
 * Si[x] being the byte stored in Td4[x]); e.g. Td2[0] = 0xa75051f4.
 * Each entry equals the matching Td1 entry rotated right by 8 bits.
 * AES_set_decrypt_key() indexes it with Te1[b] & 0xff for the third byte
 * of each round-key word.
 */
1103 static const u32 Td2[256] = {
1104     0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
1105     0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
1106     0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
1107     0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
1108     0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
1109     0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
1110     0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
1111     0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
1112     0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
1113     0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
1114     0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
1115     0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
1116     0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
1117     0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
1118     0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
1119     0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
1120     0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
1121     0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
1122     0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
1123     0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
1124     0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
1125     0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
1126     0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
1127     0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
1128     0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
1129     0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
1130     0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
1131     0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
1132     0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
1133     0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
1134     0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
1135     0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
1136     0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
1137     0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
1138     0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
1139     0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
1140     0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
1141     0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
1142     0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
1143     0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
1144     0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
1145     0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
1146     0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
1147     0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
1148     0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
1149     0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
1150     0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
1151     0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
1152     0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
1153     0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
1154     0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
1155     0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
1156     0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
1157     0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
1158     0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
1159     0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
1160     0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
1161     0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
1162     0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
1163     0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
1164     0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
1165     0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
1166     0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
1167     0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
1168 };
/*
 * Td3: 256-entry decryption lookup table, one 32-bit word per input byte.
 * Td3[x] = Si[x].[09, 0d, 0b, 0e]: from the most significant byte down the
 * bytes are 09*Si[x], 0d*Si[x], 0b*Si[x], 0e*Si[x] (products in GF(2^8),
 * Si[x] being the byte stored in Td4[x]); e.g. Td3[0] = 0xf4a75051.
 * Each entry equals the matching Td2 entry rotated right by 8 bits.
 * AES_set_decrypt_key() indexes it with Te1[b] & 0xff for the lowest byte
 * of each round-key word.
 */
1169 static const u32 Td3[256] = {
1170     0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
1171     0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
1172     0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
1173     0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
1174     0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
1175     0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
1176     0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
1177     0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
1178     0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
1179     0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
1180     0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
1181     0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
1182     0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
1183     0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
1184     0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
1185     0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
1186     0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
1187     0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
1188     0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
1189     0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
1190     0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
1191     0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
1192     0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
1193     0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
1194     0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
1195     0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
1196     0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
1197     0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
1198     0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
1199     0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
1200     0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
1201     0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
1202     0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
1203     0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
1204     0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
1205     0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
1206     0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
1207     0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
1208     0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
1209     0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
1210     0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
1211     0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
1212     0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
1213     0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
1214     0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
1215     0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
1216     0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
1217     0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
1218     0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
1219     0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
1220     0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
1221     0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
1222     0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
1223     0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
1224     0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
1225     0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
1226     0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
1227     0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
1228     0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
1229     0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
1230     0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
1231     0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
1232     0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
1233     0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
1234 };
/*
 * Td4: the inverse S-box as a plain byte table, Td4[x] = Si[x].[01].
 * It undoes the S-box whose values appear in the Te tables: for example the
 * Te tables encode S[0x00] = 0x63, and Td4[0x63] = 0x00.
 * Unlike Td0..Td3 it carries no MixColumn multiples, only the substituted
 * byte itself.
 */
1235 static const u8 Td4[256] = {
1236     0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
1237     0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
1238     0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
1239     0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
1240     0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
1241     0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
1242     0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
1243     0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
1244     0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
1245     0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
1246     0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
1247     0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
1248     0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
1249     0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
1250     0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
1251     0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
1252     0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
1253     0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
1254     0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
1255     0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
1256     0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
1257     0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
1258     0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
1259     0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
1260     0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
1261     0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
1262     0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
1263     0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
1264     0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
1265     0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
1266     0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
1267     0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU,
1268 };
/*
 * Key-schedule round constants, held in the most significant byte of each
 * word. AES_set_encrypt_key() XORs rcon[i] into the first word of every
 * newly derived key block; it reads indices 0..9 for 128-bit keys, 0..7 for
 * 192-bit keys and 0..6 for 256-bit keys, so ten entries are enough.
 */
1269 static const u32 rcon[] = {
1270     0x01000000, 0x02000000, 0x04000000, 0x08000000,
1271     0x10000000, 0x20000000, 0x40000000, 0x80000000,
1272     0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
1273 };
1274 
1275 /**
1276  * Expand the cipher key into the encryption key schedule.
1277  */
AES_set_encrypt_key(const unsigned char * userKey,const int bits,AES_KEY * key)1278 int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
1279                         AES_KEY *key)
1280 {
1281 
1282     u32 *rk;
1283     int i = 0;
1284     u32 temp;
1285 
1286     if (!userKey || !key)
1287         return -1;
1288     if (bits != 128 && bits != 192 && bits != 256)
1289         return -2;
1290 
1291     rk = key->rd_key;
1292 
1293     if (bits == 128)
1294         key->rounds = 10;
1295     else if (bits == 192)
1296         key->rounds = 12;
1297     else
1298         key->rounds = 14;
1299 
1300     rk[0] = GETU32(userKey     );
1301     rk[1] = GETU32(userKey +  4);
1302     rk[2] = GETU32(userKey +  8);
1303     rk[3] = GETU32(userKey + 12);
1304     if (bits == 128) {
1305         while (1) {
1306             temp  = rk[3];
1307             rk[4] = rk[0] ^
1308                 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
1309                 (Te3[(temp >>  8) & 0xff] & 0x00ff0000) ^
1310                 (Te0[(temp      ) & 0xff] & 0x0000ff00) ^
1311                 (Te1[(temp >> 24)       ] & 0x000000ff) ^
1312                 rcon[i];
1313             rk[5] = rk[1] ^ rk[4];
1314             rk[6] = rk[2] ^ rk[5];
1315             rk[7] = rk[3] ^ rk[6];
1316             if (++i == 10) {
1317                 return 0;
1318             }
1319             rk += 4;
1320         }
1321     }
1322     rk[4] = GETU32(userKey + 16);
1323     rk[5] = GETU32(userKey + 20);
1324     if (bits == 192) {
1325         while (1) {
1326             temp = rk[ 5];
1327             rk[ 6] = rk[ 0] ^
1328                 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
1329                 (Te3[(temp >>  8) & 0xff] & 0x00ff0000) ^
1330                 (Te0[(temp      ) & 0xff] & 0x0000ff00) ^
1331                 (Te1[(temp >> 24)       ] & 0x000000ff) ^
1332                 rcon[i];
1333             rk[ 7] = rk[ 1] ^ rk[ 6];
1334             rk[ 8] = rk[ 2] ^ rk[ 7];
1335             rk[ 9] = rk[ 3] ^ rk[ 8];
1336             if (++i == 8) {
1337                 return 0;
1338             }
1339             rk[10] = rk[ 4] ^ rk[ 9];
1340             rk[11] = rk[ 5] ^ rk[10];
1341             rk += 6;
1342         }
1343     }
1344     rk[6] = GETU32(userKey + 24);
1345     rk[7] = GETU32(userKey + 28);
1346     if (bits == 256) {
1347         while (1) {
1348             temp = rk[ 7];
1349             rk[ 8] = rk[ 0] ^
1350                 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
1351                 (Te3[(temp >>  8) & 0xff] & 0x00ff0000) ^
1352                 (Te0[(temp      ) & 0xff] & 0x0000ff00) ^
1353                 (Te1[(temp >> 24)       ] & 0x000000ff) ^
1354                 rcon[i];
1355             rk[ 9] = rk[ 1] ^ rk[ 8];
1356             rk[10] = rk[ 2] ^ rk[ 9];
1357             rk[11] = rk[ 3] ^ rk[10];
1358             if (++i == 7) {
1359                 return 0;
1360             }
1361             temp = rk[11];
1362             rk[12] = rk[ 4] ^
1363                 (Te2[(temp >> 24)       ] & 0xff000000) ^
1364                 (Te3[(temp >> 16) & 0xff] & 0x00ff0000) ^
1365                 (Te0[(temp >>  8) & 0xff] & 0x0000ff00) ^
1366                 (Te1[(temp      ) & 0xff] & 0x000000ff);
1367             rk[13] = rk[ 5] ^ rk[12];
1368             rk[14] = rk[ 6] ^ rk[13];
1369             rk[15] = rk[ 7] ^ rk[14];
1370 
1371             rk += 8;
1372             }
1373     }
1374     return 0;
1375 }
1376 
1377 /**
1378  * Expand the cipher key into the decryption key schedule.
1379  */
AES_set_decrypt_key(const unsigned char * userKey,const int bits,AES_KEY * key)1380 int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
1381                         AES_KEY *key)
1382 {
1383 
1384     u32 *rk;
1385     int i, j, status;
1386     u32 temp;
1387 
1388     /* first, start with an encryption schedule */
1389     status = AES_set_encrypt_key(userKey, bits, key);
1390     if (status < 0)
1391         return status;
1392 
1393     rk = key->rd_key;
1394 
1395     /* invert the order of the round keys: */
1396     for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
1397         temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
1398         temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
1399         temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
1400         temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
1401     }
1402     /* apply the inverse MixColumn transform to all round keys but the first and the last: */
1403     for (i = 1; i < (key->rounds); i++) {
1404         rk += 4;
1405         rk[0] =
1406             Td0[Te1[(rk[0] >> 24)       ] & 0xff] ^
1407             Td1[Te1[(rk[0] >> 16) & 0xff] & 0xff] ^
1408             Td2[Te1[(rk[0] >>  8) & 0xff] & 0xff] ^
1409             Td3[Te1[(rk[0]      ) & 0xff] & 0xff];
1410         rk[1] =
1411             Td0[Te1[(rk[1] >> 24)       ] & 0xff] ^
1412             Td1[Te1[(rk[1] >> 16) & 0xff] & 0xff] ^
1413             Td2[Te1[(rk[1] >>  8) & 0xff] & 0xff] ^
1414             Td3[Te1[(rk[1]      ) & 0xff] & 0xff];
1415         rk[2] =
1416             Td0[Te1[(rk[2] >> 24)       ] & 0xff] ^
1417             Td1[Te1[(rk[2] >> 16) & 0xff] & 0xff] ^
1418             Td2[Te1[(rk[2] >>  8) & 0xff] & 0xff] ^
1419             Td3[Te1[(rk[2]      ) & 0xff] & 0xff];
1420         rk[3] =
1421             Td0[Te1[(rk[3] >> 24)       ] & 0xff] ^
1422             Td1[Te1[(rk[3] >> 16) & 0xff] & 0xff] ^
1423             Td2[Te1[(rk[3] >>  8) & 0xff] & 0xff] ^
1424             Td3[Te1[(rk[3]      ) & 0xff] & 0xff];
1425     }
1426     return 0;
1427 }
1428 
1429 /*
1430  * Encrypt a single block
1431  * in and out can overlap
1432  */
AES_encrypt(const unsigned char * in,unsigned char * out,const AES_KEY * key)1433 void AES_encrypt(const unsigned char *in, unsigned char *out,
1434                  const AES_KEY *key) {
1435 
1436     const u32 *rk;
1437     u32 s0, s1, s2, s3, t0, t1, t2, t3;
1438 #ifndef FULL_UNROLL
1439     int r;
1440 #endif /* ?FULL_UNROLL */
1441 
1442     assert(in && out && key);
1443     rk = key->rd_key;
1444 
1445     /*
1446      * map byte array block to cipher state
1447      * and add initial round key:
1448      */
1449     s0 = GETU32(in     ) ^ rk[0];
1450     s1 = GETU32(in +  4) ^ rk[1];
1451     s2 = GETU32(in +  8) ^ rk[2];
1452     s3 = GETU32(in + 12) ^ rk[3];
1453 #ifdef FULL_UNROLL
1454     /* round 1: */
1455     t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
1456     t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
1457     t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
1458     t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
1459     /* round 2: */
1460     s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
1461     s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
1462     s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
1463     s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
1464     /* round 3: */
1465     t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
1466     t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
1467     t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
1468     t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
1469     /* round 4: */
1470     s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
1471     s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
1472     s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
1473     s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
1474     /* round 5: */
1475     t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
1476     t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
1477     t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
1478     t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
1479     /* round 6: */
1480     s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
1481     s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
1482     s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
1483     s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
1484     /* round 7: */
1485     t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
1486     t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
1487     t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
1488     t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
1489     /* round 8: */
1490     s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
1491     s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
1492     s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
1493     s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
1494     /* round 9: */
1495     t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
1496     t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
1497     t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
1498     t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
1499     if (key->rounds > 10) {
1500         /* round 10: */
1501         s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40];
1502         s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41];
1503         s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42];
1504         s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43];
1505         /* round 11: */
1506         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44];
1507         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45];
1508         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46];
1509         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47];
1510         if (key->rounds > 12) {
1511             /* round 12: */
1512             s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48];
1513             s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49];
1514             s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50];
1515             s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51];
1516             /* round 13: */
1517             t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52];
1518             t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53];
1519             t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54];
1520             t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55];
1521         }
1522     }
1523     rk += key->rounds << 2;
1524 #else  /* !FULL_UNROLL */
1525     /*
1526      * Nr - 1 full rounds:
1527      */
1528     r = key->rounds >> 1;
1529     for (;;) {
1530         t0 =
1531             Te0[(s0 >> 24)       ] ^
1532             Te1[(s1 >> 16) & 0xff] ^
1533             Te2[(s2 >>  8) & 0xff] ^
1534             Te3[(s3      ) & 0xff] ^
1535             rk[4];
1536         t1 =
1537             Te0[(s1 >> 24)       ] ^
1538             Te1[(s2 >> 16) & 0xff] ^
1539             Te2[(s3 >>  8) & 0xff] ^
1540             Te3[(s0      ) & 0xff] ^
1541             rk[5];
1542         t2 =
1543             Te0[(s2 >> 24)       ] ^
1544             Te1[(s3 >> 16) & 0xff] ^
1545             Te2[(s0 >>  8) & 0xff] ^
1546             Te3[(s1      ) & 0xff] ^
1547             rk[6];
1548         t3 =
1549             Te0[(s3 >> 24)       ] ^
1550             Te1[(s0 >> 16) & 0xff] ^
1551             Te2[(s1 >>  8) & 0xff] ^
1552             Te3[(s2      ) & 0xff] ^
1553             rk[7];
1554 
1555         rk += 8;
1556         if (--r == 0) {
1557             break;
1558         }
1559 
1560         s0 =
1561             Te0[(t0 >> 24)       ] ^
1562             Te1[(t1 >> 16) & 0xff] ^
1563             Te2[(t2 >>  8) & 0xff] ^
1564             Te3[(t3      ) & 0xff] ^
1565             rk[0];
1566         s1 =
1567             Te0[(t1 >> 24)       ] ^
1568             Te1[(t2 >> 16) & 0xff] ^
1569             Te2[(t3 >>  8) & 0xff] ^
1570             Te3[(t0      ) & 0xff] ^
1571             rk[1];
1572         s2 =
1573             Te0[(t2 >> 24)       ] ^
1574             Te1[(t3 >> 16) & 0xff] ^
1575             Te2[(t0 >>  8) & 0xff] ^
1576             Te3[(t1      ) & 0xff] ^
1577             rk[2];
1578         s3 =
1579             Te0[(t3 >> 24)       ] ^
1580             Te1[(t0 >> 16) & 0xff] ^
1581             Te2[(t1 >>  8) & 0xff] ^
1582             Te3[(t2      ) & 0xff] ^
1583             rk[3];
1584     }
1585 #endif /* ?FULL_UNROLL */
1586     /*
1587      * apply last round and
1588      * map cipher state to byte array block:
1589      */
1590     s0 =
1591         (Te2[(t0 >> 24)       ] & 0xff000000) ^
1592         (Te3[(t1 >> 16) & 0xff] & 0x00ff0000) ^
1593         (Te0[(t2 >>  8) & 0xff] & 0x0000ff00) ^
1594         (Te1[(t3      ) & 0xff] & 0x000000ff) ^
1595         rk[0];
1596     PUTU32(out     , s0);
1597     s1 =
1598         (Te2[(t1 >> 24)       ] & 0xff000000) ^
1599         (Te3[(t2 >> 16) & 0xff] & 0x00ff0000) ^
1600         (Te0[(t3 >>  8) & 0xff] & 0x0000ff00) ^
1601         (Te1[(t0      ) & 0xff] & 0x000000ff) ^
1602         rk[1];
1603     PUTU32(out +  4, s1);
1604     s2 =
1605         (Te2[(t2 >> 24)       ] & 0xff000000) ^
1606         (Te3[(t3 >> 16) & 0xff] & 0x00ff0000) ^
1607         (Te0[(t0 >>  8) & 0xff] & 0x0000ff00) ^
1608         (Te1[(t1      ) & 0xff] & 0x000000ff) ^
1609         rk[2];
1610     PUTU32(out +  8, s2);
1611     s3 =
1612         (Te2[(t3 >> 24)       ] & 0xff000000) ^
1613         (Te3[(t0 >> 16) & 0xff] & 0x00ff0000) ^
1614         (Te0[(t1 >>  8) & 0xff] & 0x0000ff00) ^
1615         (Te1[(t2      ) & 0xff] & 0x000000ff) ^
1616         rk[3];
1617     PUTU32(out + 12, s3);
1618 }
1619 
1620 /*
1621  * Decrypt a single block
1622  * in and out can overlap
1623  */
AES_decrypt(const unsigned char * in,unsigned char * out,const AES_KEY * key)1624 void AES_decrypt(const unsigned char *in, unsigned char *out,
1625                  const AES_KEY *key)
1626 {
1627 
1628     const u32 *rk;
1629     u32 s0, s1, s2, s3, t0, t1, t2, t3;
1630 #ifndef FULL_UNROLL
1631     int r;
1632 #endif /* ?FULL_UNROLL */
1633 
1634     assert(in && out && key);
1635     rk = key->rd_key;
1636 
1637     /*
1638      * map byte array block to cipher state
1639      * and add initial round key:
1640      */
1641     s0 = GETU32(in     ) ^ rk[0];
1642     s1 = GETU32(in +  4) ^ rk[1];
1643     s2 = GETU32(in +  8) ^ rk[2];
1644     s3 = GETU32(in + 12) ^ rk[3];
1645 #ifdef FULL_UNROLL
1646     /* round 1: */
1647     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
1648     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
1649     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
1650     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
1651     /* round 2: */
1652     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
1653     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
1654     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
1655     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
1656     /* round 3: */
1657     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
1658     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
1659     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
1660     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
1661     /* round 4: */
1662     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
1663     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
1664     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
1665     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
1666     /* round 5: */
1667     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
1668     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
1669     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
1670     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
1671     /* round 6: */
1672     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
1673     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
1674     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
1675     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
1676     /* round 7: */
1677     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
1678     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
1679     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
1680     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
1681     /* round 8: */
1682     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
1683     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
1684     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
1685     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
1686     /* round 9: */
1687     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
1688     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
1689     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
1690     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
1691     if (key->rounds > 10) {
1692         /* round 10: */
1693         s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
1694         s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
1695         s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
1696         s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
1697         /* round 11: */
1698         t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
1699         t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
1700         t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
1701         t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
1702         if (key->rounds > 12) {
1703             /* round 12: */
1704             s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
1705             s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
1706             s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
1707             s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
1708             /* round 13: */
1709             t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
1710             t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
1711             t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
1712             t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
1713         }
1714     }
1715     rk += key->rounds << 2;
1716 #else  /* !FULL_UNROLL */
1717     /*
1718      * Nr - 1 full rounds:
1719      */
1720     r = key->rounds >> 1;
1721     for (;;) {
1722         t0 =
1723             Td0[(s0 >> 24)       ] ^
1724             Td1[(s3 >> 16) & 0xff] ^
1725             Td2[(s2 >>  8) & 0xff] ^
1726             Td3[(s1      ) & 0xff] ^
1727             rk[4];
1728         t1 =
1729             Td0[(s1 >> 24)       ] ^
1730             Td1[(s0 >> 16) & 0xff] ^
1731             Td2[(s3 >>  8) & 0xff] ^
1732             Td3[(s2      ) & 0xff] ^
1733             rk[5];
1734         t2 =
1735             Td0[(s2 >> 24)       ] ^
1736             Td1[(s1 >> 16) & 0xff] ^
1737             Td2[(s0 >>  8) & 0xff] ^
1738             Td3[(s3      ) & 0xff] ^
1739             rk[6];
1740         t3 =
1741             Td0[(s3 >> 24)       ] ^
1742             Td1[(s2 >> 16) & 0xff] ^
1743             Td2[(s1 >>  8) & 0xff] ^
1744             Td3[(s0      ) & 0xff] ^
1745             rk[7];
1746 
1747         rk += 8;
1748         if (--r == 0) {
1749             break;
1750         }
1751 
1752         s0 =
1753             Td0[(t0 >> 24)       ] ^
1754             Td1[(t3 >> 16) & 0xff] ^
1755             Td2[(t2 >>  8) & 0xff] ^
1756             Td3[(t1      ) & 0xff] ^
1757             rk[0];
1758         s1 =
1759             Td0[(t1 >> 24)       ] ^
1760             Td1[(t0 >> 16) & 0xff] ^
1761             Td2[(t3 >>  8) & 0xff] ^
1762             Td3[(t2      ) & 0xff] ^
1763             rk[1];
1764         s2 =
1765             Td0[(t2 >> 24)       ] ^
1766             Td1[(t1 >> 16) & 0xff] ^
1767             Td2[(t0 >>  8) & 0xff] ^
1768             Td3[(t3      ) & 0xff] ^
1769             rk[2];
1770         s3 =
1771             Td0[(t3 >> 24)       ] ^
1772             Td1[(t2 >> 16) & 0xff] ^
1773             Td2[(t1 >>  8) & 0xff] ^
1774             Td3[(t0      ) & 0xff] ^
1775             rk[3];
1776     }
1777 #endif /* ?FULL_UNROLL */
1778     /*
1779      * apply last round and
1780      * map cipher state to byte array block:
1781      */
1782     s0 =
1783         ((u32)Td4[(t0 >> 24)       ] << 24) ^
1784         ((u32)Td4[(t3 >> 16) & 0xff] << 16) ^
1785         ((u32)Td4[(t2 >>  8) & 0xff] <<  8) ^
1786         ((u32)Td4[(t1      ) & 0xff])       ^
1787         rk[0];
1788     PUTU32(out     , s0);
1789     s1 =
1790         ((u32)Td4[(t1 >> 24)       ] << 24) ^
1791         ((u32)Td4[(t0 >> 16) & 0xff] << 16) ^
1792         ((u32)Td4[(t3 >>  8) & 0xff] <<  8) ^
1793         ((u32)Td4[(t2      ) & 0xff])       ^
1794         rk[1];
1795     PUTU32(out +  4, s1);
1796     s2 =
1797         ((u32)Td4[(t2 >> 24)       ] << 24) ^
1798         ((u32)Td4[(t1 >> 16) & 0xff] << 16) ^
1799         ((u32)Td4[(t0 >>  8) & 0xff] <<  8) ^
1800         ((u32)Td4[(t3      ) & 0xff])       ^
1801         rk[2];
1802     PUTU32(out +  8, s2);
1803     s3 =
1804         ((u32)Td4[(t3 >> 24)       ] << 24) ^
1805         ((u32)Td4[(t2 >> 16) & 0xff] << 16) ^
1806         ((u32)Td4[(t1 >>  8) & 0xff] <<  8) ^
1807         ((u32)Td4[(t0      ) & 0xff])       ^
1808         rk[3];
1809     PUTU32(out + 12, s3);
1810 }
1811 
1812 #else /* AES_ASM */
1813 
1814 static const u8 Te4[256] = {
1815     0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U,
1816     0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U,
1817     0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U,
1818     0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U,
1819     0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU,
1820     0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U,
1821     0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU,
1822     0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U,
1823     0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U,
1824     0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U,
1825     0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU,
1826     0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU,
1827     0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U,
1828     0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U,
1829     0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U,
1830     0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U,
1831     0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U,
1832     0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U,
1833     0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U,
1834     0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU,
1835     0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU,
1836     0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U,
1837     0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U,
1838     0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U,
1839     0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U,
1840     0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU,
1841     0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU,
1842     0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU,
1843     0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U,
1844     0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU,
1845     0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U,
1846     0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U
1847 };
1848 static const u32 rcon[] = {
1849     0x01000000, 0x02000000, 0x04000000, 0x08000000,
1850     0x10000000, 0x20000000, 0x40000000, 0x80000000,
1851     0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
1852 };
1853 
1854 /**
1855  * Expand the cipher key into the encryption key schedule.
1856  */
AES_set_encrypt_key(const unsigned char * userKey,const int bits,AES_KEY * key)1857 int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
1858                         AES_KEY *key)
1859 {
1860     u32 *rk;
1861     int i = 0;
1862     u32 temp;
1863 
1864     if (!userKey || !key)
1865         return -1;
1866     if (bits != 128 && bits != 192 && bits != 256)
1867         return -2;
1868 
1869     rk = key->rd_key;
1870 
1871     if (bits == 128)
1872         key->rounds = 10;
1873     else if (bits == 192)
1874         key->rounds = 12;
1875     else
1876         key->rounds = 14;
1877 
1878     rk[0] = GETU32(userKey     );
1879     rk[1] = GETU32(userKey +  4);
1880     rk[2] = GETU32(userKey +  8);
1881     rk[3] = GETU32(userKey + 12);
1882     if (bits == 128) {
1883         while (1) {
1884             temp  = rk[3];
1885             rk[4] = rk[0] ^
1886                 ((u32)Te4[(temp >> 16) & 0xff] << 24) ^
1887                 ((u32)Te4[(temp >>  8) & 0xff] << 16) ^
1888                 ((u32)Te4[(temp      ) & 0xff] << 8) ^
1889                 ((u32)Te4[(temp >> 24)       ]) ^
1890                 rcon[i];
1891             rk[5] = rk[1] ^ rk[4];
1892             rk[6] = rk[2] ^ rk[5];
1893             rk[7] = rk[3] ^ rk[6];
1894             if (++i == 10) {
1895                 return 0;
1896             }
1897             rk += 4;
1898         }
1899     }
1900     rk[4] = GETU32(userKey + 16);
1901     rk[5] = GETU32(userKey + 20);
1902     if (bits == 192) {
1903         while (1) {
1904             temp = rk[ 5];
1905             rk[ 6] = rk[ 0] ^
1906                 ((u32)Te4[(temp >> 16) & 0xff] << 24) ^
1907                 ((u32)Te4[(temp >>  8) & 0xff] << 16) ^
1908                 ((u32)Te4[(temp      ) & 0xff] << 8) ^
1909                 ((u32)Te4[(temp >> 24)       ]) ^
1910                 rcon[i];
1911             rk[ 7] = rk[ 1] ^ rk[ 6];
1912             rk[ 8] = rk[ 2] ^ rk[ 7];
1913             rk[ 9] = rk[ 3] ^ rk[ 8];
1914             if (++i == 8) {
1915                 return 0;
1916             }
1917             rk[10] = rk[ 4] ^ rk[ 9];
1918             rk[11] = rk[ 5] ^ rk[10];
1919             rk += 6;
1920         }
1921     }
1922     rk[6] = GETU32(userKey + 24);
1923     rk[7] = GETU32(userKey + 28);
1924     if (bits == 256) {
1925         while (1) {
1926             temp = rk[ 7];
1927             rk[ 8] = rk[ 0] ^
1928                 ((u32)Te4[(temp >> 16) & 0xff] << 24) ^
1929                 ((u32)Te4[(temp >>  8) & 0xff] << 16) ^
1930                 ((u32)Te4[(temp      ) & 0xff] << 8) ^
1931                 ((u32)Te4[(temp >> 24)       ]) ^
1932                 rcon[i];
1933             rk[ 9] = rk[ 1] ^ rk[ 8];
1934             rk[10] = rk[ 2] ^ rk[ 9];
1935             rk[11] = rk[ 3] ^ rk[10];
1936             if (++i == 7) {
1937                 return 0;
1938             }
1939             temp = rk[11];
1940             rk[12] = rk[ 4] ^
1941                 ((u32)Te4[(temp >> 24)       ] << 24) ^
1942                 ((u32)Te4[(temp >> 16) & 0xff] << 16) ^
1943                 ((u32)Te4[(temp >>  8) & 0xff] << 8) ^
1944                 ((u32)Te4[(temp      ) & 0xff]);
1945             rk[13] = rk[ 5] ^ rk[12];
1946             rk[14] = rk[ 6] ^ rk[13];
1947             rk[15] = rk[ 7] ^ rk[14];
1948 
1949             rk += 8;
1950         }
1951     }
1952     return 0;
1953 }
1954 
1955 /**
1956  * Expand the cipher key into the decryption key schedule.
1957  */
AES_set_decrypt_key(const unsigned char * userKey,const int bits,AES_KEY * key)1958 int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
1959                         AES_KEY *key)
1960 {
1961 
1962     u32 *rk;
1963     int i, j, status;
1964     u32 temp;
1965 
1966     /* first, start with an encryption schedule */
1967     status = AES_set_encrypt_key(userKey, bits, key);
1968     if (status < 0)
1969         return status;
1970 
1971     rk = key->rd_key;
1972 
1973     /* invert the order of the round keys: */
1974     for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
1975         temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
1976         temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
1977         temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
1978         temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
1979     }
1980     /* apply the inverse MixColumn transform to all round keys but the first and the last: */
1981     for (i = 1; i < (key->rounds); i++) {
1982         rk += 4;
1983         for (j = 0; j < 4; j++) {
1984             u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
1985 
1986             tp1 = rk[j];
1987             m = tp1 & 0x80808080;
1988             tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
1989                 ((m - (m >> 7)) & 0x1b1b1b1b);
1990             m = tp2 & 0x80808080;
1991             tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
1992                 ((m - (m >> 7)) & 0x1b1b1b1b);
1993             m = tp4 & 0x80808080;
1994             tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
1995                 ((m - (m >> 7)) & 0x1b1b1b1b);
1996             tp9 = tp8 ^ tp1;
1997             tpb = tp9 ^ tp2;
1998             tpd = tp9 ^ tp4;
1999             tpe = tp8 ^ tp4 ^ tp2;
2000 #if defined(ROTATE)
2001             rk[j] = tpe ^ ROTATE(tpd,16) ^
2002                 ROTATE(tp9,24) ^ ROTATE(tpb,8);
2003 #else
2004             rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
2005                 (tp9 >> 8) ^ (tp9 << 24) ^
2006                 (tpb >> 24) ^ (tpb << 8);
2007 #endif
2008         }
2009     }
2010     return 0;
2011 }
2012 
2013 #endif /* AES_ASM */
2014