1 /*
2 * Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License 2.0 (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10 /*
11 * This is experimental x86[_64] derivative. It assumes little-endian
12 * byte order and expects CPU to sustain unaligned memory references.
13 * It is used as playground for cache-time attack mitigations and
14 * serves as reference C implementation for x86[_64] as well as some
15 * other assembly modules.
16 */
17
18 /**
19 * rijndael-alg-fst.c
20 *
21 * @version 3.0 (December 2000)
22 *
23 * Optimised ANSI C code for the Rijndael cipher (now AES)
24 *
25 * @author Vincent Rijmen
26 * @author Antoon Bosselaers
27 * @author Paulo Barreto
28 *
29 * This code is hereby placed in the public domain.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
32 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
33 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
35 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
38 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
39 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
40 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
41 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 */
43
44
45 #include <assert.h>
46
47 #include <stdlib.h>
48 #include <openssl/aes.h>
49 #include "aes_local.h"
50
51 /*
52 * These two parameters control which table, 256-byte or 2KB, is
53 * referenced in outer and respectively inner rounds.
54 */
55 #define AES_COMPACT_IN_OUTER_ROUNDS
56 #ifdef AES_COMPACT_IN_OUTER_ROUNDS
57 /* AES_COMPACT_IN_OUTER_ROUNDS costs ~30% in performance, while
58 * adding AES_COMPACT_IN_INNER_ROUNDS reduces benchmark *further*
59 * by factor of ~2. */
60 # undef AES_COMPACT_IN_INNER_ROUNDS
61 #endif
62
63 #if 1
/*
 * Walk a 256-byte table touching one word per cache line so that the
 * whole table is (mostly) resident before data-dependent S-box lookups
 * begin -- a cache-timing attack mitigation.  The loads are kept alive
 * by XOR-accumulating into a volatile local; without the volatile the
 * compiler could elide the entire loop as dead code.
 *
 * Fix vs. original: the loop index is size_t rather than int, so the
 * comparison against 256/sizeof(t[0]) (a size_t) is no longer a
 * signed/unsigned mismatch.
 */
static void prefetch256(const void *table)
{
    volatile unsigned long *t = (void *)table, ret;
    unsigned long sum;
    size_t i;

    /* 32 is common least cache-line size */
    for (sum = 0, i = 0; i < 256 / sizeof(t[0]); i += 32 / sizeof(t[0]))
        sum ^= t[i];

    /* volatile store keeps the reads above from being optimized away */
    ret = sum;
}
76 #else
77 # define prefetch256(t)
78 #endif
79
80 #undef GETU32
81 #define GETU32(p) (*((u32*)(p)))
82
83 #if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
84 #define U64(C) C##UI64
85 #elif defined(__arch64__)
86 #define U64(C) C##UL
87 #else
88 #define U64(C) C##ULL
89 #endif
90
91 #undef ROTATE
92 #if defined(_MSC_VER)
93 # define ROTATE(a,n) _lrotl(a,n)
94 #elif defined(__ICC)
95 # define ROTATE(a,n) _rotl(a,n)
96 #elif defined(__GNUC__) && __GNUC__>=2
97 # if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
98 # define ROTATE(a,n) ({ register unsigned int ret; \
99 asm ( \
100 "roll %1,%0" \
101 : "=r"(ret) \
102 : "I"(n), "0"(a) \
103 : "cc"); \
104 ret; \
105 })
106 # endif
107 #endif
108 /*-
109 Te [x] = S [x].[02, 01, 01, 03, 02, 01, 01, 03];
110 Te0[x] = S [x].[02, 01, 01, 03];
111 Te1[x] = S [x].[03, 02, 01, 01];
112 Te2[x] = S [x].[01, 03, 02, 01];
113 Te3[x] = S [x].[01, 01, 03, 02];
114 */
115 #define Te0 (u32)((u64*)((u8*)Te+0))
116 #define Te1 (u32)((u64*)((u8*)Te+3))
117 #define Te2 (u32)((u64*)((u8*)Te+2))
118 #define Te3 (u32)((u64*)((u8*)Te+1))
119 /*-
120 Td [x] = Si[x].[0e, 09, 0d, 0b, 0e, 09, 0d, 0b];
121 Td0[x] = Si[x].[0e, 09, 0d, 0b];
122 Td1[x] = Si[x].[0b, 0e, 09, 0d];
123 Td2[x] = Si[x].[0d, 0b, 0e, 09];
124 Td3[x] = Si[x].[09, 0d, 0b, 0e];
125 Td4[x] = Si[x].[01];
126 */
127 #define Td0 (u32)((u64*)((u8*)Td+0))
128 #define Td1 (u32)((u64*)((u8*)Td+3))
129 #define Td2 (u32)((u64*)((u8*)Td+2))
130 #define Td3 (u32)((u64*)((u8*)Td+1))
131
/*
 * Combined encryption table: each 64-bit entry is the same 32-bit value
 * duplicated in both halves (per the schedule in the comment above,
 * S[x] times the MixColumns coefficients).  The Te0..Te3 macros index
 * into this one 2KB table at different byte offsets, so a single table
 * serves all four byte positions of the state word.
 */
static const u64 Te[256] = {
    U64(0xa56363c6a56363c6), U64(0x847c7cf8847c7cf8),
    U64(0x997777ee997777ee), U64(0x8d7b7bf68d7b7bf6),
    U64(0x0df2f2ff0df2f2ff), U64(0xbd6b6bd6bd6b6bd6),
    U64(0xb16f6fdeb16f6fde), U64(0x54c5c59154c5c591),
    U64(0x5030306050303060), U64(0x0301010203010102),
    U64(0xa96767cea96767ce), U64(0x7d2b2b567d2b2b56),
    U64(0x19fefee719fefee7), U64(0x62d7d7b562d7d7b5),
    U64(0xe6abab4de6abab4d), U64(0x9a7676ec9a7676ec),
    U64(0x45caca8f45caca8f), U64(0x9d82821f9d82821f),
    U64(0x40c9c98940c9c989), U64(0x877d7dfa877d7dfa),
    U64(0x15fafaef15fafaef), U64(0xeb5959b2eb5959b2),
    U64(0xc947478ec947478e), U64(0x0bf0f0fb0bf0f0fb),
    U64(0xecadad41ecadad41), U64(0x67d4d4b367d4d4b3),
    U64(0xfda2a25ffda2a25f), U64(0xeaafaf45eaafaf45),
    U64(0xbf9c9c23bf9c9c23), U64(0xf7a4a453f7a4a453),
    U64(0x967272e4967272e4), U64(0x5bc0c09b5bc0c09b),
    U64(0xc2b7b775c2b7b775), U64(0x1cfdfde11cfdfde1),
    U64(0xae93933dae93933d), U64(0x6a26264c6a26264c),
    U64(0x5a36366c5a36366c), U64(0x413f3f7e413f3f7e),
    U64(0x02f7f7f502f7f7f5), U64(0x4fcccc834fcccc83),
    U64(0x5c3434685c343468), U64(0xf4a5a551f4a5a551),
    U64(0x34e5e5d134e5e5d1), U64(0x08f1f1f908f1f1f9),
    U64(0x937171e2937171e2), U64(0x73d8d8ab73d8d8ab),
    U64(0x5331316253313162), U64(0x3f15152a3f15152a),
    U64(0x0c0404080c040408), U64(0x52c7c79552c7c795),
    U64(0x6523234665232346), U64(0x5ec3c39d5ec3c39d),
    U64(0x2818183028181830), U64(0xa1969637a1969637),
    U64(0x0f05050a0f05050a), U64(0xb59a9a2fb59a9a2f),
    U64(0x0907070e0907070e), U64(0x3612122436121224),
    U64(0x9b80801b9b80801b), U64(0x3de2e2df3de2e2df),
    U64(0x26ebebcd26ebebcd), U64(0x6927274e6927274e),
    U64(0xcdb2b27fcdb2b27f), U64(0x9f7575ea9f7575ea),
    U64(0x1b0909121b090912), U64(0x9e83831d9e83831d),
    U64(0x742c2c58742c2c58), U64(0x2e1a1a342e1a1a34),
    U64(0x2d1b1b362d1b1b36), U64(0xb26e6edcb26e6edc),
    U64(0xee5a5ab4ee5a5ab4), U64(0xfba0a05bfba0a05b),
    U64(0xf65252a4f65252a4), U64(0x4d3b3b764d3b3b76),
    U64(0x61d6d6b761d6d6b7), U64(0xceb3b37dceb3b37d),
    U64(0x7b2929527b292952), U64(0x3ee3e3dd3ee3e3dd),
    U64(0x712f2f5e712f2f5e), U64(0x9784841397848413),
    U64(0xf55353a6f55353a6), U64(0x68d1d1b968d1d1b9),
    U64(0x0000000000000000), U64(0x2cededc12cededc1),
    U64(0x6020204060202040), U64(0x1ffcfce31ffcfce3),
    U64(0xc8b1b179c8b1b179), U64(0xed5b5bb6ed5b5bb6),
    U64(0xbe6a6ad4be6a6ad4), U64(0x46cbcb8d46cbcb8d),
    U64(0xd9bebe67d9bebe67), U64(0x4b3939724b393972),
    U64(0xde4a4a94de4a4a94), U64(0xd44c4c98d44c4c98),
    U64(0xe85858b0e85858b0), U64(0x4acfcf854acfcf85),
    U64(0x6bd0d0bb6bd0d0bb), U64(0x2aefefc52aefefc5),
    U64(0xe5aaaa4fe5aaaa4f), U64(0x16fbfbed16fbfbed),
    U64(0xc5434386c5434386), U64(0xd74d4d9ad74d4d9a),
    U64(0x5533336655333366), U64(0x9485851194858511),
    U64(0xcf45458acf45458a), U64(0x10f9f9e910f9f9e9),
    U64(0x0602020406020204), U64(0x817f7ffe817f7ffe),
    U64(0xf05050a0f05050a0), U64(0x443c3c78443c3c78),
    U64(0xba9f9f25ba9f9f25), U64(0xe3a8a84be3a8a84b),
    U64(0xf35151a2f35151a2), U64(0xfea3a35dfea3a35d),
    U64(0xc0404080c0404080), U64(0x8a8f8f058a8f8f05),
    U64(0xad92923fad92923f), U64(0xbc9d9d21bc9d9d21),
    U64(0x4838387048383870), U64(0x04f5f5f104f5f5f1),
    U64(0xdfbcbc63dfbcbc63), U64(0xc1b6b677c1b6b677),
    U64(0x75dadaaf75dadaaf), U64(0x6321214263212142),
    U64(0x3010102030101020), U64(0x1affffe51affffe5),
    U64(0x0ef3f3fd0ef3f3fd), U64(0x6dd2d2bf6dd2d2bf),
    U64(0x4ccdcd814ccdcd81), U64(0x140c0c18140c0c18),
    U64(0x3513132635131326), U64(0x2fececc32fececc3),
    U64(0xe15f5fbee15f5fbe), U64(0xa2979735a2979735),
    U64(0xcc444488cc444488), U64(0x3917172e3917172e),
    U64(0x57c4c49357c4c493), U64(0xf2a7a755f2a7a755),
    U64(0x827e7efc827e7efc), U64(0x473d3d7a473d3d7a),
    U64(0xac6464c8ac6464c8), U64(0xe75d5dbae75d5dba),
    U64(0x2b1919322b191932), U64(0x957373e6957373e6),
    U64(0xa06060c0a06060c0), U64(0x9881811998818119),
    U64(0xd14f4f9ed14f4f9e), U64(0x7fdcdca37fdcdca3),
    U64(0x6622224466222244), U64(0x7e2a2a547e2a2a54),
    U64(0xab90903bab90903b), U64(0x8388880b8388880b),
    U64(0xca46468cca46468c), U64(0x29eeeec729eeeec7),
    U64(0xd3b8b86bd3b8b86b), U64(0x3c1414283c141428),
    U64(0x79dedea779dedea7), U64(0xe25e5ebce25e5ebc),
    U64(0x1d0b0b161d0b0b16), U64(0x76dbdbad76dbdbad),
    U64(0x3be0e0db3be0e0db), U64(0x5632326456323264),
    U64(0x4e3a3a744e3a3a74), U64(0x1e0a0a141e0a0a14),
    U64(0xdb494992db494992), U64(0x0a06060c0a06060c),
    U64(0x6c2424486c242448), U64(0xe45c5cb8e45c5cb8),
    U64(0x5dc2c29f5dc2c29f), U64(0x6ed3d3bd6ed3d3bd),
    U64(0xefacac43efacac43), U64(0xa66262c4a66262c4),
    U64(0xa8919139a8919139), U64(0xa4959531a4959531),
    U64(0x37e4e4d337e4e4d3), U64(0x8b7979f28b7979f2),
    U64(0x32e7e7d532e7e7d5), U64(0x43c8c88b43c8c88b),
    U64(0x5937376e5937376e), U64(0xb76d6ddab76d6dda),
    U64(0x8c8d8d018c8d8d01), U64(0x64d5d5b164d5d5b1),
    U64(0xd24e4e9cd24e4e9c), U64(0xe0a9a949e0a9a949),
    U64(0xb46c6cd8b46c6cd8), U64(0xfa5656acfa5656ac),
    U64(0x07f4f4f307f4f4f3), U64(0x25eaeacf25eaeacf),
    U64(0xaf6565caaf6565ca), U64(0x8e7a7af48e7a7af4),
    U64(0xe9aeae47e9aeae47), U64(0x1808081018080810),
    U64(0xd5baba6fd5baba6f), U64(0x887878f0887878f0),
    U64(0x6f25254a6f25254a), U64(0x722e2e5c722e2e5c),
    U64(0x241c1c38241c1c38), U64(0xf1a6a657f1a6a657),
    U64(0xc7b4b473c7b4b473), U64(0x51c6c69751c6c697),
    U64(0x23e8e8cb23e8e8cb), U64(0x7cdddda17cdddda1),
    U64(0x9c7474e89c7474e8), U64(0x211f1f3e211f1f3e),
    U64(0xdd4b4b96dd4b4b96), U64(0xdcbdbd61dcbdbd61),
    U64(0x868b8b0d868b8b0d), U64(0x858a8a0f858a8a0f),
    U64(0x907070e0907070e0), U64(0x423e3e7c423e3e7c),
    U64(0xc4b5b571c4b5b571), U64(0xaa6666ccaa6666cc),
    U64(0xd8484890d8484890), U64(0x0503030605030306),
    U64(0x01f6f6f701f6f6f7), U64(0x120e0e1c120e0e1c),
    U64(0xa36161c2a36161c2), U64(0x5f35356a5f35356a),
    U64(0xf95757aef95757ae), U64(0xd0b9b969d0b9b969),
    U64(0x9186861791868617), U64(0x58c1c19958c1c199),
    U64(0x271d1d3a271d1d3a), U64(0xb99e9e27b99e9e27),
    U64(0x38e1e1d938e1e1d9), U64(0x13f8f8eb13f8f8eb),
    U64(0xb398982bb398982b), U64(0x3311112233111122),
    U64(0xbb6969d2bb6969d2), U64(0x70d9d9a970d9d9a9),
    U64(0x898e8e07898e8e07), U64(0xa7949433a7949433),
    U64(0xb69b9b2db69b9b2d), U64(0x221e1e3c221e1e3c),
    U64(0x9287871592878715), U64(0x20e9e9c920e9e9c9),
    U64(0x49cece8749cece87), U64(0xff5555aaff5555aa),
    U64(0x7828285078282850), U64(0x7adfdfa57adfdfa5),
    U64(0x8f8c8c038f8c8c03), U64(0xf8a1a159f8a1a159),
    U64(0x8089890980898909), U64(0x170d0d1a170d0d1a),
    U64(0xdabfbf65dabfbf65), U64(0x31e6e6d731e6e6d7),
    U64(0xc6424284c6424284), U64(0xb86868d0b86868d0),
    U64(0xc3414182c3414182), U64(0xb0999929b0999929),
    U64(0x772d2d5a772d2d5a), U64(0x110f0f1e110f0f1e),
    U64(0xcbb0b07bcbb0b07b), U64(0xfc5454a8fc5454a8),
    U64(0xd6bbbb6dd6bbbb6d), U64(0x3a16162c3a16162c)
};
262
/*
 * Plain AES S-box (forward SubBytes), 256 bytes.  Used by the key
 * schedule and by the "compact" rounds, which trade the 2KB Te table
 * for this smaller, lower-cache-footprint table plus an explicit
 * MixColumns computed in registers.
 */
static const u8 Te4[256] = {
    0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U,
    0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U,
    0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U,
    0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U,
    0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU,
    0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U,
    0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU,
    0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U,
    0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U,
    0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U,
    0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU,
    0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU,
    0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U,
    0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U,
    0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U,
    0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U,
    0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U,
    0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U,
    0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U,
    0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU,
    0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU,
    0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U,
    0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U,
    0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U,
    0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U,
    0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU,
    0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU,
    0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU,
    0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U,
    0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU,
    0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U,
    0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U
};
297
/*
 * Combined decryption table: each 64-bit entry is the same 32-bit value
 * duplicated in both halves (per the schedule in the comment above,
 * Si[x] times the InvMixColumns coefficients).  Td0..Td3 index into
 * this one 2KB table at different byte offsets.
 */
static const u64 Td[256] = {
    U64(0x50a7f45150a7f451), U64(0x5365417e5365417e),
    U64(0xc3a4171ac3a4171a), U64(0x965e273a965e273a),
    U64(0xcb6bab3bcb6bab3b), U64(0xf1459d1ff1459d1f),
    U64(0xab58faacab58faac), U64(0x9303e34b9303e34b),
    U64(0x55fa302055fa3020), U64(0xf66d76adf66d76ad),
    U64(0x9176cc889176cc88), U64(0x254c02f5254c02f5),
    U64(0xfcd7e54ffcd7e54f), U64(0xd7cb2ac5d7cb2ac5),
    U64(0x8044352680443526), U64(0x8fa362b58fa362b5),
    U64(0x495ab1de495ab1de), U64(0x671bba25671bba25),
    U64(0x980eea45980eea45), U64(0xe1c0fe5de1c0fe5d),
    U64(0x02752fc302752fc3), U64(0x12f04c8112f04c81),
    U64(0xa397468da397468d), U64(0xc6f9d36bc6f9d36b),
    U64(0xe75f8f03e75f8f03), U64(0x959c9215959c9215),
    U64(0xeb7a6dbfeb7a6dbf), U64(0xda595295da595295),
    U64(0x2d83bed42d83bed4), U64(0xd3217458d3217458),
    U64(0x2969e0492969e049), U64(0x44c8c98e44c8c98e),
    U64(0x6a89c2756a89c275), U64(0x78798ef478798ef4),
    U64(0x6b3e58996b3e5899), U64(0xdd71b927dd71b927),
    U64(0xb64fe1beb64fe1be), U64(0x17ad88f017ad88f0),
    U64(0x66ac20c966ac20c9), U64(0xb43ace7db43ace7d),
    U64(0x184adf63184adf63), U64(0x82311ae582311ae5),
    U64(0x6033519760335197), U64(0x457f5362457f5362),
    U64(0xe07764b1e07764b1), U64(0x84ae6bbb84ae6bbb),
    U64(0x1ca081fe1ca081fe), U64(0x942b08f9942b08f9),
    U64(0x5868487058684870), U64(0x19fd458f19fd458f),
    U64(0x876cde94876cde94), U64(0xb7f87b52b7f87b52),
    U64(0x23d373ab23d373ab), U64(0xe2024b72e2024b72),
    U64(0x578f1fe3578f1fe3), U64(0x2aab55662aab5566),
    U64(0x0728ebb20728ebb2), U64(0x03c2b52f03c2b52f),
    U64(0x9a7bc5869a7bc586), U64(0xa50837d3a50837d3),
    U64(0xf2872830f2872830), U64(0xb2a5bf23b2a5bf23),
    U64(0xba6a0302ba6a0302), U64(0x5c8216ed5c8216ed),
    U64(0x2b1ccf8a2b1ccf8a), U64(0x92b479a792b479a7),
    U64(0xf0f207f3f0f207f3), U64(0xa1e2694ea1e2694e),
    U64(0xcdf4da65cdf4da65), U64(0xd5be0506d5be0506),
    U64(0x1f6234d11f6234d1), U64(0x8afea6c48afea6c4),
    U64(0x9d532e349d532e34), U64(0xa055f3a2a055f3a2),
    U64(0x32e18a0532e18a05), U64(0x75ebf6a475ebf6a4),
    U64(0x39ec830b39ec830b), U64(0xaaef6040aaef6040),
    U64(0x069f715e069f715e), U64(0x51106ebd51106ebd),
    U64(0xf98a213ef98a213e), U64(0x3d06dd963d06dd96),
    U64(0xae053eddae053edd), U64(0x46bde64d46bde64d),
    U64(0xb58d5491b58d5491), U64(0x055dc471055dc471),
    U64(0x6fd406046fd40604), U64(0xff155060ff155060),
    U64(0x24fb981924fb9819), U64(0x97e9bdd697e9bdd6),
    U64(0xcc434089cc434089), U64(0x779ed967779ed967),
    U64(0xbd42e8b0bd42e8b0), U64(0x888b8907888b8907),
    U64(0x385b19e7385b19e7), U64(0xdbeec879dbeec879),
    U64(0x470a7ca1470a7ca1), U64(0xe90f427ce90f427c),
    U64(0xc91e84f8c91e84f8), U64(0x0000000000000000),
    U64(0x8386800983868009), U64(0x48ed2b3248ed2b32),
    U64(0xac70111eac70111e), U64(0x4e725a6c4e725a6c),
    U64(0xfbff0efdfbff0efd), U64(0x5638850f5638850f),
    U64(0x1ed5ae3d1ed5ae3d), U64(0x27392d3627392d36),
    U64(0x64d90f0a64d90f0a), U64(0x21a65c6821a65c68),
    U64(0xd1545b9bd1545b9b), U64(0x3a2e36243a2e3624),
    U64(0xb1670a0cb1670a0c), U64(0x0fe757930fe75793),
    U64(0xd296eeb4d296eeb4), U64(0x9e919b1b9e919b1b),
    U64(0x4fc5c0804fc5c080), U64(0xa220dc61a220dc61),
    U64(0x694b775a694b775a), U64(0x161a121c161a121c),
    U64(0x0aba93e20aba93e2), U64(0xe52aa0c0e52aa0c0),
    U64(0x43e0223c43e0223c), U64(0x1d171b121d171b12),
    U64(0x0b0d090e0b0d090e), U64(0xadc78bf2adc78bf2),
    U64(0xb9a8b62db9a8b62d), U64(0xc8a91e14c8a91e14),
    U64(0x8519f1578519f157), U64(0x4c0775af4c0775af),
    U64(0xbbdd99eebbdd99ee), U64(0xfd607fa3fd607fa3),
    U64(0x9f2601f79f2601f7), U64(0xbcf5725cbcf5725c),
    U64(0xc53b6644c53b6644), U64(0x347efb5b347efb5b),
    U64(0x7629438b7629438b), U64(0xdcc623cbdcc623cb),
    U64(0x68fcedb668fcedb6), U64(0x63f1e4b863f1e4b8),
    U64(0xcadc31d7cadc31d7), U64(0x1085634210856342),
    U64(0x4022971340229713), U64(0x2011c6842011c684),
    U64(0x7d244a857d244a85), U64(0xf83dbbd2f83dbbd2),
    U64(0x1132f9ae1132f9ae), U64(0x6da129c76da129c7),
    U64(0x4b2f9e1d4b2f9e1d), U64(0xf330b2dcf330b2dc),
    U64(0xec52860dec52860d), U64(0xd0e3c177d0e3c177),
    U64(0x6c16b32b6c16b32b), U64(0x99b970a999b970a9),
    U64(0xfa489411fa489411), U64(0x2264e9472264e947),
    U64(0xc48cfca8c48cfca8), U64(0x1a3ff0a01a3ff0a0),
    U64(0xd82c7d56d82c7d56), U64(0xef903322ef903322),
    U64(0xc74e4987c74e4987), U64(0xc1d138d9c1d138d9),
    U64(0xfea2ca8cfea2ca8c), U64(0x360bd498360bd498),
    U64(0xcf81f5a6cf81f5a6), U64(0x28de7aa528de7aa5),
    U64(0x268eb7da268eb7da), U64(0xa4bfad3fa4bfad3f),
    U64(0xe49d3a2ce49d3a2c), U64(0x0d9278500d927850),
    U64(0x9bcc5f6a9bcc5f6a), U64(0x62467e5462467e54),
    U64(0xc2138df6c2138df6), U64(0xe8b8d890e8b8d890),
    U64(0x5ef7392e5ef7392e), U64(0xf5afc382f5afc382),
    U64(0xbe805d9fbe805d9f), U64(0x7c93d0697c93d069),
    U64(0xa92dd56fa92dd56f), U64(0xb31225cfb31225cf),
    U64(0x3b99acc83b99acc8), U64(0xa77d1810a77d1810),
    U64(0x6e639ce86e639ce8), U64(0x7bbb3bdb7bbb3bdb),
    U64(0x097826cd097826cd), U64(0xf418596ef418596e),
    U64(0x01b79aec01b79aec), U64(0xa89a4f83a89a4f83),
    U64(0x656e95e6656e95e6), U64(0x7ee6ffaa7ee6ffaa),
    U64(0x08cfbc2108cfbc21), U64(0xe6e815efe6e815ef),
    U64(0xd99be7bad99be7ba), U64(0xce366f4ace366f4a),
    U64(0xd4099fead4099fea), U64(0xd67cb029d67cb029),
    U64(0xafb2a431afb2a431), U64(0x31233f2a31233f2a),
    U64(0x3094a5c63094a5c6), U64(0xc066a235c066a235),
    U64(0x37bc4e7437bc4e74), U64(0xa6ca82fca6ca82fc),
    U64(0xb0d090e0b0d090e0), U64(0x15d8a73315d8a733),
    U64(0x4a9804f14a9804f1), U64(0xf7daec41f7daec41),
    U64(0x0e50cd7f0e50cd7f), U64(0x2ff691172ff69117),
    U64(0x8dd64d768dd64d76), U64(0x4db0ef434db0ef43),
    U64(0x544daacc544daacc), U64(0xdf0496e4df0496e4),
    U64(0xe3b5d19ee3b5d19e), U64(0x1b886a4c1b886a4c),
    U64(0xb81f2cc1b81f2cc1), U64(0x7f5165467f516546),
    U64(0x04ea5e9d04ea5e9d), U64(0x5d358c015d358c01),
    U64(0x737487fa737487fa), U64(0x2e410bfb2e410bfb),
    U64(0x5a1d67b35a1d67b3), U64(0x52d2db9252d2db92),
    U64(0x335610e9335610e9), U64(0x1347d66d1347d66d),
    U64(0x8c61d79a8c61d79a), U64(0x7a0ca1377a0ca137),
    U64(0x8e14f8598e14f859), U64(0x893c13eb893c13eb),
    U64(0xee27a9ceee27a9ce), U64(0x35c961b735c961b7),
    U64(0xede51ce1ede51ce1), U64(0x3cb1477a3cb1477a),
    U64(0x59dfd29c59dfd29c), U64(0x3f73f2553f73f255),
    U64(0x79ce141879ce1418), U64(0xbf37c773bf37c773),
    U64(0xeacdf753eacdf753), U64(0x5baafd5f5baafd5f),
    U64(0x146f3ddf146f3ddf), U64(0x86db447886db4478),
    U64(0x81f3afca81f3afca), U64(0x3ec468b93ec468b9),
    U64(0x2c3424382c342438), U64(0x5f40a3c25f40a3c2),
    U64(0x72c31d1672c31d16), U64(0x0c25e2bc0c25e2bc),
    U64(0x8b493c288b493c28), U64(0x41950dff41950dff),
    U64(0x7101a8397101a839), U64(0xdeb30c08deb30c08),
    U64(0x9ce4b4d89ce4b4d8), U64(0x90c1566490c15664),
    U64(0x6184cb7b6184cb7b), U64(0x70b632d570b632d5),
    U64(0x745c6c48745c6c48), U64(0x4257b8d04257b8d0)
};
/*
 * Inverse AES S-box (InvSubBytes), 256 bytes -- Td4[Te4[x]] == x.
 * Used by the compact decryption rounds (cf. Td4[x] = Si[x].[01] in
 * the schedule comment above Td).
 */
static const u8 Td4[256] = {
    0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
    0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
    0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
    0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
    0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
    0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
    0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
    0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
    0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
    0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
    0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
    0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
    0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
    0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
    0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
    0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
    0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
    0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
    0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
    0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
    0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
    0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
    0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
    0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
    0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
    0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
    0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
    0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
    0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
    0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
    0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
    0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU
};
462
/*
 * Key-schedule round constants.  The rcon byte (0x01, 0x02, ... 0x36)
 * sits in the low byte of each word, which in this implementation's
 * little-endian word layout corresponds to the first byte in memory.
 */
static const u32 rcon[] = {
    0x00000001U, 0x00000002U, 0x00000004U, 0x00000008U,
    0x00000010U, 0x00000020U, 0x00000040U, 0x00000080U,
    0x0000001bU, 0x00000036U, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
};
468
/**
 * Expand the cipher key into the encryption key schedule.
 *
 * Returns 0 on success, -1 if userKey or key is NULL, -2 if bits is
 * not 128/192/256.  Sets key->rounds to 10/12/14 respectively and
 * fills key->rd_key with 4*(rounds+1) words.
 *
 * NOTE: words are read with GETU32, which here is a raw unaligned
 * 32-bit load -- this whole file assumes a little-endian CPU, and the
 * Te4 shift patterns below fold RotWord into the little-endian byte
 * layout (the byte substituted into bit position 0 comes from bits
 * 8..15 of the previous word, etc.).
 */
int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
                        AES_KEY *key)
{

    u32 *rk;
    int i = 0;          /* rcon index / iteration counter */
    u32 temp;

    if (!userKey || !key)
        return -1;
    if (bits != 128 && bits != 192 && bits != 256)
        return -2;

    rk = key->rd_key;

    /* 10/12/14 rounds for 128/192/256-bit keys */
    if (bits==128)
        key->rounds = 10;
    else if (bits==192)
        key->rounds = 12;
    else
        key->rounds = 14;

    /* first 4 round-key words are the raw user key */
    rk[0] = GETU32(userKey     );
    rk[1] = GETU32(userKey +  4);
    rk[2] = GETU32(userKey +  8);
    rk[3] = GETU32(userKey + 12);
    if (bits == 128) {
        while (1) {
            /* rk[4] = rk[0] ^ SubWord(RotWord(rk[3])) ^ rcon[i],
             * with the rotation folded into the shift amounts */
            temp  = rk[3];
            rk[4] = rk[0] ^
                ((u32)Te4[(temp >>  8) & 0xff]      ) ^
                ((u32)Te4[(temp >> 16) & 0xff] <<  8) ^
                ((u32)Te4[(temp >> 24)       ] << 16) ^
                ((u32)Te4[(temp      ) & 0xff] << 24) ^
                rcon[i];
            rk[5] = rk[1] ^ rk[4];
            rk[6] = rk[2] ^ rk[5];
            rk[7] = rk[3] ^ rk[6];
            if (++i == 10) {
                return 0;
            }
            rk += 4;
        }
    }
    rk[4] = GETU32(userKey + 16);
    rk[5] = GETU32(userKey + 20);
    if (bits == 192) {
        while (1) {
            /* 6-word schedule: RotWord+SubWord+rcon every 6 words */
            temp = rk[ 5];
            rk[ 6] = rk[ 0] ^
                ((u32)Te4[(temp >>  8) & 0xff]      ) ^
                ((u32)Te4[(temp >> 16) & 0xff] <<  8) ^
                ((u32)Te4[(temp >> 24)       ] << 16) ^
                ((u32)Te4[(temp      ) & 0xff] << 24) ^
                rcon[i];
            rk[ 7] = rk[ 1] ^ rk[ 6];
            rk[ 8] = rk[ 2] ^ rk[ 7];
            rk[ 9] = rk[ 3] ^ rk[ 8];
            /* 8th iteration stops after 52 words (12 rounds + 1) */
            if (++i == 8) {
                return 0;
            }
            rk[10] = rk[ 4] ^ rk[ 9];
            rk[11] = rk[ 5] ^ rk[10];
            rk += 6;
        }
    }
    rk[6] = GETU32(userKey + 24);
    rk[7] = GETU32(userKey + 28);
    if (bits == 256) {
        while (1) {
            /* 8-word schedule: RotWord+SubWord+rcon at each 8-word
             * boundary ... */
            temp = rk[ 7];
            rk[ 8] = rk[ 0] ^
                ((u32)Te4[(temp >>  8) & 0xff]      ) ^
                ((u32)Te4[(temp >> 16) & 0xff] <<  8) ^
                ((u32)Te4[(temp >> 24)       ] << 16) ^
                ((u32)Te4[(temp      ) & 0xff] << 24) ^
                rcon[i];
            rk[ 9] = rk[ 1] ^ rk[ 8];
            rk[10] = rk[ 2] ^ rk[ 9];
            rk[11] = rk[ 3] ^ rk[10];
            /* 7th iteration stops after 60 words (14 rounds + 1) */
            if (++i == 7) {
                return 0;
            }
            /* ... plus SubWord (no rotation) at the half-way point */
            temp = rk[11];
            rk[12] = rk[ 4] ^
                ((u32)Te4[(temp      ) & 0xff]      ) ^
                ((u32)Te4[(temp >>  8) & 0xff] <<  8) ^
                ((u32)Te4[(temp >> 16) & 0xff] << 16) ^
                ((u32)Te4[(temp >> 24)       ] << 24);
            rk[13] = rk[ 5] ^ rk[12];
            rk[14] = rk[ 6] ^ rk[13];
            rk[15] = rk[ 7] ^ rk[14];

            rk += 8;
        }
    }
    return 0;
}
570
/**
 * Expand the cipher key into the decryption key schedule.
 *
 * Builds the encryption schedule first, then converts it for
 * equivalent-inverse-cipher decryption: the round keys are reversed
 * and InvMixColumns is applied to all but the first and last.
 * Returns 0 on success, or AES_set_encrypt_key's negative error code.
 */
int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
                        AES_KEY *key)
{

    u32 *rk;
    int i, j, status;
    u32 temp;

    /* first, start with an encryption schedule */
    status = AES_set_encrypt_key(userKey, bits, key);
    if (status < 0)
        return status;

    rk = key->rd_key;

    /* invert the order of the round keys: */
    for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
        temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
        temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
        temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
        temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
    }
    /* apply the inverse MixColumn transform to all round keys but the first and the last: */
    for (i = 1; i < (key->rounds); i++) {
        rk += 4;
#if 1
        for (j = 0; j < 4; j++) {
            /* tpN = N*tp1 in GF(2^8), computed on all four bytes of
             * the word in parallel; InvMixColumns needs 9, 11, 13, 14 */
            u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;

            tp1 = rk[j];
            m = tp1 & 0x80808080;
            /* xtime: shift each byte left one bit, XOR 0x1b into bytes
             * whose top bit was set; (m - (m >> 7)) turns each 0x80
             * into a mask covering 0x1b */
            tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
                ((m - (m >> 7)) & 0x1b1b1b1b);
            m = tp2 & 0x80808080;
            tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
                ((m - (m >> 7)) & 0x1b1b1b1b);
            m = tp4 & 0x80808080;
            tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
                ((m - (m >> 7)) & 0x1b1b1b1b);
            tp9 = tp8 ^ tp1;            /* 9 = 8 ^ 1 */
            tpb = tp9 ^ tp2;            /* 11 = 9 ^ 2 */
            tpd = tp9 ^ tp4;            /* 13 = 9 ^ 4 */
            tpe = tp8 ^ tp4 ^ tp2;      /* 14 = 8 ^ 4 ^ 2 */
#if defined(ROTATE)
            /* combine the column as [0e,09,0d,0b] via word rotates */
            rk[j] = tpe ^ ROTATE(tpd,16) ^
                ROTATE(tp9,8) ^ ROTATE(tpb,24);
#else
            rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
                (tp9 >> 24) ^ (tp9 << 8) ^
                (tpb >> 8) ^ (tpb << 24);
#endif
        }
#else
        /* table-driven alternative: InvMixColumns(w) = Td(SubBytes(w)),
         * using Te2 to extract the S-box byte -- kept for reference */
        rk[0] =
            Td0[Te2[(rk[0]      ) & 0xff] & 0xff] ^
            Td1[Te2[(rk[0] >>  8) & 0xff] & 0xff] ^
            Td2[Te2[(rk[0] >> 16) & 0xff] & 0xff] ^
            Td3[Te2[(rk[0] >> 24)       ] & 0xff];
        rk[1] =
            Td0[Te2[(rk[1]      ) & 0xff] & 0xff] ^
            Td1[Te2[(rk[1] >>  8) & 0xff] & 0xff] ^
            Td2[Te2[(rk[1] >> 16) & 0xff] & 0xff] ^
            Td3[Te2[(rk[1] >> 24)       ] & 0xff];
        rk[2] =
            Td0[Te2[(rk[2]      ) & 0xff] & 0xff] ^
            Td1[Te2[(rk[2] >>  8) & 0xff] & 0xff] ^
            Td2[Te2[(rk[2] >> 16) & 0xff] & 0xff] ^
            Td3[Te2[(rk[2] >> 24)       ] & 0xff];
        rk[3] =
            Td0[Te2[(rk[3]      ) & 0xff] & 0xff] ^
            Td1[Te2[(rk[3] >>  8) & 0xff] & 0xff] ^
            Td2[Te2[(rk[3] >> 16) & 0xff] & 0xff] ^
            Td3[Te2[(rk[3] >> 24)       ] & 0xff];
#endif
    }
    return 0;
}
651
/*
 * Encrypt a single 16-byte block; in and out can overlap.
 *
 * With AES_COMPACT_IN_OUTER_ROUNDS (the default above), the first and
 * last rounds use the 256-byte Te4 S-box (prefetched to blunt cache
 * timing) with MixColumns done in registers; the inner rounds use the
 * 2KB combined Te table unless AES_COMPACT_IN_INNER_ROUNDS is also
 * defined.  GETU32 is a raw unaligned load: little-endian only.
 */
void AES_encrypt(const unsigned char *in, unsigned char *out,
                 const AES_KEY *key)
{

    const u32 *rk;
    u32 s0, s1, s2, s3, t[4];
    int r;

    assert(in && out && key);
    rk = key->rd_key;

    /*
     * map byte array block to cipher state
     * and add initial round key:
     */
    s0 = GETU32(in     ) ^ rk[0];
    s1 = GETU32(in +  4) ^ rk[1];
    s2 = GETU32(in +  8) ^ rk[2];
    s3 = GETU32(in + 12) ^ rk[3];

#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
    prefetch256(Te4);

    /* round 1, compact form: SubBytes + ShiftRows (the shifted byte
     * selection is folded into which state word feeds each position) */
    t[0] = (u32)Te4[(s0      ) & 0xff]       ^
           (u32)Te4[(s1 >>  8) & 0xff] <<  8 ^
           (u32)Te4[(s2 >> 16) & 0xff] << 16 ^
           (u32)Te4[(s3 >> 24)       ] << 24;
    t[1] = (u32)Te4[(s1      ) & 0xff]       ^
           (u32)Te4[(s2 >>  8) & 0xff] <<  8 ^
           (u32)Te4[(s3 >> 16) & 0xff] << 16 ^
           (u32)Te4[(s0 >> 24)       ] << 24;
    t[2] = (u32)Te4[(s2      ) & 0xff]       ^
           (u32)Te4[(s3 >>  8) & 0xff] <<  8 ^
           (u32)Te4[(s0 >> 16) & 0xff] << 16 ^
           (u32)Te4[(s1 >> 24)       ] << 24;
    t[3] = (u32)Te4[(s3      ) & 0xff]       ^
           (u32)Te4[(s0 >>  8) & 0xff] <<  8 ^
           (u32)Te4[(s1 >> 16) & 0xff] << 16 ^
           (u32)Te4[(s2 >> 24)       ] << 24;

    /* now do the linear transform using words */
    {   int i;
        u32 r0, r1, r2;

        for (i = 0; i < 4; i++) {
            /* MixColumns: r2 = xtime(r0) per byte (see the identical
             * mask trick in AES_set_decrypt_key), then combine with
             * byte rotations of the column */
            r0 = t[i];
            r1 = r0 & 0x80808080;
            r2 = ((r0 & 0x7f7f7f7f) << 1) ^
                ((r1 - (r1 >> 7)) & 0x1b1b1b1b);
#if defined(ROTATE)
            t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^
                ROTATE(r0,16) ^ ROTATE(r0,8);
#else
            t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^
                (r0 << 16) ^ (r0 >> 16) ^
                (r0 << 8) ^ (r0 >> 24);
#endif
            t[i] ^= rk[4+i];    /* AddRoundKey */
        }
    }
#else
    /* round 1, table form: Te0..Te3 fold SubBytes+MixColumns */
    t[0] = Te0[(s0      ) & 0xff] ^
           Te1[(s1 >>  8) & 0xff] ^
           Te2[(s2 >> 16) & 0xff] ^
           Te3[(s3 >> 24)       ] ^
           rk[4];
    t[1] = Te0[(s1      ) & 0xff] ^
           Te1[(s2 >>  8) & 0xff] ^
           Te2[(s3 >> 16) & 0xff] ^
           Te3[(s0 >> 24)       ] ^
           rk[5];
    t[2] = Te0[(s2      ) & 0xff] ^
           Te1[(s3 >>  8) & 0xff] ^
           Te2[(s0 >> 16) & 0xff] ^
           Te3[(s1 >> 24)       ] ^
           rk[6];
    t[3] = Te0[(s3      ) & 0xff] ^
           Te1[(s0 >>  8) & 0xff] ^
           Te2[(s1 >> 16) & 0xff] ^
           Te3[(s2 >> 24)       ] ^
           rk[7];
#endif
    s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];

    /*
     * Nr - 2 full rounds:
     */
    for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) {
#if defined(AES_COMPACT_IN_INNER_ROUNDS)
        /* compact inner round: same S-box + in-register MixColumns
         * scheme as the outer round above */
        t[0] = (u32)Te4[(s0      ) & 0xff]       ^
               (u32)Te4[(s1 >>  8) & 0xff] <<  8 ^
               (u32)Te4[(s2 >> 16) & 0xff] << 16 ^
               (u32)Te4[(s3 >> 24)       ] << 24;
        t[1] = (u32)Te4[(s1      ) & 0xff]       ^
               (u32)Te4[(s2 >>  8) & 0xff] <<  8 ^
               (u32)Te4[(s3 >> 16) & 0xff] << 16 ^
               (u32)Te4[(s0 >> 24)       ] << 24;
        t[2] = (u32)Te4[(s2      ) & 0xff]       ^
               (u32)Te4[(s3 >>  8) & 0xff] <<  8 ^
               (u32)Te4[(s0 >> 16) & 0xff] << 16 ^
               (u32)Te4[(s1 >> 24)       ] << 24;
        t[3] = (u32)Te4[(s3      ) & 0xff]       ^
               (u32)Te4[(s0 >>  8) & 0xff] <<  8 ^
               (u32)Te4[(s1 >> 16) & 0xff] << 16 ^
               (u32)Te4[(s2 >> 24)       ] << 24;

        /* now do the linear transform using words */
        {
            int i;
            u32 r0, r1, r2;

            for (i = 0; i < 4; i++) {
                r0 = t[i];
                r1 = r0 & 0x80808080;
                r2 = ((r0 & 0x7f7f7f7f) << 1) ^
                    ((r1 - (r1 >> 7)) & 0x1b1b1b1b);
#if defined(ROTATE)
                t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^
                    ROTATE(r0,16) ^ ROTATE(r0,8);
#else
                t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^
                    (r0 << 16) ^ (r0 >> 16) ^
                    (r0 << 8) ^ (r0 >> 24);
#endif
                t[i] ^= rk[i];
            }
        }
#else
        /* full-table inner round */
        t[0] = Te0[(s0      ) & 0xff] ^
               Te1[(s1 >>  8) & 0xff] ^
               Te2[(s2 >> 16) & 0xff] ^
               Te3[(s3 >> 24)       ] ^
               rk[0];
        t[1] = Te0[(s1      ) & 0xff] ^
               Te1[(s2 >>  8) & 0xff] ^
               Te2[(s3 >> 16) & 0xff] ^
               Te3[(s0 >> 24)       ] ^
               rk[1];
        t[2] = Te0[(s2      ) & 0xff] ^
               Te1[(s3 >>  8) & 0xff] ^
               Te2[(s0 >> 16) & 0xff] ^
               Te3[(s1 >> 24)       ] ^
               rk[2];
        t[3] = Te0[(s3      ) & 0xff] ^
               Te1[(s0 >>  8) & 0xff] ^
               Te2[(s1 >> 16) & 0xff] ^
               Te3[(s2 >> 24)       ] ^
               rk[3];
#endif
        s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
    }
    /*
     * apply last round and
     * map cipher state to byte array block:
     * (no MixColumns in the final round)
     */
#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
    prefetch256(Te4);

    *(u32*)(out+0) =
        (u32)Te4[(s0      ) & 0xff]       ^
        (u32)Te4[(s1 >>  8) & 0xff] <<  8 ^
        (u32)Te4[(s2 >> 16) & 0xff] << 16 ^
        (u32)Te4[(s3 >> 24)       ] << 24 ^
        rk[0];
    *(u32*)(out+4) =
        (u32)Te4[(s1      ) & 0xff]       ^
        (u32)Te4[(s2 >>  8) & 0xff] <<  8 ^
        (u32)Te4[(s3 >> 16) & 0xff] << 16 ^
        (u32)Te4[(s0 >> 24)       ] << 24 ^
        rk[1];
    *(u32*)(out+8) =
        (u32)Te4[(s2      ) & 0xff]       ^
        (u32)Te4[(s3 >>  8) & 0xff] <<  8 ^
        (u32)Te4[(s0 >> 16) & 0xff] << 16 ^
        (u32)Te4[(s1 >> 24)       ] << 24 ^
        rk[2];
    *(u32*)(out+12) =
        (u32)Te4[(s3      ) & 0xff]       ^
        (u32)Te4[(s0 >>  8) & 0xff] <<  8 ^
        (u32)Te4[(s1 >> 16) & 0xff] << 16 ^
        (u32)Te4[(s2 >> 24)       ] << 24 ^
        rk[3];
#else
    /* extract the bare S-box byte from each Te alias via masks */
    *(u32*)(out+0) =
        (Te2[(s0      ) & 0xff] & 0x000000ffU) ^
        (Te3[(s1 >>  8) & 0xff] & 0x0000ff00U) ^
        (Te0[(s2 >> 16) & 0xff] & 0x00ff0000U) ^
        (Te1[(s3 >> 24)       ] & 0xff000000U) ^
        rk[0];
    *(u32*)(out+4) =
        (Te2[(s1      ) & 0xff] & 0x000000ffU) ^
        (Te3[(s2 >>  8) & 0xff] & 0x0000ff00U) ^
        (Te0[(s3 >> 16) & 0xff] & 0x00ff0000U) ^
        (Te1[(s0 >> 24)       ] & 0xff000000U) ^
        rk[1];
    *(u32*)(out+8) =
        (Te2[(s2      ) & 0xff] & 0x000000ffU) ^
        (Te3[(s3 >>  8) & 0xff] & 0x0000ff00U) ^
        (Te0[(s0 >> 16) & 0xff] & 0x00ff0000U) ^
        (Te1[(s1 >> 24)       ] & 0xff000000U) ^
        rk[2];
    *(u32*)(out+12) =
        (Te2[(s3      ) & 0xff] & 0x000000ffU) ^
        (Te3[(s0 >>  8) & 0xff] & 0x0000ff00U) ^
        (Te0[(s1 >> 16) & 0xff] & 0x00ff0000U) ^
        (Te1[(s2 >> 24)       ] & 0xff000000U) ^
        rk[3];
#endif
}
865
866 /*
867 * Decrypt a single block
868 * in and out can overlap
869 */
AES_decrypt(const unsigned char * in,unsigned char * out,const AES_KEY * key)870 void AES_decrypt(const unsigned char *in, unsigned char *out,
871 const AES_KEY *key)
872 {
873
874 const u32 *rk;
875 u32 s0, s1, s2, s3, t[4];
876 int r;
877
878 assert(in && out && key);
879 rk = key->rd_key;
880
881 /*
882 * map byte array block to cipher state
883 * and add initial round key:
884 */
885 s0 = GETU32(in ) ^ rk[0];
886 s1 = GETU32(in + 4) ^ rk[1];
887 s2 = GETU32(in + 8) ^ rk[2];
888 s3 = GETU32(in + 12) ^ rk[3];
889
890 #if defined(AES_COMPACT_IN_OUTER_ROUNDS)
891 prefetch256(Td4);
892
893 t[0] = (u32)Td4[(s0 ) & 0xff] ^
894 (u32)Td4[(s3 >> 8) & 0xff] << 8 ^
895 (u32)Td4[(s2 >> 16) & 0xff] << 16 ^
896 (u32)Td4[(s1 >> 24) ] << 24;
897 t[1] = (u32)Td4[(s1 ) & 0xff] ^
898 (u32)Td4[(s0 >> 8) & 0xff] << 8 ^
899 (u32)Td4[(s3 >> 16) & 0xff] << 16 ^
900 (u32)Td4[(s2 >> 24) ] << 24;
901 t[2] = (u32)Td4[(s2 ) & 0xff] ^
902 (u32)Td4[(s1 >> 8) & 0xff] << 8 ^
903 (u32)Td4[(s0 >> 16) & 0xff] << 16 ^
904 (u32)Td4[(s3 >> 24) ] << 24;
905 t[3] = (u32)Td4[(s3 ) & 0xff] ^
906 (u32)Td4[(s2 >> 8) & 0xff] << 8 ^
907 (u32)Td4[(s1 >> 16) & 0xff] << 16 ^
908 (u32)Td4[(s0 >> 24) ] << 24;
909
910 /* now do the linear transform using words */
911 {
912 int i;
913 u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
914
915 for (i = 0; i < 4; i++) {
916 tp1 = t[i];
917 m = tp1 & 0x80808080;
918 tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
919 ((m - (m >> 7)) & 0x1b1b1b1b);
920 m = tp2 & 0x80808080;
921 tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
922 ((m - (m >> 7)) & 0x1b1b1b1b);
923 m = tp4 & 0x80808080;
924 tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
925 ((m - (m >> 7)) & 0x1b1b1b1b);
926 tp9 = tp8 ^ tp1;
927 tpb = tp9 ^ tp2;
928 tpd = tp9 ^ tp4;
929 tpe = tp8 ^ tp4 ^ tp2;
930 #if defined(ROTATE)
931 t[i] = tpe ^ ROTATE(tpd,16) ^
932 ROTATE(tp9,8) ^ ROTATE(tpb,24);
933 #else
934 t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
935 (tp9 >> 24) ^ (tp9 << 8) ^
936 (tpb >> 8) ^ (tpb << 24);
937 #endif
938 t[i] ^= rk[4+i];
939 }
940 }
941 #else
942 t[0] = Td0[(s0 ) & 0xff] ^
943 Td1[(s3 >> 8) & 0xff] ^
944 Td2[(s2 >> 16) & 0xff] ^
945 Td3[(s1 >> 24) ] ^
946 rk[4];
947 t[1] = Td0[(s1 ) & 0xff] ^
948 Td1[(s0 >> 8) & 0xff] ^
949 Td2[(s3 >> 16) & 0xff] ^
950 Td3[(s2 >> 24) ] ^
951 rk[5];
952 t[2] = Td0[(s2 ) & 0xff] ^
953 Td1[(s1 >> 8) & 0xff] ^
954 Td2[(s0 >> 16) & 0xff] ^
955 Td3[(s3 >> 24) ] ^
956 rk[6];
957 t[3] = Td0[(s3 ) & 0xff] ^
958 Td1[(s2 >> 8) & 0xff] ^
959 Td2[(s1 >> 16) & 0xff] ^
960 Td3[(s0 >> 24) ] ^
961 rk[7];
962 #endif
963 s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
964
965 /*
966 * Nr - 2 full rounds:
967 */
968 for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) {
969 #if defined(AES_COMPACT_IN_INNER_ROUNDS)
970 t[0] = (u32)Td4[(s0 ) & 0xff] ^
971 (u32)Td4[(s3 >> 8) & 0xff] << 8 ^
972 (u32)Td4[(s2 >> 16) & 0xff] << 16 ^
973 (u32)Td4[(s1 >> 24) ] << 24;
974 t[1] = (u32)Td4[(s1 ) & 0xff] ^
975 (u32)Td4[(s0 >> 8) & 0xff] << 8 ^
976 (u32)Td4[(s3 >> 16) & 0xff] << 16 ^
977 (u32)Td4[(s2 >> 24) ] << 24;
978 t[2] = (u32)Td4[(s2 ) & 0xff] ^
979 (u32)Td4[(s1 >> 8) & 0xff] << 8 ^
980 (u32)Td4[(s0 >> 16) & 0xff] << 16 ^
981 (u32)Td4[(s3 >> 24) ] << 24;
982 t[3] = (u32)Td4[(s3 ) & 0xff] ^
983 (u32)Td4[(s2 >> 8) & 0xff] << 8 ^
984 (u32)Td4[(s1 >> 16) & 0xff] << 16 ^
985 (u32)Td4[(s0 >> 24) ] << 24;
986
987 /* now do the linear transform using words */
988 {
989 int i;
990 u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
991
992 for (i = 0; i < 4; i++) {
993 tp1 = t[i];
994 m = tp1 & 0x80808080;
995 tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
996 ((m - (m >> 7)) & 0x1b1b1b1b);
997 m = tp2 & 0x80808080;
998 tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
999 ((m - (m >> 7)) & 0x1b1b1b1b);
1000 m = tp4 & 0x80808080;
1001 tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
1002 ((m - (m >> 7)) & 0x1b1b1b1b);
1003 tp9 = tp8 ^ tp1;
1004 tpb = tp9 ^ tp2;
1005 tpd = tp9 ^ tp4;
1006 tpe = tp8 ^ tp4 ^ tp2;
1007 #if defined(ROTATE)
1008 t[i] = tpe ^ ROTATE(tpd,16) ^
1009 ROTATE(tp9,8) ^ ROTATE(tpb,24);
1010 #else
1011 t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
1012 (tp9 >> 24) ^ (tp9 << 8) ^
1013 (tpb >> 8) ^ (tpb << 24);
1014 #endif
1015 t[i] ^= rk[i];
1016 }
1017 }
1018 #else
1019 t[0] = Td0[(s0 ) & 0xff] ^
1020 Td1[(s3 >> 8) & 0xff] ^
1021 Td2[(s2 >> 16) & 0xff] ^
1022 Td3[(s1 >> 24) ] ^
1023 rk[0];
1024 t[1] = Td0[(s1 ) & 0xff] ^
1025 Td1[(s0 >> 8) & 0xff] ^
1026 Td2[(s3 >> 16) & 0xff] ^
1027 Td3[(s2 >> 24) ] ^
1028 rk[1];
1029 t[2] = Td0[(s2 ) & 0xff] ^
1030 Td1[(s1 >> 8) & 0xff] ^
1031 Td2[(s0 >> 16) & 0xff] ^
1032 Td3[(s3 >> 24) ] ^
1033 rk[2];
1034 t[3] = Td0[(s3 ) & 0xff] ^
1035 Td1[(s2 >> 8) & 0xff] ^
1036 Td2[(s1 >> 16) & 0xff] ^
1037 Td3[(s0 >> 24) ] ^
1038 rk[3];
1039 #endif
1040 s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
1041 }
1042 /*
1043 * apply last round and
1044 * map cipher state to byte array block:
1045 */
1046 prefetch256(Td4);
1047
1048 *(u32*)(out+0) =
1049 ((u32)Td4[(s0 ) & 0xff]) ^
1050 ((u32)Td4[(s3 >> 8) & 0xff] << 8) ^
1051 ((u32)Td4[(s2 >> 16) & 0xff] << 16) ^
1052 ((u32)Td4[(s1 >> 24) ] << 24) ^
1053 rk[0];
1054 *(u32*)(out+4) =
1055 ((u32)Td4[(s1 ) & 0xff]) ^
1056 ((u32)Td4[(s0 >> 8) & 0xff] << 8) ^
1057 ((u32)Td4[(s3 >> 16) & 0xff] << 16) ^
1058 ((u32)Td4[(s2 >> 24) ] << 24) ^
1059 rk[1];
1060 *(u32*)(out+8) =
1061 ((u32)Td4[(s2 ) & 0xff]) ^
1062 ((u32)Td4[(s1 >> 8) & 0xff] << 8) ^
1063 ((u32)Td4[(s0 >> 16) & 0xff] << 16) ^
1064 ((u32)Td4[(s3 >> 24) ] << 24) ^
1065 rk[2];
1066 *(u32*)(out+12) =
1067 ((u32)Td4[(s3 ) & 0xff]) ^
1068 ((u32)Td4[(s2 >> 8) & 0xff] << 8) ^
1069 ((u32)Td4[(s1 >> 16) & 0xff] << 16) ^
1070 ((u32)Td4[(s0 >> 24) ] << 24) ^
1071 rk[3];
1072 }
1073