1/*
2Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
3Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
4denoted as "the implementer".
5
6For more information, feedback or questions, please refer to our websites:
7http://keccak.noekeon.org/
8http://keyak.noekeon.org/
9http://ketje.noekeon.org/
10
11To the extent possible under law, the implementer has waived all copyright
12and related or neighboring rights to the source code in this file.
13http://creativecommons.org/publicdomain/zero/1.0/
14*/
15
16#define declareABCDE \
17    UINT64 Aba, Abe, Abi, Abo, Abu; \
18    UINT64 Aga, Age, Agi, Ago, Agu; \
19    UINT64 Aka, Ake, Aki, Ako, Aku; \
20    UINT64 Ama, Ame, Ami, Amo, Amu; \
21    UINT64 Asa, Ase, Asi, Aso, Asu; \
22    UINT64 Bba, Bbe, Bbi, Bbo, Bbu; \
23    UINT64 Bga, Bge, Bgi, Bgo, Bgu; \
24    UINT64 Bka, Bke, Bki, Bko, Bku; \
25    UINT64 Bma, Bme, Bmi, Bmo, Bmu; \
26    UINT64 Bsa, Bse, Bsi, Bso, Bsu; \
27    UINT64 Ca, Ce, Ci, Co, Cu; \
28    UINT64 Da, De, Di, Do, Du; \
29    UINT64 Eba, Ebe, Ebi, Ebo, Ebu; \
30    UINT64 Ega, Ege, Egi, Ego, Egu; \
31    UINT64 Eka, Eke, Eki, Eko, Eku; \
32    UINT64 Ema, Eme, Emi, Emo, Emu; \
33    UINT64 Esa, Ese, Esi, Eso, Esu; \
34
35#define prepareTheta \
36    Ca = Aba^Aga^Aka^Ama^Asa; \
37    Ce = Abe^Age^Ake^Ame^Ase; \
38    Ci = Abi^Agi^Aki^Ami^Asi; \
39    Co = Abo^Ago^Ako^Amo^Aso; \
40    Cu = Abu^Agu^Aku^Amu^Asu; \
41
42#ifdef UseBebigokimisa
43/* --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa') */
44/* --- 64-bit lanes mapped to 64-bit words */
45#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
46    Da = Cu^ROL64(Ce, 1); \
47    De = Ca^ROL64(Ci, 1); \
48    Di = Ce^ROL64(Co, 1); \
49    Do = Ci^ROL64(Cu, 1); \
50    Du = Co^ROL64(Ca, 1); \
51\
52    A##ba ^= Da; \
53    Bba = A##ba; \
54    A##ge ^= De; \
55    Bbe = ROL64(A##ge, 44); \
56    A##ki ^= Di; \
57    Bbi = ROL64(A##ki, 43); \
58    A##mo ^= Do; \
59    Bbo = ROL64(A##mo, 21); \
60    A##su ^= Du; \
61    Bbu = ROL64(A##su, 14); \
62    E##ba =   Bba ^(  Bbe |  Bbi ); \
63    E##ba ^= KeccakF1600RoundConstants[i]; \
64    Ca = E##ba; \
65    E##be =   Bbe ^((~Bbi)|  Bbo ); \
66    Ce = E##be; \
67    E##bi =   Bbi ^(  Bbo &  Bbu ); \
68    Ci = E##bi; \
69    E##bo =   Bbo ^(  Bbu |  Bba ); \
70    Co = E##bo; \
71    E##bu =   Bbu ^(  Bba &  Bbe ); \
72    Cu = E##bu; \
73\
74    A##bo ^= Do; \
75    Bga = ROL64(A##bo, 28); \
76    A##gu ^= Du; \
77    Bge = ROL64(A##gu, 20); \
78    A##ka ^= Da; \
79    Bgi = ROL64(A##ka, 3); \
80    A##me ^= De; \
81    Bgo = ROL64(A##me, 45); \
82    A##si ^= Di; \
83    Bgu = ROL64(A##si, 61); \
84    E##ga =   Bga ^(  Bge |  Bgi ); \
85    Ca ^= E##ga; \
86    E##ge =   Bge ^(  Bgi &  Bgo ); \
87    Ce ^= E##ge; \
88    E##gi =   Bgi ^(  Bgo |(~Bgu)); \
89    Ci ^= E##gi; \
90    E##go =   Bgo ^(  Bgu |  Bga ); \
91    Co ^= E##go; \
92    E##gu =   Bgu ^(  Bga &  Bge ); \
93    Cu ^= E##gu; \
94\
95    A##be ^= De; \
96    Bka = ROL64(A##be, 1); \
97    A##gi ^= Di; \
98    Bke = ROL64(A##gi, 6); \
99    A##ko ^= Do; \
100    Bki = ROL64(A##ko, 25); \
101    A##mu ^= Du; \
102    Bko = ROL64(A##mu, 8); \
103    A##sa ^= Da; \
104    Bku = ROL64(A##sa, 18); \
105    E##ka =   Bka ^(  Bke |  Bki ); \
106    Ca ^= E##ka; \
107    E##ke =   Bke ^(  Bki &  Bko ); \
108    Ce ^= E##ke; \
109    E##ki =   Bki ^((~Bko)&  Bku ); \
110    Ci ^= E##ki; \
111    E##ko = (~Bko)^(  Bku |  Bka ); \
112    Co ^= E##ko; \
113    E##ku =   Bku ^(  Bka &  Bke ); \
114    Cu ^= E##ku; \
115\
116    A##bu ^= Du; \
117    Bma = ROL64(A##bu, 27); \
118    A##ga ^= Da; \
119    Bme = ROL64(A##ga, 36); \
120    A##ke ^= De; \
121    Bmi = ROL64(A##ke, 10); \
122    A##mi ^= Di; \
123    Bmo = ROL64(A##mi, 15); \
124    A##so ^= Do; \
125    Bmu = ROL64(A##so, 56); \
126    E##ma =   Bma ^(  Bme &  Bmi ); \
127    Ca ^= E##ma; \
128    E##me =   Bme ^(  Bmi |  Bmo ); \
129    Ce ^= E##me; \
130    E##mi =   Bmi ^((~Bmo)|  Bmu ); \
131    Ci ^= E##mi; \
132    E##mo = (~Bmo)^(  Bmu &  Bma ); \
133    Co ^= E##mo; \
134    E##mu =   Bmu ^(  Bma |  Bme ); \
135    Cu ^= E##mu; \
136\
137    A##bi ^= Di; \
138    Bsa = ROL64(A##bi, 62); \
139    A##go ^= Do; \
140    Bse = ROL64(A##go, 55); \
141    A##ku ^= Du; \
142    Bsi = ROL64(A##ku, 39); \
143    A##ma ^= Da; \
144    Bso = ROL64(A##ma, 41); \
145    A##se ^= De; \
146    Bsu = ROL64(A##se, 2); \
147    E##sa =   Bsa ^((~Bse)&  Bsi ); \
148    Ca ^= E##sa; \
149    E##se = (~Bse)^(  Bsi |  Bso ); \
150    Ce ^= E##se; \
151    E##si =   Bsi ^(  Bso &  Bsu ); \
152    Ci ^= E##si; \
153    E##so =   Bso ^(  Bsu |  Bsa ); \
154    Co ^= E##so; \
155    E##su =   Bsu ^(  Bsa &  Bse ); \
156    Cu ^= E##su; \
157\
158
159/* --- Code for round (lane complementing pattern 'bebigokimisa') */
160/* --- 64-bit lanes mapped to 64-bit words */
161#define thetaRhoPiChiIota(i, A, E) \
162    Da = Cu^ROL64(Ce, 1); \
163    De = Ca^ROL64(Ci, 1); \
164    Di = Ce^ROL64(Co, 1); \
165    Do = Ci^ROL64(Cu, 1); \
166    Du = Co^ROL64(Ca, 1); \
167\
168    A##ba ^= Da; \
169    Bba = A##ba; \
170    A##ge ^= De; \
171    Bbe = ROL64(A##ge, 44); \
172    A##ki ^= Di; \
173    Bbi = ROL64(A##ki, 43); \
174    A##mo ^= Do; \
175    Bbo = ROL64(A##mo, 21); \
176    A##su ^= Du; \
177    Bbu = ROL64(A##su, 14); \
178    E##ba =   Bba ^(  Bbe |  Bbi ); \
179    E##ba ^= KeccakF1600RoundConstants[i]; \
180    E##be =   Bbe ^((~Bbi)|  Bbo ); \
181    E##bi =   Bbi ^(  Bbo &  Bbu ); \
182    E##bo =   Bbo ^(  Bbu |  Bba ); \
183    E##bu =   Bbu ^(  Bba &  Bbe ); \
184\
185    A##bo ^= Do; \
186    Bga = ROL64(A##bo, 28); \
187    A##gu ^= Du; \
188    Bge = ROL64(A##gu, 20); \
189    A##ka ^= Da; \
190    Bgi = ROL64(A##ka, 3); \
191    A##me ^= De; \
192    Bgo = ROL64(A##me, 45); \
193    A##si ^= Di; \
194    Bgu = ROL64(A##si, 61); \
195    E##ga =   Bga ^(  Bge |  Bgi ); \
196    E##ge =   Bge ^(  Bgi &  Bgo ); \
197    E##gi =   Bgi ^(  Bgo |(~Bgu)); \
198    E##go =   Bgo ^(  Bgu |  Bga ); \
199    E##gu =   Bgu ^(  Bga &  Bge ); \
200\
201    A##be ^= De; \
202    Bka = ROL64(A##be, 1); \
203    A##gi ^= Di; \
204    Bke = ROL64(A##gi, 6); \
205    A##ko ^= Do; \
206    Bki = ROL64(A##ko, 25); \
207    A##mu ^= Du; \
208    Bko = ROL64(A##mu, 8); \
209    A##sa ^= Da; \
210    Bku = ROL64(A##sa, 18); \
211    E##ka =   Bka ^(  Bke |  Bki ); \
212    E##ke =   Bke ^(  Bki &  Bko ); \
213    E##ki =   Bki ^((~Bko)&  Bku ); \
214    E##ko = (~Bko)^(  Bku |  Bka ); \
215    E##ku =   Bku ^(  Bka &  Bke ); \
216\
217    A##bu ^= Du; \
218    Bma = ROL64(A##bu, 27); \
219    A##ga ^= Da; \
220    Bme = ROL64(A##ga, 36); \
221    A##ke ^= De; \
222    Bmi = ROL64(A##ke, 10); \
223    A##mi ^= Di; \
224    Bmo = ROL64(A##mi, 15); \
225    A##so ^= Do; \
226    Bmu = ROL64(A##so, 56); \
227    E##ma =   Bma ^(  Bme &  Bmi ); \
228    E##me =   Bme ^(  Bmi |  Bmo ); \
229    E##mi =   Bmi ^((~Bmo)|  Bmu ); \
230    E##mo = (~Bmo)^(  Bmu &  Bma ); \
231    E##mu =   Bmu ^(  Bma |  Bme ); \
232\
233    A##bi ^= Di; \
234    Bsa = ROL64(A##bi, 62); \
235    A##go ^= Do; \
236    Bse = ROL64(A##go, 55); \
237    A##ku ^= Du; \
238    Bsi = ROL64(A##ku, 39); \
239    A##ma ^= Da; \
240    Bso = ROL64(A##ma, 41); \
241    A##se ^= De; \
242    Bsu = ROL64(A##se, 2); \
243    E##sa =   Bsa ^((~Bse)&  Bsi ); \
244    E##se = (~Bse)^(  Bsi |  Bso ); \
245    E##si =   Bsi ^(  Bso &  Bsu ); \
246    E##so =   Bso ^(  Bsu |  Bsa ); \
247    E##su =   Bsu ^(  Bsa &  Bse ); \
248\
249
250#else /* UseBebigokimisa */
251/* --- Code for round, with prepare-theta */
252/* --- 64-bit lanes mapped to 64-bit words */
253#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
254    Da = Cu^ROL64(Ce, 1); \
255    De = Ca^ROL64(Ci, 1); \
256    Di = Ce^ROL64(Co, 1); \
257    Do = Ci^ROL64(Cu, 1); \
258    Du = Co^ROL64(Ca, 1); \
259\
260    A##ba ^= Da; \
261    Bba = A##ba; \
262    A##ge ^= De; \
263    Bbe = ROL64(A##ge, 44); \
264    A##ki ^= Di; \
265    Bbi = ROL64(A##ki, 43); \
266    A##mo ^= Do; \
267    Bbo = ROL64(A##mo, 21); \
268    A##su ^= Du; \
269    Bbu = ROL64(A##su, 14); \
270    E##ba =   Bba ^((~Bbe)&  Bbi ); \
271    E##ba ^= KeccakF1600RoundConstants[i]; \
272    Ca = E##ba; \
273    E##be =   Bbe ^((~Bbi)&  Bbo ); \
274    Ce = E##be; \
275    E##bi =   Bbi ^((~Bbo)&  Bbu ); \
276    Ci = E##bi; \
277    E##bo =   Bbo ^((~Bbu)&  Bba ); \
278    Co = E##bo; \
279    E##bu =   Bbu ^((~Bba)&  Bbe ); \
280    Cu = E##bu; \
281\
282    A##bo ^= Do; \
283    Bga = ROL64(A##bo, 28); \
284    A##gu ^= Du; \
285    Bge = ROL64(A##gu, 20); \
286    A##ka ^= Da; \
287    Bgi = ROL64(A##ka, 3); \
288    A##me ^= De; \
289    Bgo = ROL64(A##me, 45); \
290    A##si ^= Di; \
291    Bgu = ROL64(A##si, 61); \
292    E##ga =   Bga ^((~Bge)&  Bgi ); \
293    Ca ^= E##ga; \
294    E##ge =   Bge ^((~Bgi)&  Bgo ); \
295    Ce ^= E##ge; \
296    E##gi =   Bgi ^((~Bgo)&  Bgu ); \
297    Ci ^= E##gi; \
298    E##go =   Bgo ^((~Bgu)&  Bga ); \
299    Co ^= E##go; \
300    E##gu =   Bgu ^((~Bga)&  Bge ); \
301    Cu ^= E##gu; \
302\
303    A##be ^= De; \
304    Bka = ROL64(A##be, 1); \
305    A##gi ^= Di; \
306    Bke = ROL64(A##gi, 6); \
307    A##ko ^= Do; \
308    Bki = ROL64(A##ko, 25); \
309    A##mu ^= Du; \
310    Bko = ROL64(A##mu, 8); \
311    A##sa ^= Da; \
312    Bku = ROL64(A##sa, 18); \
313    E##ka =   Bka ^((~Bke)&  Bki ); \
314    Ca ^= E##ka; \
315    E##ke =   Bke ^((~Bki)&  Bko ); \
316    Ce ^= E##ke; \
317    E##ki =   Bki ^((~Bko)&  Bku ); \
318    Ci ^= E##ki; \
319    E##ko =   Bko ^((~Bku)&  Bka ); \
320    Co ^= E##ko; \
321    E##ku =   Bku ^((~Bka)&  Bke ); \
322    Cu ^= E##ku; \
323\
324    A##bu ^= Du; \
325    Bma = ROL64(A##bu, 27); \
326    A##ga ^= Da; \
327    Bme = ROL64(A##ga, 36); \
328    A##ke ^= De; \
329    Bmi = ROL64(A##ke, 10); \
330    A##mi ^= Di; \
331    Bmo = ROL64(A##mi, 15); \
332    A##so ^= Do; \
333    Bmu = ROL64(A##so, 56); \
334    E##ma =   Bma ^((~Bme)&  Bmi ); \
335    Ca ^= E##ma; \
336    E##me =   Bme ^((~Bmi)&  Bmo ); \
337    Ce ^= E##me; \
338    E##mi =   Bmi ^((~Bmo)&  Bmu ); \
339    Ci ^= E##mi; \
340    E##mo =   Bmo ^((~Bmu)&  Bma ); \
341    Co ^= E##mo; \
342    E##mu =   Bmu ^((~Bma)&  Bme ); \
343    Cu ^= E##mu; \
344\
345    A##bi ^= Di; \
346    Bsa = ROL64(A##bi, 62); \
347    A##go ^= Do; \
348    Bse = ROL64(A##go, 55); \
349    A##ku ^= Du; \
350    Bsi = ROL64(A##ku, 39); \
351    A##ma ^= Da; \
352    Bso = ROL64(A##ma, 41); \
353    A##se ^= De; \
354    Bsu = ROL64(A##se, 2); \
355    E##sa =   Bsa ^((~Bse)&  Bsi ); \
356    Ca ^= E##sa; \
357    E##se =   Bse ^((~Bsi)&  Bso ); \
358    Ce ^= E##se; \
359    E##si =   Bsi ^((~Bso)&  Bsu ); \
360    Ci ^= E##si; \
361    E##so =   Bso ^((~Bsu)&  Bsa ); \
362    Co ^= E##so; \
363    E##su =   Bsu ^((~Bsa)&  Bse ); \
364    Cu ^= E##su; \
365\
366
367/* --- Code for round */
368/* --- 64-bit lanes mapped to 64-bit words */
369#define thetaRhoPiChiIota(i, A, E) \
370    Da = Cu^ROL64(Ce, 1); \
371    De = Ca^ROL64(Ci, 1); \
372    Di = Ce^ROL64(Co, 1); \
373    Do = Ci^ROL64(Cu, 1); \
374    Du = Co^ROL64(Ca, 1); \
375\
376    A##ba ^= Da; \
377    Bba = A##ba; \
378    A##ge ^= De; \
379    Bbe = ROL64(A##ge, 44); \
380    A##ki ^= Di; \
381    Bbi = ROL64(A##ki, 43); \
382    A##mo ^= Do; \
383    Bbo = ROL64(A##mo, 21); \
384    A##su ^= Du; \
385    Bbu = ROL64(A##su, 14); \
386    E##ba =   Bba ^((~Bbe)&  Bbi ); \
387    E##ba ^= KeccakF1600RoundConstants[i]; \
388    E##be =   Bbe ^((~Bbi)&  Bbo ); \
389    E##bi =   Bbi ^((~Bbo)&  Bbu ); \
390    E##bo =   Bbo ^((~Bbu)&  Bba ); \
391    E##bu =   Bbu ^((~Bba)&  Bbe ); \
392\
393    A##bo ^= Do; \
394    Bga = ROL64(A##bo, 28); \
395    A##gu ^= Du; \
396    Bge = ROL64(A##gu, 20); \
397    A##ka ^= Da; \
398    Bgi = ROL64(A##ka, 3); \
399    A##me ^= De; \
400    Bgo = ROL64(A##me, 45); \
401    A##si ^= Di; \
402    Bgu = ROL64(A##si, 61); \
403    E##ga =   Bga ^((~Bge)&  Bgi ); \
404    E##ge =   Bge ^((~Bgi)&  Bgo ); \
405    E##gi =   Bgi ^((~Bgo)&  Bgu ); \
406    E##go =   Bgo ^((~Bgu)&  Bga ); \
407    E##gu =   Bgu ^((~Bga)&  Bge ); \
408\
409    A##be ^= De; \
410    Bka = ROL64(A##be, 1); \
411    A##gi ^= Di; \
412    Bke = ROL64(A##gi, 6); \
413    A##ko ^= Do; \
414    Bki = ROL64(A##ko, 25); \
415    A##mu ^= Du; \
416    Bko = ROL64(A##mu, 8); \
417    A##sa ^= Da; \
418    Bku = ROL64(A##sa, 18); \
419    E##ka =   Bka ^((~Bke)&  Bki ); \
420    E##ke =   Bke ^((~Bki)&  Bko ); \
421    E##ki =   Bki ^((~Bko)&  Bku ); \
422    E##ko =   Bko ^((~Bku)&  Bka ); \
423    E##ku =   Bku ^((~Bka)&  Bke ); \
424\
425    A##bu ^= Du; \
426    Bma = ROL64(A##bu, 27); \
427    A##ga ^= Da; \
428    Bme = ROL64(A##ga, 36); \
429    A##ke ^= De; \
430    Bmi = ROL64(A##ke, 10); \
431    A##mi ^= Di; \
432    Bmo = ROL64(A##mi, 15); \
433    A##so ^= Do; \
434    Bmu = ROL64(A##so, 56); \
435    E##ma =   Bma ^((~Bme)&  Bmi ); \
436    E##me =   Bme ^((~Bmi)&  Bmo ); \
437    E##mi =   Bmi ^((~Bmo)&  Bmu ); \
438    E##mo =   Bmo ^((~Bmu)&  Bma ); \
439    E##mu =   Bmu ^((~Bma)&  Bme ); \
440\
441    A##bi ^= Di; \
442    Bsa = ROL64(A##bi, 62); \
443    A##go ^= Do; \
444    Bse = ROL64(A##go, 55); \
445    A##ku ^= Du; \
446    Bsi = ROL64(A##ku, 39); \
447    A##ma ^= Da; \
448    Bso = ROL64(A##ma, 41); \
449    A##se ^= De; \
450    Bsu = ROL64(A##se, 2); \
451    E##sa =   Bsa ^((~Bse)&  Bsi ); \
452    E##se =   Bse ^((~Bsi)&  Bso ); \
453    E##si =   Bsi ^((~Bso)&  Bsu ); \
454    E##so =   Bso ^((~Bsu)&  Bsa ); \
455    E##su =   Bsu ^((~Bsa)&  Bse ); \
456\
457
458#endif /* UseBebigokimisa */
459
460#define copyFromState(X, state) \
461    X##ba = state[ 0]; \
462    X##be = state[ 1]; \
463    X##bi = state[ 2]; \
464    X##bo = state[ 3]; \
465    X##bu = state[ 4]; \
466    X##ga = state[ 5]; \
467    X##ge = state[ 6]; \
468    X##gi = state[ 7]; \
469    X##go = state[ 8]; \
470    X##gu = state[ 9]; \
471    X##ka = state[10]; \
472    X##ke = state[11]; \
473    X##ki = state[12]; \
474    X##ko = state[13]; \
475    X##ku = state[14]; \
476    X##ma = state[15]; \
477    X##me = state[16]; \
478    X##mi = state[17]; \
479    X##mo = state[18]; \
480    X##mu = state[19]; \
481    X##sa = state[20]; \
482    X##se = state[21]; \
483    X##si = state[22]; \
484    X##so = state[23]; \
485    X##su = state[24]; \
486
487#define copyToState(state, X) \
488    state[ 0] = X##ba; \
489    state[ 1] = X##be; \
490    state[ 2] = X##bi; \
491    state[ 3] = X##bo; \
492    state[ 4] = X##bu; \
493    state[ 5] = X##ga; \
494    state[ 6] = X##ge; \
495    state[ 7] = X##gi; \
496    state[ 8] = X##go; \
497    state[ 9] = X##gu; \
498    state[10] = X##ka; \
499    state[11] = X##ke; \
500    state[12] = X##ki; \
501    state[13] = X##ko; \
502    state[14] = X##ku; \
503    state[15] = X##ma; \
504    state[16] = X##me; \
505    state[17] = X##mi; \
506    state[18] = X##mo; \
507    state[19] = X##mu; \
508    state[20] = X##sa; \
509    state[21] = X##se; \
510    state[22] = X##si; \
511    state[23] = X##so; \
512    state[24] = X##su; \
513
514#define copyStateVariables(X, Y) \
515    X##ba = Y##ba; \
516    X##be = Y##be; \
517    X##bi = Y##bi; \
518    X##bo = Y##bo; \
519    X##bu = Y##bu; \
520    X##ga = Y##ga; \
521    X##ge = Y##ge; \
522    X##gi = Y##gi; \
523    X##go = Y##go; \
524    X##gu = Y##gu; \
525    X##ka = Y##ka; \
526    X##ke = Y##ke; \
527    X##ki = Y##ki; \
528    X##ko = Y##ko; \
529    X##ku = Y##ku; \
530    X##ma = Y##ma; \
531    X##me = Y##me; \
532    X##mi = Y##mi; \
533    X##mo = Y##mo; \
534    X##mu = Y##mu; \
535    X##sa = Y##sa; \
536    X##se = Y##se; \
537    X##si = Y##si; \
538    X##so = Y##so; \
539    X##su = Y##su; \
540
541#define copyFromStateAndAdd(X, state, input, laneCount) \
542    if (laneCount < 16) { \
543        if (laneCount < 8) { \
544            if (laneCount < 4) { \
545                if (laneCount < 2) { \
546                    if (laneCount < 1) { \
547                        X##ba = state[ 0]; \
548                    } \
549                    else { \
550                        X##ba = state[ 0]^input[ 0]; \
551                    } \
552                    X##be = state[ 1]; \
553                    X##bi = state[ 2]; \
554                } \
555                else { \
556                    X##ba = state[ 0]^input[ 0]; \
557                    X##be = state[ 1]^input[ 1]; \
558                    if (laneCount < 3) { \
559                        X##bi = state[ 2]; \
560                    } \
561                    else { \
562                        X##bi = state[ 2]^input[ 2]; \
563                    } \
564                } \
565                X##bo = state[ 3]; \
566                X##bu = state[ 4]; \
567                X##ga = state[ 5]; \
568                X##ge = state[ 6]; \
569            } \
570            else { \
571                X##ba = state[ 0]^input[ 0]; \
572                X##be = state[ 1]^input[ 1]; \
573                X##bi = state[ 2]^input[ 2]; \
574                X##bo = state[ 3]^input[ 3]; \
575                if (laneCount < 6) { \
576                    if (laneCount < 5) { \
577                        X##bu = state[ 4]; \
578                    } \
579                    else { \
580                        X##bu = state[ 4]^input[ 4]; \
581                    } \
582                    X##ga = state[ 5]; \
583                    X##ge = state[ 6]; \
584                } \
585                else { \
586                    X##bu = state[ 4]^input[ 4]; \
587                    X##ga = state[ 5]^input[ 5]; \
588                    if (laneCount < 7) { \
589                        X##ge = state[ 6]; \
590                    } \
591                    else { \
592                        X##ge = state[ 6]^input[ 6]; \
593                    } \
594                } \
595            } \
596            X##gi = state[ 7]; \
597            X##go = state[ 8]; \
598            X##gu = state[ 9]; \
599            X##ka = state[10]; \
600            X##ke = state[11]; \
601            X##ki = state[12]; \
602            X##ko = state[13]; \
603            X##ku = state[14]; \
604        } \
605        else { \
606            X##ba = state[ 0]^input[ 0]; \
607            X##be = state[ 1]^input[ 1]; \
608            X##bi = state[ 2]^input[ 2]; \
609            X##bo = state[ 3]^input[ 3]; \
610            X##bu = state[ 4]^input[ 4]; \
611            X##ga = state[ 5]^input[ 5]; \
612            X##ge = state[ 6]^input[ 6]; \
613            X##gi = state[ 7]^input[ 7]; \
614            if (laneCount < 12) { \
615                if (laneCount < 10) { \
616                    if (laneCount < 9) { \
617                        X##go = state[ 8]; \
618                    } \
619                    else { \
620                        X##go = state[ 8]^input[ 8]; \
621                    } \
622                    X##gu = state[ 9]; \
623                    X##ka = state[10]; \
624                } \
625                else { \
626                    X##go = state[ 8]^input[ 8]; \
627                    X##gu = state[ 9]^input[ 9]; \
628                    if (laneCount < 11) { \
629                        X##ka = state[10]; \
630                    } \
631                    else { \
632                        X##ka = state[10]^input[10]; \
633                    } \
634                } \
635                X##ke = state[11]; \
636                X##ki = state[12]; \
637                X##ko = state[13]; \
638                X##ku = state[14]; \
639            } \
640            else { \
641                X##go = state[ 8]^input[ 8]; \
642                X##gu = state[ 9]^input[ 9]; \
643                X##ka = state[10]^input[10]; \
644                X##ke = state[11]^input[11]; \
645                if (laneCount < 14) { \
646                    if (laneCount < 13) { \
647                        X##ki = state[12]; \
648                    } \
649                    else { \
650                        X##ki = state[12]^input[12]; \
651                    } \
652                    X##ko = state[13]; \
653                    X##ku = state[14]; \
654                } \
655                else { \
656                    X##ki = state[12]^input[12]; \
657                    X##ko = state[13]^input[13]; \
658                    if (laneCount < 15) { \
659                        X##ku = state[14]; \
660                    } \
661                    else { \
662                        X##ku = state[14]^input[14]; \
663                    } \
664                } \
665            } \
666        } \
667        X##ma = state[15]; \
668        X##me = state[16]; \
669        X##mi = state[17]; \
670        X##mo = state[18]; \
671        X##mu = state[19]; \
672        X##sa = state[20]; \
673        X##se = state[21]; \
674        X##si = state[22]; \
675        X##so = state[23]; \
676        X##su = state[24]; \
677    } \
678    else { \
679        X##ba = state[ 0]^input[ 0]; \
680        X##be = state[ 1]^input[ 1]; \
681        X##bi = state[ 2]^input[ 2]; \
682        X##bo = state[ 3]^input[ 3]; \
683        X##bu = state[ 4]^input[ 4]; \
684        X##ga = state[ 5]^input[ 5]; \
685        X##ge = state[ 6]^input[ 6]; \
686        X##gi = state[ 7]^input[ 7]; \
687        X##go = state[ 8]^input[ 8]; \
688        X##gu = state[ 9]^input[ 9]; \
689        X##ka = state[10]^input[10]; \
690        X##ke = state[11]^input[11]; \
691        X##ki = state[12]^input[12]; \
692        X##ko = state[13]^input[13]; \
693        X##ku = state[14]^input[14]; \
694        X##ma = state[15]^input[15]; \
695        if (laneCount < 24) { \
696            if (laneCount < 20) { \
697                if (laneCount < 18) { \
698                    if (laneCount < 17) { \
699                        X##me = state[16]; \
700                    } \
701                    else { \
702                        X##me = state[16]^input[16]; \
703                    } \
704                    X##mi = state[17]; \
705                    X##mo = state[18]; \
706                } \
707                else { \
708                    X##me = state[16]^input[16]; \
709                    X##mi = state[17]^input[17]; \
710                    if (laneCount < 19) { \
711                        X##mo = state[18]; \
712                    } \
713                    else { \
714                        X##mo = state[18]^input[18]; \
715                    } \
716                } \
717                X##mu = state[19]; \
718                X##sa = state[20]; \
719                X##se = state[21]; \
720                X##si = state[22]; \
721            } \
722            else { \
723                X##me = state[16]^input[16]; \
724                X##mi = state[17]^input[17]; \
725                X##mo = state[18]^input[18]; \
726                X##mu = state[19]^input[19]; \
727                if (laneCount < 22) { \
728                    if (laneCount < 21) { \
729                        X##sa = state[20]; \
730                    } \
731                    else { \
732                        X##sa = state[20]^input[20]; \
733                    } \
734                    X##se = state[21]; \
735                    X##si = state[22]; \
736                } \
737                else { \
738                    X##sa = state[20]^input[20]; \
739                    X##se = state[21]^input[21]; \
740                    if (laneCount < 23) { \
741                        X##si = state[22]; \
742                    } \
743                    else { \
744                        X##si = state[22]^input[22]; \
745                    } \
746                } \
747            } \
748            X##so = state[23]; \
749            X##su = state[24]; \
750        } \
751        else { \
752            X##me = state[16]^input[16]; \
753            X##mi = state[17]^input[17]; \
754            X##mo = state[18]^input[18]; \
755            X##mu = state[19]^input[19]; \
756            X##sa = state[20]^input[20]; \
757            X##se = state[21]^input[21]; \
758            X##si = state[22]^input[22]; \
759            X##so = state[23]^input[23]; \
760            if (laneCount < 25) { \
761                X##su = state[24]; \
762            } \
763            else { \
764                X##su = state[24]^input[24]; \
765            } \
766        } \
767    }
768
769#define addInput(X, input, laneCount) \
770    if (laneCount == 21) { \
771        X##ba ^= input[ 0]; \
772        X##be ^= input[ 1]; \
773        X##bi ^= input[ 2]; \
774        X##bo ^= input[ 3]; \
775        X##bu ^= input[ 4]; \
776        X##ga ^= input[ 5]; \
777        X##ge ^= input[ 6]; \
778        X##gi ^= input[ 7]; \
779        X##go ^= input[ 8]; \
780        X##gu ^= input[ 9]; \
781        X##ka ^= input[10]; \
782        X##ke ^= input[11]; \
783        X##ki ^= input[12]; \
784        X##ko ^= input[13]; \
785        X##ku ^= input[14]; \
786        X##ma ^= input[15]; \
787        X##me ^= input[16]; \
788        X##mi ^= input[17]; \
789        X##mo ^= input[18]; \
790        X##mu ^= input[19]; \
791        X##sa ^= input[20]; \
792    } \
793    else if (laneCount < 16) { \
794        if (laneCount < 8) { \
795            if (laneCount < 4) { \
796                if (laneCount < 2) { \
797                    if (laneCount < 1) { \
798                    } \
799                    else { \
800                        X##ba ^= input[ 0]; \
801                    } \
802                } \
803                else { \
804                    X##ba ^= input[ 0]; \
805                    X##be ^= input[ 1]; \
806                    if (laneCount < 3) { \
807                    } \
808                    else { \
809                        X##bi ^= input[ 2]; \
810                    } \
811                } \
812            } \
813            else { \
814                X##ba ^= input[ 0]; \
815                X##be ^= input[ 1]; \
816                X##bi ^= input[ 2]; \
817                X##bo ^= input[ 3]; \
818                if (laneCount < 6) { \
819                    if (laneCount < 5) { \
820                    } \
821                    else { \
822                        X##bu ^= input[ 4]; \
823                    } \
824                } \
825                else { \
826                    X##bu ^= input[ 4]; \
827                    X##ga ^= input[ 5]; \
828                    if (laneCount < 7) { \
829                    } \
830                    else { \
831                        X##ge ^= input[ 6]; \
832                    } \
833                } \
834            } \
835        } \
836        else { \
837            X##ba ^= input[ 0]; \
838            X##be ^= input[ 1]; \
839            X##bi ^= input[ 2]; \
840            X##bo ^= input[ 3]; \
841            X##bu ^= input[ 4]; \
842            X##ga ^= input[ 5]; \
843            X##ge ^= input[ 6]; \
844            X##gi ^= input[ 7]; \
845            if (laneCount < 12) { \
846                if (laneCount < 10) { \
847                    if (laneCount < 9) { \
848                    } \
849                    else { \
850                        X##go ^= input[ 8]; \
851                    } \
852                } \
853                else { \
854                    X##go ^= input[ 8]; \
855                    X##gu ^= input[ 9]; \
856                    if (laneCount < 11) { \
857                    } \
858                    else { \
859                        X##ka ^= input[10]; \
860                    } \
861                } \
862            } \
863            else { \
864                X##go ^= input[ 8]; \
865                X##gu ^= input[ 9]; \
866                X##ka ^= input[10]; \
867                X##ke ^= input[11]; \
868                if (laneCount < 14) { \
869                    if (laneCount < 13) { \
870                    } \
871                    else { \
872                        X##ki ^= input[12]; \
873                    } \
874                } \
875                else { \
876                    X##ki ^= input[12]; \
877                    X##ko ^= input[13]; \
878                    if (laneCount < 15) { \
879                    } \
880                    else { \
881                        X##ku ^= input[14]; \
882                    } \
883                } \
884            } \
885        } \
886    } \
887    else { \
888        X##ba ^= input[ 0]; \
889        X##be ^= input[ 1]; \
890        X##bi ^= input[ 2]; \
891        X##bo ^= input[ 3]; \
892        X##bu ^= input[ 4]; \
893        X##ga ^= input[ 5]; \
894        X##ge ^= input[ 6]; \
895        X##gi ^= input[ 7]; \
896        X##go ^= input[ 8]; \
897        X##gu ^= input[ 9]; \
898        X##ka ^= input[10]; \
899        X##ke ^= input[11]; \
900        X##ki ^= input[12]; \
901        X##ko ^= input[13]; \
902        X##ku ^= input[14]; \
903        X##ma ^= input[15]; \
904        if (laneCount < 24) { \
905            if (laneCount < 20) { \
906                if (laneCount < 18) { \
907                    if (laneCount < 17) { \
908                    } \
909                    else { \
910                        X##me ^= input[16]; \
911                    } \
912                } \
913                else { \
914                    X##me ^= input[16]; \
915                    X##mi ^= input[17]; \
916                    if (laneCount < 19) { \
917                    } \
918                    else { \
919                        X##mo ^= input[18]; \
920                    } \
921                } \
922            } \
923            else { \
924                X##me ^= input[16]; \
925                X##mi ^= input[17]; \
926                X##mo ^= input[18]; \
927                X##mu ^= input[19]; \
928                if (laneCount < 22) { \
929                    if (laneCount < 21) { \
930                    } \
931                    else { \
932                        X##sa ^= input[20]; \
933                    } \
934                } \
935                else { \
936                    X##sa ^= input[20]; \
937                    X##se ^= input[21]; \
938                    if (laneCount < 23) { \
939                    } \
940                    else { \
941                        X##si ^= input[22]; \
942                    } \
943                } \
944            } \
945        } \
946        else { \
947            X##me ^= input[16]; \
948            X##mi ^= input[17]; \
949            X##mo ^= input[18]; \
950            X##mu ^= input[19]; \
951            X##sa ^= input[20]; \
952            X##se ^= input[21]; \
953            X##si ^= input[22]; \
954            X##so ^= input[23]; \
955            if (laneCount < 25) { \
956            } \
957            else { \
958                X##su ^= input[24]; \
959            } \
960        } \
961    }
962
963#ifdef UseBebigokimisa
964
965#define copyToStateAndOutput(X, state, output, laneCount) \
966    if (laneCount < 16) { \
967        if (laneCount < 8) { \
968            if (laneCount < 4) { \
969                if (laneCount < 2) { \
970                    state[ 0] = X##ba; \
971                    if (laneCount >= 1) { \
972                        output[ 0] = X##ba; \
973                    } \
974                    state[ 1] = X##be; \
975                    state[ 2] = X##bi; \
976                } \
977                else { \
978                    state[ 0] = X##ba; \
979                    output[ 0] = X##ba; \
980                    state[ 1] = X##be; \
981                    output[ 1] = ~X##be; \
982                    state[ 2] = X##bi; \
983                    if (laneCount >= 3) { \
984                        output[ 2] = ~X##bi; \
985                    } \
986                } \
987                state[ 3] = X##bo; \
988                state[ 4] = X##bu; \
989                state[ 5] = X##ga; \
990                state[ 6] = X##ge; \
991            } \
992            else { \
993                state[ 0] = X##ba; \
994                output[ 0] = X##ba; \
995                state[ 1] = X##be; \
996                output[ 1] = ~X##be; \
997                state[ 2] = X##bi; \
998                output[ 2] = ~X##bi; \
999                state[ 3] = X##bo; \
1000                output[ 3] = X##bo; \
1001                if (laneCount < 6) { \
1002                    state[ 4] = X##bu; \
1003                    if (laneCount >= 5) { \
1004                        output[ 4] = X##bu; \
1005                    } \
1006                    state[ 5] = X##ga; \
1007                    state[ 6] = X##ge; \
1008                } \
1009                else { \
1010                    state[ 4] = X##bu; \
1011                    output[ 4] = X##bu; \
1012                    state[ 5] = X##ga; \
1013                    output[ 5] = X##ga; \
1014                    state[ 6] = X##ge; \
1015                    if (laneCount >= 7) { \
1016                        output[ 6] = X##ge; \
1017                    } \
1018                } \
1019            } \
1020            state[ 7] = X##gi; \
1021            state[ 8] = X##go; \
1022            state[ 9] = X##gu; \
1023            state[10] = X##ka; \
1024            state[11] = X##ke; \
1025            state[12] = X##ki; \
1026            state[13] = X##ko; \
1027            state[14] = X##ku; \
1028        } \
1029        else { \
1030            state[ 0] = X##ba; \
1031            output[ 0] = X##ba; \
1032            state[ 1] = X##be; \
1033            output[ 1] = ~X##be; \
1034            state[ 2] = X##bi; \
1035            output[ 2] = ~X##bi; \
1036            state[ 3] = X##bo; \
1037            output[ 3] = X##bo; \
1038            state[ 4] = X##bu; \
1039            output[ 4] = X##bu; \
1040            state[ 5] = X##ga; \
1041            output[ 5] = X##ga; \
1042            state[ 6] = X##ge; \
1043            output[ 6] = X##ge; \
1044            state[ 7] = X##gi; \
1045            output[ 7] = X##gi; \
1046            if (laneCount < 12) { \
1047                if (laneCount < 10) { \
1048                    state[ 8] = X##go; \
1049                    if (laneCount >= 9) { \
1050                        output[ 8] = ~X##go; \
1051                    } \
1052                    state[ 9] = X##gu; \
1053                    state[10] = X##ka; \
1054                } \
1055                else { \
1056                    state[ 8] = X##go; \
1057                    output[ 8] = ~X##go; \
1058                    state[ 9] = X##gu; \
1059                    output[ 9] = X##gu; \
1060                    state[10] = X##ka; \
1061                    if (laneCount >= 11) { \
1062                        output[10] = X##ka; \
1063                    } \
1064                } \
1065                state[11] = X##ke; \
1066                state[12] = X##ki; \
1067                state[13] = X##ko; \
1068                state[14] = X##ku; \
1069            } \
1070            else { \
1071                state[ 8] = X##go; \
1072                output[ 8] = ~X##go; \
1073                state[ 9] = X##gu; \
1074                output[ 9] = X##gu; \
1075                state[10] = X##ka; \
1076                output[10] = X##ka; \
1077                state[11] = X##ke; \
1078                output[11] = X##ke; \
1079                if (laneCount < 14) { \
1080                    state[12] = X##ki; \
1081                    if (laneCount >= 13) { \
1082                        output[12] = ~X##ki; \
1083                    } \
1084                    state[13] = X##ko; \
1085                    state[14] = X##ku; \
1086                } \
1087                else { \
1088                    state[12] = X##ki; \
1089                    output[12] = ~X##ki; \
1090                    state[13] = X##ko; \
1091                    output[13] = X##ko; \
1092                    state[14] = X##ku; \
1093                    if (laneCount >= 15) { \
1094                        output[14] = X##ku; \
1095                    } \
1096                } \
1097            } \
1098        } \
1099        state[15] = X##ma; \
1100        state[16] = X##me; \
1101        state[17] = X##mi; \
1102        state[18] = X##mo; \
1103        state[19] = X##mu; \
1104        state[20] = X##sa; \
1105        state[21] = X##se; \
1106        state[22] = X##si; \
1107        state[23] = X##so; \
1108        state[24] = X##su; \
1109    } \
1110    else { \
1111        state[ 0] = X##ba; \
1112        output[ 0] = X##ba; \
1113        state[ 1] = X##be; \
1114        output[ 1] = ~X##be; \
1115        state[ 2] = X##bi; \
1116        output[ 2] = ~X##bi; \
1117        state[ 3] = X##bo; \
1118        output[ 3] = X##bo; \
1119        state[ 4] = X##bu; \
1120        output[ 4] = X##bu; \
1121        state[ 5] = X##ga; \
1122        output[ 5] = X##ga; \
1123        state[ 6] = X##ge; \
1124        output[ 6] = X##ge; \
1125        state[ 7] = X##gi; \
1126        output[ 7] = X##gi; \
1127        state[ 8] = X##go; \
1128        output[ 8] = ~X##go; \
1129        state[ 9] = X##gu; \
1130        output[ 9] = X##gu; \
1131        state[10] = X##ka; \
1132        output[10] = X##ka; \
1133        state[11] = X##ke; \
1134        output[11] = X##ke; \
1135        state[12] = X##ki; \
1136        output[12] = ~X##ki; \
1137        state[13] = X##ko; \
1138        output[13] = X##ko; \
1139        state[14] = X##ku; \
1140        output[14] = X##ku; \
1141        state[15] = X##ma; \
1142        output[15] = X##ma; \
1143        if (laneCount < 24) { \
1144            if (laneCount < 20) { \
1145                if (laneCount < 18) { \
1146                    state[16] = X##me; \
1147                    if (laneCount >= 17) { \
1148                        output[16] = X##me; \
1149                    } \
1150                    state[17] = X##mi; \
1151                    state[18] = X##mo; \
1152                } \
1153                else { \
1154                    state[16] = X##me; \
1155                    output[16] = X##me; \
1156                    state[17] = X##mi; \
1157                    output[17] = ~X##mi; \
1158                    state[18] = X##mo; \
1159                    if (laneCount >= 19) { \
1160                        output[18] = X##mo; \
1161                    } \
1162                } \
1163                state[19] = X##mu; \
1164                state[20] = X##sa; \
1165                state[21] = X##se; \
1166                state[22] = X##si; \
1167            } \
1168            else { \
1169                state[16] = X##me; \
1170                output[16] = X##me; \
1171                state[17] = X##mi; \
1172                output[17] = ~X##mi; \
1173                state[18] = X##mo; \
1174                output[18] = X##mo; \
1175                state[19] = X##mu; \
1176                output[19] = X##mu; \
1177                if (laneCount < 22) { \
1178                    state[20] = X##sa; \
1179                    if (laneCount >= 21) { \
1180                        output[20] = ~X##sa; \
1181                    } \
1182                    state[21] = X##se; \
1183                    state[22] = X##si; \
1184                } \
1185                else { \
1186                    state[20] = X##sa; \
1187                    output[20] = ~X##sa; \
1188                    state[21] = X##se; \
1189                    output[21] = X##se; \
1190                    state[22] = X##si; \
1191                    if (laneCount >= 23) { \
1192                        output[22] = X##si; \
1193                    } \
1194                } \
1195            } \
1196            state[23] = X##so; \
1197            state[24] = X##su; \
1198        } \
1199        else { \
1200            state[16] = X##me; \
1201            output[16] = X##me; \
1202            state[17] = X##mi; \
1203            output[17] = ~X##mi; \
1204            state[18] = X##mo; \
1205            output[18] = X##mo; \
1206            state[19] = X##mu; \
1207            output[19] = X##mu; \
1208            state[20] = X##sa; \
1209            output[20] = ~X##sa; \
1210            state[21] = X##se; \
1211            output[21] = X##se; \
1212            state[22] = X##si; \
1213            output[22] = X##si; \
1214            state[23] = X##so; \
1215            output[23] = X##so; \
1216            state[24] = X##su; \
1217            if (laneCount >= 25) { \
1218                output[24] = X##su; \
1219            } \
1220        } \
1221    }
1222
1223#define output(X, output, laneCount) \
1224    if (laneCount < 16) { \
1225        if (laneCount < 8) { \
1226            if (laneCount < 4) { \
1227                if (laneCount < 2) { \
1228                    if (laneCount >= 1) { \
1229                        output[ 0] = X##ba; \
1230                    } \
1231                } \
1232                else { \
1233                    output[ 0] = X##ba; \
1234                    output[ 1] = ~X##be; \
1235                    if (laneCount >= 3) { \
1236                        output[ 2] = ~X##bi; \
1237                    } \
1238                } \
1239            } \
1240            else { \
1241                output[ 0] = X##ba; \
1242                output[ 1] = ~X##be; \
1243                output[ 2] = ~X##bi; \
1244                output[ 3] = X##bo; \
1245                if (laneCount < 6) { \
1246                    if (laneCount >= 5) { \
1247                        output[ 4] = X##bu; \
1248                    } \
1249                } \
1250                else { \
1251                    output[ 4] = X##bu; \
1252                    output[ 5] = X##ga; \
1253                    if (laneCount >= 7) { \
1254                        output[ 6] = X##ge; \
1255                    } \
1256                } \
1257            } \
1258        } \
1259        else { \
1260            output[ 0] = X##ba; \
1261            output[ 1] = ~X##be; \
1262            output[ 2] = ~X##bi; \
1263            output[ 3] = X##bo; \
1264            output[ 4] = X##bu; \
1265            output[ 5] = X##ga; \
1266            output[ 6] = X##ge; \
1267            output[ 7] = X##gi; \
1268            if (laneCount < 12) { \
1269                if (laneCount < 10) { \
1270                    if (laneCount >= 9) { \
1271                        output[ 8] = ~X##go; \
1272                    } \
1273                } \
1274                else { \
1275                    output[ 8] = ~X##go; \
1276                    output[ 9] = X##gu; \
1277                    if (laneCount >= 11) { \
1278                        output[10] = X##ka; \
1279                    } \
1280                } \
1281            } \
1282            else { \
1283                output[ 8] = ~X##go; \
1284                output[ 9] = X##gu; \
1285                output[10] = X##ka; \
1286                output[11] = X##ke; \
1287                if (laneCount < 14) { \
1288                    if (laneCount >= 13) { \
1289                        output[12] = ~X##ki; \
1290                    } \
1291                } \
1292                else { \
1293                    output[12] = ~X##ki; \
1294                    output[13] = X##ko; \
1295                    if (laneCount >= 15) { \
1296                        output[14] = X##ku; \
1297                    } \
1298                } \
1299            } \
1300        } \
1301    } \
1302    else { \
1303        output[ 0] = X##ba; \
1304        output[ 1] = ~X##be; \
1305        output[ 2] = ~X##bi; \
1306        output[ 3] = X##bo; \
1307        output[ 4] = X##bu; \
1308        output[ 5] = X##ga; \
1309        output[ 6] = X##ge; \
1310        output[ 7] = X##gi; \
1311        output[ 8] = ~X##go; \
1312        output[ 9] = X##gu; \
1313        output[10] = X##ka; \
1314        output[11] = X##ke; \
1315        output[12] = ~X##ki; \
1316        output[13] = X##ko; \
1317        output[14] = X##ku; \
1318        output[15] = X##ma; \
1319        if (laneCount < 24) { \
1320            if (laneCount < 20) { \
1321                if (laneCount < 18) { \
1322                    if (laneCount >= 17) { \
1323                        output[16] = X##me; \
1324                    } \
1325                } \
1326                else { \
1327                    output[16] = X##me; \
1328                    output[17] = ~X##mi; \
1329                    if (laneCount >= 19) { \
1330                        output[18] = X##mo; \
1331                    } \
1332                } \
1333            } \
1334            else { \
1335                output[16] = X##me; \
1336                output[17] = ~X##mi; \
1337                output[18] = X##mo; \
1338                output[19] = X##mu; \
1339                if (laneCount < 22) { \
1340                    if (laneCount >= 21) { \
1341                        output[20] = ~X##sa; \
1342                    } \
1343                } \
1344                else { \
1345                    output[20] = ~X##sa; \
1346                    output[21] = X##se; \
1347                    if (laneCount >= 23) { \
1348                        output[22] = X##si; \
1349                    } \
1350                } \
1351            } \
1352        } \
1353        else { \
1354            output[16] = X##me; \
1355            output[17] = ~X##mi; \
1356            output[18] = X##mo; \
1357            output[19] = X##mu; \
1358            output[20] = ~X##sa; \
1359            output[21] = X##se; \
1360            output[22] = X##si; \
1361            output[23] = X##so; \
1362            if (laneCount >= 25) { \
1363                output[24] = X##su; \
1364            } \
1365        } \
1366    }
1367
1368#define wrapOne(X, input, output, index, name) \
1369    X##name ^= input[index]; \
1370    output[index] = X##name;
1371
1372#define wrapOneInvert(X, input, output, index, name) \
1373    X##name ^= input[index]; \
1374    output[index] = ~X##name;
1375
1376#define unwrapOne(X, input, output, index, name) \
1377    output[index] = input[index] ^ X##name; \
1378    X##name ^= output[index];
1379
1380#define unwrapOneInvert(X, input, output, index, name) \
1381    output[index] = ~(input[index] ^ X##name); \
1382    X##name ^= output[index]; \
1383
1384#else /* UseBebigokimisa */
1385
1386#define copyToStateAndOutput(X, state, output, laneCount) \
1387    if (laneCount < 16) { \
1388        if (laneCount < 8) { \
1389            if (laneCount < 4) { \
1390                if (laneCount < 2) { \
1391                    state[ 0] = X##ba; \
1392                    if (laneCount >= 1) { \
1393                        output[ 0] = X##ba; \
1394                    } \
1395                    state[ 1] = X##be; \
1396                    state[ 2] = X##bi; \
1397                } \
1398                else { \
1399                    state[ 0] = X##ba; \
1400                    output[ 0] = X##ba; \
1401                    state[ 1] = X##be; \
1402                    output[ 1] = X##be; \
1403                    state[ 2] = X##bi; \
1404                    if (laneCount >= 3) { \
1405                        output[ 2] = X##bi; \
1406                    } \
1407                } \
1408                state[ 3] = X##bo; \
1409                state[ 4] = X##bu; \
1410                state[ 5] = X##ga; \
1411                state[ 6] = X##ge; \
1412            } \
1413            else { \
1414                state[ 0] = X##ba; \
1415                output[ 0] = X##ba; \
1416                state[ 1] = X##be; \
1417                output[ 1] = X##be; \
1418                state[ 2] = X##bi; \
1419                output[ 2] = X##bi; \
1420                state[ 3] = X##bo; \
1421                output[ 3] = X##bo; \
1422                if (laneCount < 6) { \
1423                    state[ 4] = X##bu; \
1424                    if (laneCount >= 5) { \
1425                        output[ 4] = X##bu; \
1426                    } \
1427                    state[ 5] = X##ga; \
1428                    state[ 6] = X##ge; \
1429                } \
1430                else { \
1431                    state[ 4] = X##bu; \
1432                    output[ 4] = X##bu; \
1433                    state[ 5] = X##ga; \
1434                    output[ 5] = X##ga; \
1435                    state[ 6] = X##ge; \
1436                    if (laneCount >= 7) { \
1437                        output[ 6] = X##ge; \
1438                    } \
1439                } \
1440            } \
1441            state[ 7] = X##gi; \
1442            state[ 8] = X##go; \
1443            state[ 9] = X##gu; \
1444            state[10] = X##ka; \
1445            state[11] = X##ke; \
1446            state[12] = X##ki; \
1447            state[13] = X##ko; \
1448            state[14] = X##ku; \
1449        } \
1450        else { \
1451            state[ 0] = X##ba; \
1452            output[ 0] = X##ba; \
1453            state[ 1] = X##be; \
1454            output[ 1] = X##be; \
1455            state[ 2] = X##bi; \
1456            output[ 2] = X##bi; \
1457            state[ 3] = X##bo; \
1458            output[ 3] = X##bo; \
1459            state[ 4] = X##bu; \
1460            output[ 4] = X##bu; \
1461            state[ 5] = X##ga; \
1462            output[ 5] = X##ga; \
1463            state[ 6] = X##ge; \
1464            output[ 6] = X##ge; \
1465            state[ 7] = X##gi; \
1466            output[ 7] = X##gi; \
1467            if (laneCount < 12) { \
1468                if (laneCount < 10) { \
1469                    state[ 8] = X##go; \
1470                    if (laneCount >= 9) { \
1471                        output[ 8] = X##go; \
1472                    } \
1473                    state[ 9] = X##gu; \
1474                    state[10] = X##ka; \
1475                } \
1476                else { \
1477                    state[ 8] = X##go; \
1478                    output[ 8] = X##go; \
1479                    state[ 9] = X##gu; \
1480                    output[ 9] = X##gu; \
1481                    state[10] = X##ka; \
1482                    if (laneCount >= 11) { \
1483                        output[10] = X##ka; \
1484                    } \
1485                } \
1486                state[11] = X##ke; \
1487                state[12] = X##ki; \
1488                state[13] = X##ko; \
1489                state[14] = X##ku; \
1490            } \
1491            else { \
1492                state[ 8] = X##go; \
1493                output[ 8] = X##go; \
1494                state[ 9] = X##gu; \
1495                output[ 9] = X##gu; \
1496                state[10] = X##ka; \
1497                output[10] = X##ka; \
1498                state[11] = X##ke; \
1499                output[11] = X##ke; \
1500                if (laneCount < 14) { \
1501                    state[12] = X##ki; \
1502                    if (laneCount >= 13) { \
1503                        output[12]= X##ki; \
1504                    } \
1505                    state[13] = X##ko; \
1506                    state[14] = X##ku; \
1507                } \
1508                else { \
1509                    state[12] = X##ki; \
1510                    output[12]= X##ki; \
1511                    state[13] = X##ko; \
1512                    output[13] = X##ko; \
1513                    state[14] = X##ku; \
1514                    if (laneCount >= 15) { \
1515                        output[14] = X##ku; \
1516                    } \
1517                } \
1518            } \
1519        } \
1520        state[15] = X##ma; \
1521        state[16] = X##me; \
1522        state[17] = X##mi; \
1523        state[18] = X##mo; \
1524        state[19] = X##mu; \
1525        state[20] = X##sa; \
1526        state[21] = X##se; \
1527        state[22] = X##si; \
1528        state[23] = X##so; \
1529        state[24] = X##su; \
1530    } \
1531    else { \
1532        state[ 0] = X##ba; \
1533        output[ 0] = X##ba; \
1534        state[ 1] = X##be; \
1535        output[ 1] = X##be; \
1536        state[ 2] = X##bi; \
1537        output[ 2] = X##bi; \
1538        state[ 3] = X##bo; \
1539        output[ 3] = X##bo; \
1540        state[ 4] = X##bu; \
1541        output[ 4] = X##bu; \
1542        state[ 5] = X##ga; \
1543        output[ 5] = X##ga; \
1544        state[ 6] = X##ge; \
1545        output[ 6] = X##ge; \
1546        state[ 7] = X##gi; \
1547        output[ 7] = X##gi; \
1548        state[ 8] = X##go; \
1549        output[ 8] = X##go; \
1550        state[ 9] = X##gu; \
1551        output[ 9] = X##gu; \
1552        state[10] = X##ka; \
1553        output[10] = X##ka; \
1554        state[11] = X##ke; \
1555        output[11] = X##ke; \
1556        state[12] = X##ki; \
1557        output[12]= X##ki; \
1558        state[13] = X##ko; \
1559        output[13] = X##ko; \
1560        state[14] = X##ku; \
1561        output[14] = X##ku; \
1562        state[15] = X##ma; \
1563        output[15] = X##ma; \
1564        if (laneCount < 24) { \
1565            if (laneCount < 20) { \
1566                if (laneCount < 18) { \
1567                    state[16] = X##me; \
1568                    if (laneCount >= 17) { \
1569                        output[16] = X##me; \
1570                    } \
1571                    state[17] = X##mi; \
1572                    state[18] = X##mo; \
1573                } \
1574                else { \
1575                    state[16] = X##me; \
1576                    output[16] = X##me; \
1577                    state[17] = X##mi; \
1578                    output[17] = X##mi; \
1579                    state[18] = X##mo; \
1580                    if (laneCount >= 19) { \
1581                        output[18] = X##mo; \
1582                    } \
1583                } \
1584                state[19] = X##mu; \
1585                state[20] = X##sa; \
1586                state[21] = X##se; \
1587                state[22] = X##si; \
1588            } \
1589            else { \
1590                state[16] = X##me; \
1591                output[16] = X##me; \
1592                state[17] = X##mi; \
1593                output[17] = X##mi; \
1594                state[18] = X##mo; \
1595                output[18] = X##mo; \
1596                state[19] = X##mu; \
1597                output[19] = X##mu; \
1598                if (laneCount < 22) { \
1599                    state[20] = X##sa; \
1600                    if (laneCount >= 21) { \
1601                        output[20] = X##sa; \
1602                    } \
1603                    state[21] = X##se; \
1604                    state[22] = X##si; \
1605                } \
1606                else { \
1607                    state[20] = X##sa; \
1608                    output[20] = X##sa; \
1609                    state[21] = X##se; \
1610                    output[21] = X##se; \
1611                    state[22] = X##si; \
1612                    if (laneCount >= 23) { \
1613                        output[22] = X##si; \
1614                    } \
1615                } \
1616            } \
1617            state[23] = X##so; \
1618            state[24] = X##su; \
1619        } \
1620        else { \
1621            state[16] = X##me; \
1622            output[16] = X##me; \
1623            state[17] = X##mi; \
1624            output[17] = X##mi; \
1625            state[18] = X##mo; \
1626            output[18] = X##mo; \
1627            state[19] = X##mu; \
1628            output[19] = X##mu; \
1629            state[20] = X##sa; \
1630            output[20] = X##sa; \
1631            state[21] = X##se; \
1632            output[21] = X##se; \
1633            state[22] = X##si; \
1634            output[22] = X##si; \
1635            state[23] = X##so; \
1636            output[23] = X##so; \
1637            state[24] = X##su; \
1638            if (laneCount >= 25) { \
1639                output[24] = X##su; \
1640            } \
1641        } \
1642    }
1643
1644#define output(X, output, laneCount) \
1645    if (laneCount < 16) { \
1646        if (laneCount < 8) { \
1647            if (laneCount < 4) { \
1648                if (laneCount < 2) { \
1649                    if (laneCount >= 1) { \
1650                        output[ 0] = X##ba; \
1651                    } \
1652                } \
1653                else { \
1654                    output[ 0] = X##ba; \
1655                    output[ 1] = X##be; \
1656                    if (laneCount >= 3) { \
1657                        output[ 2] = X##bi; \
1658                    } \
1659                } \
1660            } \
1661            else { \
1662                output[ 0] = X##ba; \
1663                output[ 1] = X##be; \
1664                output[ 2] = X##bi; \
1665                output[ 3] = X##bo; \
1666                if (laneCount < 6) { \
1667                    if (laneCount >= 5) { \
1668                        output[ 4] = X##bu; \
1669                    } \
1670                } \
1671                else { \
1672                    output[ 4] = X##bu; \
1673                    output[ 5] = X##ga; \
1674                    if (laneCount >= 7) { \
1675                        output[ 6] = X##ge; \
1676                    } \
1677                } \
1678            } \
1679        } \
1680        else { \
1681            output[ 0] = X##ba; \
1682            output[ 1] = X##be; \
1683            output[ 2] = X##bi; \
1684            output[ 3] = X##bo; \
1685            output[ 4] = X##bu; \
1686            output[ 5] = X##ga; \
1687            output[ 6] = X##ge; \
1688            output[ 7] = X##gi; \
1689            if (laneCount < 12) { \
1690                if (laneCount < 10) { \
1691                    if (laneCount >= 9) { \
1692                        output[ 8] = X##go; \
1693                    } \
1694                } \
1695                else { \
1696                    output[ 8] = X##go; \
1697                    output[ 9] = X##gu; \
1698                    if (laneCount >= 11) { \
1699                        output[10] = X##ka; \
1700                    } \
1701                } \
1702            } \
1703            else { \
1704                output[ 8] = X##go; \
1705                output[ 9] = X##gu; \
1706                output[10] = X##ka; \
1707                output[11] = X##ke; \
1708                if (laneCount < 14) { \
1709                    if (laneCount >= 13) { \
1710                        output[12] = X##ki; \
1711                    } \
1712                } \
1713                else { \
1714                    output[12] = X##ki; \
1715                    output[13] = X##ko; \
1716                    if (laneCount >= 15) { \
1717                        output[14] = X##ku; \
1718                    } \
1719                } \
1720            } \
1721        } \
1722    } \
1723    else { \
1724        output[ 0] = X##ba; \
1725        output[ 1] = X##be; \
1726        output[ 2] = X##bi; \
1727        output[ 3] = X##bo; \
1728        output[ 4] = X##bu; \
1729        output[ 5] = X##ga; \
1730        output[ 6] = X##ge; \
1731        output[ 7] = X##gi; \
1732        output[ 8] = X##go; \
1733        output[ 9] = X##gu; \
1734        output[10] = X##ka; \
1735        output[11] = X##ke; \
1736        output[12] = X##ki; \
1737        output[13] = X##ko; \
1738        output[14] = X##ku; \
1739        output[15] = X##ma; \
1740        if (laneCount < 24) { \
1741            if (laneCount < 20) { \
1742                if (laneCount < 18) { \
1743                    if (laneCount >= 17) { \
1744                        output[16] = X##me; \
1745                    } \
1746                } \
1747                else { \
1748                    output[16] = X##me; \
1749                    output[17] = X##mi; \
1750                    if (laneCount >= 19) { \
1751                        output[18] = X##mo; \
1752                    } \
1753                } \
1754            } \
1755            else { \
1756                output[16] = X##me; \
1757                output[17] = X##mi; \
1758                output[18] = X##mo; \
1759                output[19] = X##mu; \
1760                if (laneCount < 22) { \
1761                    if (laneCount >= 21) { \
1762                        output[20] = X##sa; \
1763                    } \
1764                } \
1765                else { \
1766                    output[20] = X##sa; \
1767                    output[21] = X##se; \
1768                    if (laneCount >= 23) { \
1769                        output[22] = X##si; \
1770                    } \
1771                } \
1772            } \
1773        } \
1774        else { \
1775            output[16] = X##me; \
1776            output[17] = X##mi; \
1777            output[18] = X##mo; \
1778            output[19] = X##mu; \
1779            output[20] = X##sa; \
1780            output[21] = X##se; \
1781            output[22] = X##si; \
1782            output[23] = X##so; \
1783            if (laneCount >= 25) { \
1784                output[24] = X##su; \
1785            } \
1786        } \
1787    }
1788
1789#define wrapOne(X, input, output, index, name) \
1790    X##name ^= input[index]; \
1791    output[index] = X##name;
1792
1793#define wrapOneInvert(X, input, output, index, name) \
1794    X##name ^= input[index]; \
1795    output[index] = X##name;
1796
1797#define unwrapOne(X, input, output, index, name) \
1798    output[index] = input[index] ^ X##name; \
1799    X##name ^= output[index];
1800
1801#define unwrapOneInvert(X, input, output, index, name) \
1802    output[index] = input[index] ^ X##name; \
1803    X##name ^= output[index];
1804
1805#endif
1806
1807#define wrap(X, input, output, laneCount, trailingBits) \
1808    if (laneCount < 16) { \
1809        if (laneCount < 8) { \
1810            if (laneCount < 4) { \
1811                if (laneCount < 2) { \
1812                    if (laneCount < 1) { \
1813                        X##ba ^= trailingBits; \
1814                    } \
1815                    else { \
1816                        wrapOne(X, input, output, 0, ba) \
1817                        X##be ^= trailingBits; \
1818                    } \
1819                } \
1820                else { \
1821                    wrapOne(X, input, output, 0, ba) \
1822                    wrapOneInvert(X, input, output, 1, be) \
1823                    if (laneCount < 3) { \
1824                        X##bi ^= trailingBits; \
1825                    } \
1826                    else { \
1827                        wrapOneInvert(X, input, output, 2, bi) \
1828                        X##bo ^= trailingBits; \
1829                    } \
1830                } \
1831            } \
1832            else { \
1833                wrapOne(X, input, output, 0, ba) \
1834                wrapOneInvert(X, input, output, 1, be) \
1835                wrapOneInvert(X, input, output, 2, bi) \
1836                wrapOne(X, input, output, 3, bo) \
1837                if (laneCount < 6) { \
1838                    if (laneCount < 5) { \
1839                        X##bu ^= trailingBits; \
1840                    } \
1841                    else { \
1842                        wrapOne(X, input, output, 4, bu) \
1843                        X##ga ^= trailingBits; \
1844                    } \
1845                } \
1846                else { \
1847                    wrapOne(X, input, output, 4, bu) \
1848                    wrapOne(X, input, output, 5, ga) \
1849                    if (laneCount < 7) { \
1850                        X##ge ^= trailingBits; \
1851                    } \
1852                    else { \
1853                        wrapOne(X, input, output, 6, ge) \
1854                        X##gi ^= trailingBits; \
1855                    } \
1856                } \
1857            } \
1858        } \
1859        else { \
1860            wrapOne(X, input, output, 0, ba) \
1861            wrapOneInvert(X, input, output, 1, be) \
1862            wrapOneInvert(X, input, output, 2, bi) \
1863            wrapOne(X, input, output, 3, bo) \
1864            wrapOne(X, input, output, 4, bu) \
1865            wrapOne(X, input, output, 5, ga) \
1866            wrapOne(X, input, output, 6, ge) \
1867            wrapOne(X, input, output, 7, gi) \
1868            if (laneCount < 12) { \
1869                if (laneCount < 10) { \
1870                    if (laneCount < 9) { \
1871                        X##go ^= trailingBits; \
1872                    } \
1873                    else { \
1874                        wrapOneInvert(X, input, output, 8, go) \
1875                        X##gu ^= trailingBits; \
1876                    } \
1877                } \
1878                else { \
1879                    wrapOneInvert(X, input, output, 8, go) \
1880                    wrapOne(X, input, output, 9, gu) \
1881                    if (laneCount < 11) { \
1882                        X##ka ^= trailingBits; \
1883                    } \
1884                    else { \
1885                        wrapOne(X, input, output, 10, ka) \
1886                        X##ke ^= trailingBits; \
1887                    } \
1888                } \
1889            } \
1890            else { \
1891                wrapOneInvert(X, input, output, 8, go) \
1892                wrapOne(X, input, output, 9, gu) \
1893                wrapOne(X, input, output, 10, ka) \
1894                wrapOne(X, input, output, 11, ke) \
1895                if (laneCount < 14) { \
1896                    if (laneCount < 13) { \
1897                        X##ki ^= trailingBits; \
1898                    } \
1899                    else { \
1900                        wrapOneInvert(X, input, output, 12, ki) \
1901                        X##ko ^= trailingBits; \
1902                    } \
1903                } \
1904                else { \
1905                    wrapOneInvert(X, input, output, 12, ki) \
1906                    wrapOne(X, input, output, 13, ko) \
1907                    if (laneCount < 15) { \
1908                        X##ku ^= trailingBits; \
1909                    } \
1910                    else { \
1911                        wrapOne(X, input, output, 14, ku) \
1912                        X##ma ^= trailingBits; \
1913                    } \
1914                } \
1915            } \
1916        } \
1917    } \
1918    else { \
1919        wrapOne(X, input, output, 0, ba) \
1920        wrapOneInvert(X, input, output, 1, be) \
1921        wrapOneInvert(X, input, output, 2, bi) \
1922        wrapOne(X, input, output, 3, bo) \
1923        wrapOne(X, input, output, 4, bu) \
1924        wrapOne(X, input, output, 5, ga) \
1925        wrapOne(X, input, output, 6, ge) \
1926        wrapOne(X, input, output, 7, gi) \
1927        wrapOneInvert(X, input, output, 8, go) \
1928        wrapOne(X, input, output, 9, gu) \
1929        wrapOne(X, input, output, 10, ka) \
1930        wrapOne(X, input, output, 11, ke) \
1931        wrapOneInvert(X, input, output, 12, ki) \
1932        wrapOne(X, input, output, 13, ko) \
1933        wrapOne(X, input, output, 14, ku) \
1934        wrapOne(X, input, output, 15, ma) \
1935        if (laneCount < 24) { \
1936            if (laneCount < 20) { \
1937                if (laneCount < 18) { \
1938                    if (laneCount < 17) { \
1939                        X##me ^= trailingBits; \
1940                    } \
1941                    else { \
1942                        wrapOne(X, input, output, 16, me) \
1943                        X##mi ^= trailingBits; \
1944                    } \
1945                } \
1946                else { \
1947                    wrapOne(X, input, output, 16, me) \
1948                    wrapOneInvert(X, input, output, 17, mi) \
1949                    if (laneCount < 19) { \
1950                        X##mo ^= trailingBits; \
1951                    } \
1952                    else { \
1953                        wrapOne(X, input, output, 18, mo) \
1954                        X##mu ^= trailingBits; \
1955                    } \
1956                } \
1957            } \
1958            else { \
1959                wrapOne(X, input, output, 16, me) \
1960                wrapOneInvert(X, input, output, 17, mi) \
1961                wrapOne(X, input, output, 18, mo) \
1962                wrapOne(X, input, output, 19, mu) \
1963                if (laneCount < 22) { \
1964                    if (laneCount < 21) { \
1965                        X##sa ^= trailingBits; \
1966                    } \
1967                    else { \
1968                        wrapOneInvert(X, input, output, 20, sa) \
1969                        X##se ^= trailingBits; \
1970                    } \
1971                } \
1972                else { \
1973                    wrapOneInvert(X, input, output, 20, sa) \
1974                    wrapOne(X, input, output, 21, se) \
1975                    if (laneCount < 23) { \
1976                        X##si ^= trailingBits; \
1977                    } \
1978                    else { \
1979                        wrapOne(X, input, output, 22, si) \
1980                        X##so ^= trailingBits; \
1981                    } \
1982                } \
1983            } \
1984        } \
1985        else { \
1986            wrapOne(X, input, output, 16, me) \
1987            wrapOneInvert(X, input, output, 17, mi) \
1988            wrapOne(X, input, output, 18, mo) \
1989            wrapOne(X, input, output, 19, mu) \
1990            wrapOneInvert(X, input, output, 20, sa) \
1991            wrapOne(X, input, output, 21, se) \
1992            wrapOne(X, input, output, 22, si) \
1993            wrapOne(X, input, output, 23, so) \
1994            if (laneCount < 25) { \
1995                X##su ^= trailingBits; \
1996            } \
1997            else { \
1998                wrapOne(X, input, output, 24, su) \
1999            } \
2000        } \
2001    }
2002
2003#define unwrap(X, input, output, laneCount, trailingBits) \
2004    if (laneCount < 16) { \
2005        if (laneCount < 8) { \
2006            if (laneCount < 4) { \
2007                if (laneCount < 2) { \
2008                    if (laneCount < 1) { \
2009                        X##ba ^= trailingBits; \
2010                    } \
2011                    else { \
2012                        unwrapOne(X, input, output, 0, ba) \
2013                        X##be ^= trailingBits; \
2014                    } \
2015                } \
2016                else { \
2017                    unwrapOne(X, input, output, 0, ba) \
2018                    unwrapOneInvert(X, input, output, 1, be) \
2019                    if (laneCount < 3) { \
2020                        X##bi ^= trailingBits; \
2021                    } \
2022                    else { \
2023                        unwrapOneInvert(X, input, output, 2, bi) \
2024                        X##bo ^= trailingBits; \
2025                    } \
2026                } \
2027            } \
2028            else { \
2029                unwrapOne(X, input, output, 0, ba) \
2030                unwrapOneInvert(X, input, output, 1, be) \
2031                unwrapOneInvert(X, input, output, 2, bi) \
2032                unwrapOne(X, input, output, 3, bo) \
2033                if (laneCount < 6) { \
2034                    if (laneCount < 5) { \
2035                        X##bu ^= trailingBits; \
2036                    } \
2037                    else { \
2038                        unwrapOne(X, input, output, 4, bu) \
2039                        X##ga ^= trailingBits; \
2040                    } \
2041                } \
2042                else { \
2043                    unwrapOne(X, input, output, 4, bu) \
2044                    unwrapOne(X, input, output, 5, ga) \
2045                    if (laneCount < 7) { \
2046                        X##ge ^= trailingBits; \
2047                    } \
2048                    else { \
2049                        unwrapOne(X, input, output, 6, ge) \
2050                        X##gi ^= trailingBits; \
2051                    } \
2052                } \
2053            } \
2054        } \
2055        else { \
2056            unwrapOne(X, input, output, 0, ba) \
2057            unwrapOneInvert(X, input, output, 1, be) \
2058            unwrapOneInvert(X, input, output, 2, bi) \
2059            unwrapOne(X, input, output, 3, bo) \
2060            unwrapOne(X, input, output, 4, bu) \
2061            unwrapOne(X, input, output, 5, ga) \
2062            unwrapOne(X, input, output, 6, ge) \
2063            unwrapOne(X, input, output, 7, gi) \
2064            if (laneCount < 12) { \
2065                if (laneCount < 10) { \
2066                    if (laneCount < 9) { \
2067                        X##go ^= trailingBits; \
2068                    } \
2069                    else { \
2070                        unwrapOneInvert(X, input, output, 8, go) \
2071                        X##gu ^= trailingBits; \
2072                    } \
2073                } \
2074                else { \
2075                    unwrapOneInvert(X, input, output, 8, go) \
2076                    unwrapOne(X, input, output, 9, gu) \
2077                    if (laneCount < 11) { \
2078                        X##ka ^= trailingBits; \
2079                    } \
2080                    else { \
2081                        unwrapOne(X, input, output, 10, ka) \
2082                        X##ke ^= trailingBits; \
2083                    } \
2084                } \
2085            } \
2086            else { \
2087                unwrapOneInvert(X, input, output, 8, go) \
2088                unwrapOne(X, input, output, 9, gu) \
2089                unwrapOne(X, input, output, 10, ka) \
2090                unwrapOne(X, input, output, 11, ke) \
2091                if (laneCount < 14) { \
2092                    if (laneCount < 13) { \
2093                        X##ki ^= trailingBits; \
2094                    } \
2095                    else { \
2096                        unwrapOneInvert(X, input, output, 12, ki) \
2097                        X##ko ^= trailingBits; \
2098                    } \
2099                } \
2100                else { \
2101                    unwrapOneInvert(X, input, output, 12, ki) \
2102                    unwrapOne(X, input, output, 13, ko) \
2103                    if (laneCount < 15) { \
2104                        X##ku ^= trailingBits; \
2105                    } \
2106                    else { \
2107                        unwrapOne(X, input, output, 14, ku) \
2108                        X##ma ^= trailingBits; \
2109                    } \
2110                } \
2111            } \
2112        } \
2113    } \
2114    else { \
2115        unwrapOne(X, input, output, 0, ba) \
2116        unwrapOneInvert(X, input, output, 1, be) \
2117        unwrapOneInvert(X, input, output, 2, bi) \
2118        unwrapOne(X, input, output, 3, bo) \
2119        unwrapOne(X, input, output, 4, bu) \
2120        unwrapOne(X, input, output, 5, ga) \
2121        unwrapOne(X, input, output, 6, ge) \
2122        unwrapOne(X, input, output, 7, gi) \
2123        unwrapOneInvert(X, input, output, 8, go) \
2124        unwrapOne(X, input, output, 9, gu) \
2125        unwrapOne(X, input, output, 10, ka) \
2126        unwrapOne(X, input, output, 11, ke) \
2127        unwrapOneInvert(X, input, output, 12, ki) \
2128        unwrapOne(X, input, output, 13, ko) \
2129        unwrapOne(X, input, output, 14, ku) \
2130        unwrapOne(X, input, output, 15, ma) \
2131        if (laneCount < 24) { \
2132            if (laneCount < 20) { \
2133                if (laneCount < 18) { \
2134                    if (laneCount < 17) { \
2135                        X##me ^= trailingBits; \
2136                    } \
2137                    else { \
2138                        unwrapOne(X, input, output, 16, me) \
2139                        X##mi ^= trailingBits; \
2140                    } \
2141                } \
2142                else { \
2143                    unwrapOne(X, input, output, 16, me) \
2144                    unwrapOneInvert(X, input, output, 17, mi) \
2145                    if (laneCount < 19) { \
2146                        X##mo ^= trailingBits; \
2147                    } \
2148                    else { \
2149                        unwrapOne(X, input, output, 18, mo) \
2150                        X##mu ^= trailingBits; \
2151                    } \
2152                } \
2153            } \
2154            else { \
2155                unwrapOne(X, input, output, 16, me) \
2156                unwrapOneInvert(X, input, output, 17, mi) \
2157                unwrapOne(X, input, output, 18, mo) \
2158                unwrapOne(X, input, output, 19, mu) \
2159                if (laneCount < 22) { \
2160                    if (laneCount < 21) { \
2161                        X##sa ^= trailingBits; \
2162                    } \
2163                    else { \
2164                        unwrapOneInvert(X, input, output, 20, sa) \
2165                        X##se ^= trailingBits; \
2166                    } \
2167                } \
2168                else { \
2169                    unwrapOneInvert(X, input, output, 20, sa) \
2170                    unwrapOne(X, input, output, 21, se) \
2171                    if (laneCount < 23) { \
2172                        X##si ^= trailingBits; \
2173                    } \
2174                    else { \
2175                        unwrapOne(X, input, output, 22, si) \
2176                        X##so ^= trailingBits; \
2177                    } \
2178                } \
2179            } \
2180        } \
2181        else { \
2182            unwrapOne(X, input, output, 16, me) \
2183            unwrapOneInvert(X, input, output, 17, mi) \
2184            unwrapOne(X, input, output, 18, mo) \
2185            unwrapOne(X, input, output, 19, mu) \
2186            unwrapOneInvert(X, input, output, 20, sa) \
2187            unwrapOne(X, input, output, 21, se) \
2188            unwrapOne(X, input, output, 22, si) \
2189            unwrapOne(X, input, output, 23, so) \
2190            if (laneCount < 25) { \
2191                X##su ^= trailingBits; \
2192            } \
2193            else { \
2194                unwrapOne(X, input, output, 24, su) \
2195            } \
2196        } \
2197    }
2198