1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | http://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Author: Jim Winstead <jimw@php.net> |
14 | Xinchen Hui <laruence@php.net> |
15 +----------------------------------------------------------------------+
16 */
17
18 #include <string.h>
19
20 #include "php.h"
21 #include "base64.h"
22
23 /* {{{ base64 tables */
/* Base64 alphabet: element i is the character emitted for 6-bit value i.
 * The string literal's implicit terminator provides the trailing '\0'
 * element, so the array holds 65 chars in total. */
static const char base64_table[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";

/* Character used to fill up an incomplete final output group. */
static const char base64_pad = '=';
33
/* Reverse lookup, indexed by input byte value:
 *   0..63  the 6-bit value of an alphabet character
 *   -1     a character the decoder skips as whitespace (tab, LF, CR, space)
 *   -2     any other byte (this includes '=', which the decoder tests for
 *          before consulting this table) */
static const short base64_reverse_table[256] = {
	/* 0x00 */ -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0x08 */ -2, -1, -1, -2, -2, -1, -2, -2,
	/* 0x10 */ -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0x18 */ -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0x20 */ -1, -2, -2, -2, -2, -2, -2, -2,
	/* 0x28 */ -2, -2, -2, 62, -2, -2, -2, 63,
	/* 0x30 */ 52, 53, 54, 55, 56, 57, 58, 59,
	/* 0x38 */ 60, 61, -2, -2, -2, -2, -2, -2,
	/* 0x40 */ -2,  0,  1,  2,  3,  4,  5,  6,
	/* 0x48 */  7,  8,  9, 10, 11, 12, 13, 14,
	/* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22,
	/* 0x58 */ 23, 24, 25, -2, -2, -2, -2, -2,
	/* 0x60 */ -2, 26, 27, 28, 29, 30, 31, 32,
	/* 0x68 */ 33, 34, 35, 36, 37, 38, 39, 40,
	/* 0x70 */ 41, 42, 43, 44, 45, 46, 47, 48,
	/* 0x78 */ 49, 50, 51, -2, -2, -2, -2, -2,
	/* 0x80 */ -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0x88 */ -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0x90 */ -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0x98 */ -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0xa0 */ -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0xa8 */ -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0xb0 */ -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0xb8 */ -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0xc0 */ -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0xc8 */ -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0xd0 */ -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0xd8 */ -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0xe0 */ -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0xe8 */ -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0xf0 */ -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0xf8 */ -2, -2, -2, -2, -2, -2, -2, -2
};
52 /* }}} */
53
54 #ifdef __aarch64__
55 #include <arm_neon.h>
56
/* Turn sixteen 6-bit values into their Base64 ASCII characters.
 *
 * input     - lanes holding values 0..63
 * shift_LUT - ASCII offset per range code (see the codes computed below)
 *
 * Returns, per lane, input plus the offset looked up for its range. */
static zend_always_inline uint8x16_t encode_toascii(const uint8x16_t input, const uint8x16x2_t shift_LUT)
{
	/* Subtracting 51 without going below zero yields a range code:
	    0..51 -> 0
	   52..61 -> 1 .. 10
	       62 -> 11
	       63 -> 12 */
	const uint8x16_t above_51 = vqsubq_u8(input, vdupq_n_u8(51));

	/* Both letter ranges ended up as code 0; tell them apart:
	    0 .. 25 -> code becomes 13
	   26 .. 51 -> code stays 0 */
	const uint8x16_t below_26 = vcgtq_u8(vdupq_n_u8(26), input);
	const uint8x16_t code = vorrq_u8(above_51, vandq_u8(below_26, vdupq_n_u8(13)));

	/* fetch the offset belonging to each code and apply it */
	const uint8x16_t offset = vqtbl2q_u8(shift_LUT, code);
	return vaddq_u8(offset, input);
}
73
/* Encode every whole 48-byte chunk of the input with NEON.
 *
 * in   - input bytes
 * inl  - number of input bytes
 * out  - destination; 64 characters are written per 48 input bytes
 * left - receives the count of trailing input bytes not consumed (< 48)
 *
 * Returns the output position just past the last character written. */
static zend_always_inline unsigned char *neon_base64_encode(const unsigned char *in, size_t inl, unsigned char *out, size_t *left)
{
	/* ASCII offset per range code as computed by encode_toascii(); the 16
	 * entries are repeated to fill both registers of the table pair. */
	static const uint8_t shift_LUT_[32] = {
		'a' - 26, '0' - 52, '0' - 52, '0' - 52,
		'0' - 52, '0' - 52, '0' - 52, '0' - 52,
		'0' - 52, '0' - 52, '0' - 52, '+' - 62,
		'/' - 63, 'A',      0,        0,
		'a' - 26, '0' - 52, '0' - 52, '0' - 52,
		'0' - 52, '0' - 52, '0' - 52, '0' - 52,
		'0' - 52, '0' - 52, '0' - 52, '+' - 62,
		'/' - 63, 'A',      0,        0};
	uint8x16x2_t shift_LUT;

	/* Use explicit byte loads instead of dereferencing a casted pointer:
	 * a uint8_t array carries no 16-byte alignment guarantee. */
	shift_LUT.val[0] = vld1q_u8(shift_LUT_);
	shift_LUT.val[1] = vld1q_u8(shift_LUT_ + 16);

	/* Guarding the first iteration as well means a short input is simply
	 * left for the caller instead of being over-read. */
	while (inl >= 16 * 3) {
		/* [ccdddddd | bbbbcccc | aaaaaabb]
		    x.val[2] | x.val[1] | x.val[0] */
		const uint8x16x3_t x = vld3q_u8((const uint8_t *)(in));

		/* [00aa_aaaa] */
		const uint8x16_t field_a = vshrq_n_u8(x.val[0], 2);

		const uint8x16_t field_b =             /* [00bb_bbbb] */
			vbslq_u8(vdupq_n_u8(0x30),         /* [0011_0000] */
			         vshlq_n_u8(x.val[0], 4),  /* [aabb_0000] */
			         vshrq_n_u8(x.val[1], 4)); /* [0000_bbbb] */

		const uint8x16_t field_c =             /* [00cc_cccc] */
			vbslq_u8(vdupq_n_u8(0x3c),         /* [0011_1100] */
			         vshlq_n_u8(x.val[1], 2),  /* [bbcc_cc00] */
			         vshrq_n_u8(x.val[2], 6)); /* [0000_00cc] */

		/* [00dd_dddd] */
		const uint8x16_t field_d = vandq_u8(x.val[2], vdupq_n_u8(0x3f));

		uint8x16x4_t result;
		result.val[0] = encode_toascii(field_a, shift_LUT);
		result.val[1] = encode_toascii(field_b, shift_LUT);
		result.val[2] = encode_toascii(field_c, shift_LUT);
		result.val[3] = encode_toascii(field_d, shift_LUT);

		/* interleaving store: a, b, c, d characters of each group end up adjacent */
		vst4q_u8((uint8_t *)out, result);
		out += 64;
		in += 16 * 3;
		inl -= 16 * 3;
	}

	*left = inl;
	return out;
}
121 #endif /* __aarch64__ */
122
php_base64_encode_impl(const unsigned char * in,size_t inl,unsigned char * out)123 static zend_always_inline unsigned char *php_base64_encode_impl(const unsigned char *in, size_t inl, unsigned char *out) /* {{{ */
124 {
125 #ifdef __aarch64__
126 if (inl >= 16 * 3) {
127 size_t left = 0;
128 out = neon_base64_encode(in, inl, out, &left);
129 in += inl - left;
130 inl = left;
131 }
132 #endif
133
134 while (inl > 2) { /* keep going until we have less than 24 bits */
135 *out++ = base64_table[in[0] >> 2];
136 *out++ = base64_table[((in[0] & 0x03) << 4) + (in[1] >> 4)];
137 *out++ = base64_table[((in[1] & 0x0f) << 2) + (in[2] >> 6)];
138 *out++ = base64_table[in[2] & 0x3f];
139
140 in += 3;
141 inl -= 3; /* we just handle 3 octets of data */
142 }
143
144 /* now deal with the tail end of things */
145 if (inl != 0) {
146 *out++ = base64_table[in[0] >> 2];
147 if (inl > 1) {
148 *out++ = base64_table[((in[0] & 0x03) << 4) + (in[1] >> 4)];
149 *out++ = base64_table[(in[1] & 0x0f) << 2];
150 *out++ = base64_pad;
151 } else {
152 *out++ = base64_table[(in[0] & 0x03) << 4];
153 *out++ = base64_pad;
154 *out++ = base64_pad;
155 }
156 }
157
158 *out = '\0';
159
160 return out;
161 }
162 /* }}} */
163
164 #ifdef __aarch64__
/* Convert sixteen Base64 ASCII characters to 6-bit values and flag lanes
 * that fail the nibble-based validity test.
 *
 * input     - sixteen input characters
 * error     - out: per lane all-ones when maskLUT[low nibble] and
 *             bitposLUT[high nibble] share no bit, zero otherwise
 * shiftLUT  - offset to add, selected by the high nibble
 * maskLUT   - bit mask selected by the low nibble
 * bitposLUT - single bit selected by the high nibble
 *
 * Returns input plus the selected offset in every lane. The caller in this
 * file discards the result when any lane is flagged in *error. */
static zend_always_inline uint8x16_t decode_fromascii(const uint8x16_t input, uint8x16_t *error, const uint8x16x2_t shiftLUT, const uint8x16x2_t maskLUT, const uint8x16x2_t bitposLUT) {
	const uint8x16_t hi = vshrq_n_u8(input, 4);
	const uint8x16_t lo = vandq_u8(input, vdupq_n_u8(0x0f));

	/* validity: the mask picked by the low nibble must contain the bit
	 * picked by the high nibble */
	const uint8x16_t allowed = vandq_u8(vqtbl2q_u8(maskLUT, lo), vqtbl2q_u8(bitposLUT, hi));

	/* the offset depends on the high nibble, except for 0x2f ('/'),
	 * which always gets 16 */
	const uint8x16_t is_2f = vceqq_u8(input, vdupq_n_u8(0x2f));
	const uint8x16_t offset = vbslq_u8(is_2f, vdupq_n_u8(16), vqtbl2q_u8(shiftLUT, hi));

	*error = vceqq_u8(allowed, vdupq_n_u8(0));
	return vaddq_u8(input, offset);
}
176
/* Decode whole 64-character chunks with NEON until a chunk contains a byte
 * that decode_fromascii() flags.
 *
 * in   - encoded input
 * inl  - number of input bytes
 * out  - destination; 48 bytes are written per accepted 64-character chunk
 * left - receives the number of input bytes not consumed; a chunk that
 *        contained a flagged byte is not consumed
 *
 * Returns the number of bytes written to out. */
static zend_always_inline size_t neon_base64_decode(const unsigned char *in, size_t inl, unsigned char *out, size_t *left) {
	unsigned char *out_orig = out;
	/* offset per high nibble of the input character */
	static const uint8_t shiftLUT_[32] = {
		0,   0,  19,   4, (uint8_t)-65, (uint8_t)-65, (uint8_t)-71, (uint8_t)-71,
		0,   0,   0,   0,   0,   0,   0,   0,
		0,   0,  19,   4, (uint8_t)-65, (uint8_t)-65, (uint8_t)-71, (uint8_t)-71,
		0,   0,   0,   0,   0,   0,   0,   0};
	/* mask per low nibble of the input character */
	static const uint8_t maskLUT_[32] = {
		/* 0        : 0b1010_1000*/ 0xa8,
		/* 1 .. 9   : 0b1111_1000*/ 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
		/* 10       : 0b1111_0000*/ 0xf0,
		/* 11       : 0b0101_0100*/ 0x54,
		/* 12 .. 14 : 0b0101_0000*/ 0x50, 0x50, 0x50,
		/* 15       : 0b0101_0100*/ 0x54,

		/* 0        : 0b1010_1000*/ 0xa8,
		/* 1 .. 9   : 0b1111_1000*/ 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
		/* 10       : 0b1111_0000*/ 0xf0,
		/* 11       : 0b0101_0100*/ 0x54,
		/* 12 .. 14 : 0b0101_0000*/ 0x50, 0x50, 0x50,
		/* 15       : 0b0101_0100*/ 0x54
	};
	/* bit tested against the mask, per high nibble of the input character */
	static const uint8_t bitposLUT_[32] = {
		0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,

		0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
	};
	uint8x16x2_t shiftLUT, maskLUT, bitposLUT;

	/* Use explicit byte loads instead of dereferencing casted pointers:
	 * uint8_t arrays carry no 16-byte alignment guarantee. */
	shiftLUT.val[0] = vld1q_u8(shiftLUT_);
	shiftLUT.val[1] = vld1q_u8(shiftLUT_ + 16);
	maskLUT.val[0] = vld1q_u8(maskLUT_);
	maskLUT.val[1] = vld1q_u8(maskLUT_ + 16);
	bitposLUT.val[0] = vld1q_u8(bitposLUT_);
	bitposLUT.val[1] = vld1q_u8(bitposLUT_ + 16);

	/* Guarding the first iteration as well means a short input is simply
	 * left for the caller instead of being over-read. */
	while (inl >= 16 * 4) {
		/* de-interleaving load: val[k] holds the k-th character of each group */
		const uint8x16x4_t x = vld4q_u8((const unsigned char *)in);
		uint8x16_t error_a;
		uint8x16_t error_b;
		uint8x16_t error_c;
		uint8x16_t error_d;
		uint8x16_t field_a = decode_fromascii(x.val[0], &error_a, shiftLUT, maskLUT, bitposLUT);
		uint8x16_t field_b = decode_fromascii(x.val[1], &error_b, shiftLUT, maskLUT, bitposLUT);
		uint8x16_t field_c = decode_fromascii(x.val[2], &error_c, shiftLUT, maskLUT, bitposLUT);
		uint8x16_t field_d = decode_fromascii(x.val[3], &error_d, shiftLUT, maskLUT, bitposLUT);

		const uint8x16_t err = vorrq_u8(vorrq_u8(error_a, error_b), vorrq_u8(error_c, error_d));
		union {uint8_t mem[16]; uint64_t dw[2]; } error;
		vst1q_u8(error.mem, err);

		/* Check that the input only contains bytes belonging to the alphabet of
		   Base64. If there are errors, decode the rest of the string with the
		   scalar decoder. */
		if (error.dw[0] | error.dw[1]) {
			break;
		}

		/* stitch the four 6-bit fields of each group into three bytes */
		uint8x16x3_t result;
		result.val[0] = vorrq_u8(vshrq_n_u8(field_b, 4), vshlq_n_u8(field_a, 2));
		result.val[1] = vorrq_u8(vshrq_n_u8(field_c, 2), vshlq_n_u8(field_b, 4));
		result.val[2] = vorrq_u8(field_d, vshlq_n_u8(field_c, 6));

		vst3q_u8((unsigned char *)out, result);
		out += 16 * 3;
		in += 16 * 4;
		inl -= 16 * 4;
	}
	*left = inl;
	return out - out_orig;
}
244 #endif /* __aarch64__ */
245
php_base64_decode_impl(const unsigned char * in,size_t inl,unsigned char * out,size_t * outl,zend_bool strict)246 static zend_always_inline int php_base64_decode_impl(const unsigned char *in, size_t inl, unsigned char *out, size_t *outl, zend_bool strict) /* {{{ */
247 {
248 int ch;
249 size_t i = 0, padding = 0, j = *outl;
250
251 #ifdef __aarch64__
252 if (inl >= 16 * 4) {
253 size_t left = 0;
254 j += neon_base64_decode(in, inl, out, &left);
255 i = inl - left;
256 in += i;
257 inl = left;
258 }
259 #endif
260
261 /* run through the whole string, converting as we go */
262 while (inl-- > 0) {
263 ch = *in++;
264 if (ch == base64_pad) {
265 padding++;
266 continue;
267 }
268
269 ch = base64_reverse_table[ch];
270 if (!strict) {
271 /* skip unknown characters and whitespace */
272 if (ch < 0) {
273 continue;
274 }
275 } else {
276 /* skip whitespace */
277 if (ch == -1) {
278 continue;
279 }
280 /* fail on bad characters or if any data follows padding */
281 if (ch == -2 || padding) {
282 goto fail;
283 }
284 }
285
286 switch (i % 4) {
287 case 0:
288 out[j] = ch << 2;
289 break;
290 case 1:
291 out[j++] |= ch >> 4;
292 out[j] = (ch & 0x0f) << 4;
293 break;
294 case 2:
295 out[j++] |= ch >>2;
296 out[j] = (ch & 0x03) << 6;
297 break;
298 case 3:
299 out[j++] |= ch;
300 break;
301 }
302 i++;
303 }
304
305 /* fail if the input is truncated (only one char in last group) */
306 if (strict && i % 4 == 1) {
307 goto fail;
308 }
309
310 /* fail if the padding length is wrong (not VV==, VVV=), but accept zero padding
311 * RFC 4648: "In some circumstances, the use of padding [--] is not required" */
312 if (strict && padding && (padding > 2 || (i + padding) % 4 != 0)) {
313 goto fail;
314 }
315
316 *outl = j;
317 out[j] = '\0';
318
319 return 1;
320
321 fail:
322 return 0;
323 }
324 /* }}} */
325
326 /* {{{ php_base64_encode */
327
328 #if ZEND_INTRIN_AVX2_NATIVE
329 # undef ZEND_INTRIN_SSSE3_NATIVE
330 # undef ZEND_INTRIN_SSSE3_RESOLVER
331 # undef ZEND_INTRIN_SSSE3_FUNC_PROTO
332 # undef ZEND_INTRIN_SSSE3_FUNC_PTR
333 #elif ZEND_INTRIN_AVX2_FUNC_PROTO && ZEND_INTRIN_SSSE3_NATIVE
334 # undef ZEND_INTRIN_SSSE3_NATIVE
335 # undef ZEND_INTRIN_SSSE3_RESOLVER
336 # define ZEND_INTRIN_SSSE3_RESOLVER 1
337 # define ZEND_INTRIN_SSSE3_FUNC_PROTO 1
338 # undef ZEND_INTRIN_SSSE3_FUNC_DECL
339 # ifdef HAVE_FUNC_ATTRIBUTE_TARGET
340 # define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3")))
341 # else
342 # define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func
343 # endif
344 #elif ZEND_INTRIN_AVX2_FUNC_PTR && ZEND_INTRIN_SSSE3_NATIVE
345 # undef ZEND_INTRIN_SSSE3_NATIVE
346 # undef ZEND_INTRIN_SSSE3_RESOLVER
347 # define ZEND_INTRIN_SSSE3_RESOLVER 1
348 # define ZEND_INTRIN_SSSE3_FUNC_PTR 1
349 # undef ZEND_INTRIN_SSSE3_FUNC_DECL
350 # ifdef HAVE_FUNC_ATTRIBUTE_TARGET
351 # define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3")))
352 # else
353 # define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func
354 # endif
355 #endif
356
357 #if ZEND_INTRIN_AVX2_NATIVE
358 # include <immintrin.h>
359 #elif ZEND_INTRIN_SSSE3_NATIVE
360 # include <tmmintrin.h>
361 #elif (ZEND_INTRIN_SSSE3_RESOLVER || ZEND_INTRIN_AVX2_RESOLVER)
362 # if ZEND_INTRIN_AVX2_RESOLVER
363 # include <immintrin.h>
364 # else
365 # include <tmmintrin.h>
366 # endif /* (ZEND_INTRIN_SSSE3_RESOLVER || ZEND_INTRIN_AVX2_RESOLVER) */
367 # include "Zend/zend_cpuinfo.h"
368
369 # if ZEND_INTRIN_AVX2_RESOLVER
370 ZEND_INTRIN_AVX2_FUNC_DECL(zend_string *php_base64_encode_avx2(const unsigned char *str, size_t length));
371 ZEND_INTRIN_AVX2_FUNC_DECL(zend_string *php_base64_decode_ex_avx2(const unsigned char *str, size_t length, zend_bool strict));
372 # endif
373
374 # if ZEND_INTRIN_SSSE3_RESOLVER
375 ZEND_INTRIN_SSSE3_FUNC_DECL(zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length));
376 ZEND_INTRIN_SSSE3_FUNC_DECL(zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, zend_bool strict));
377 # endif
378
379 zend_string *php_base64_encode_default(const unsigned char *str, size_t length);
380 zend_string *php_base64_decode_ex_default(const unsigned char *str, size_t length, zend_bool strict);
381
382 # if (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO)
383 PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length) __attribute__((ifunc("resolve_base64_encode")));
384 PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict) __attribute__((ifunc("resolve_base64_decode")));
385
386 typedef zend_string *(*base64_encode_func_t)(const unsigned char *, size_t);
387 typedef zend_string *(*base64_decode_func_t)(const unsigned char *, size_t, zend_bool);
388
389 ZEND_NO_SANITIZE_ADDRESS
390 ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */
resolve_base64_encode()391 static base64_encode_func_t resolve_base64_encode() {
392 # if ZEND_INTRIN_AVX2_FUNC_PROTO
393 if (zend_cpu_supports_avx2()) {
394 return php_base64_encode_avx2;
395 } else
396 # endif
397 #if ZEND_INTRIN_SSSE3_FUNC_PROTO
398 if (zend_cpu_supports_ssse3()) {
399 return php_base64_encode_ssse3;
400 }
401 #endif
402 return php_base64_encode_default;
403 }
404
405 ZEND_NO_SANITIZE_ADDRESS
406 ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */
resolve_base64_decode()407 static base64_decode_func_t resolve_base64_decode() {
408 # if ZEND_INTRIN_AVX2_FUNC_PROTO
409 if (zend_cpu_supports_avx2()) {
410 return php_base64_decode_ex_avx2;
411 } else
412 # endif
413 #if ZEND_INTRIN_SSSE3_FUNC_PROTO
414 if (zend_cpu_supports_ssse3()) {
415 return php_base64_decode_ex_ssse3;
416 }
417 #endif
418 return php_base64_decode_ex_default;
419 }
420 # else /* (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO) */
421
/* Implementations selected at runtime. Both pointers start out as NULL and
 * are assigned in PHP_MINIT_FUNCTION(base64_intrin). */
PHPAPI zend_string *(*php_base64_encode_ptr)(const unsigned char *str, size_t length) = NULL;
PHPAPI zend_string *(*php_base64_decode_ex_ptr)(const unsigned char *str, size_t length, zend_bool strict) = NULL;
424
/* Public encoder entry point for the function-pointer dispatch build:
 * forwards str/length unchanged to php_base64_encode_ptr and returns its
 * result. The pointer is not checked for NULL here. */
PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length) {
	return php_base64_encode_ptr(str, length);
}
/* Public decoder entry point for the function-pointer dispatch build:
 * forwards str/length/strict unchanged to php_base64_decode_ex_ptr and
 * returns its result. The pointer is not checked for NULL here. */
PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict) {
	return php_base64_decode_ex_ptr(str, length, strict);
}
431
/* Module-init hook that fills php_base64_encode_ptr and
 * php_base64_decode_ex_ptr. The AVX2 pair is chosen when its CPU check
 * succeeds, otherwise the SSSE3 pair when its check succeeds, otherwise the
 * portable default pair; a branch only exists if the matching
 * ZEND_INTRIN_*_FUNC_PTR switch is enabled. Always returns SUCCESS. */
PHP_MINIT_FUNCTION(base64_intrin)
{
# if ZEND_INTRIN_AVX2_FUNC_PTR
	if (zend_cpu_supports_avx2()) {
		php_base64_encode_ptr = php_base64_encode_avx2;
		php_base64_decode_ex_ptr = php_base64_decode_ex_avx2;
	} else
# endif
#if ZEND_INTRIN_SSSE3_FUNC_PTR
	if (zend_cpu_supports_ssse3()) {
		php_base64_encode_ptr = php_base64_encode_ssse3;
		php_base64_decode_ex_ptr = php_base64_decode_ex_ssse3;
	} else
#endif
	{
		/* fallback; also the tail of the else-chain built above */
		php_base64_encode_ptr = php_base64_encode_default;
		php_base64_decode_ex_ptr = php_base64_decode_ex_default;
	}
	return SUCCESS;
}
452 # endif /* (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO) */
453 #endif /* ZEND_INTRIN_AVX2_NATIVE */
454
455 #if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER
456 # if ZEND_INTRIN_AVX2_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
457 static __m256i php_base64_encode_avx2_reshuffle(__m256i in) __attribute__((target("avx2")));
458 static __m256i php_base64_encode_avx2_translate(__m256i in) __attribute__((target("avx2")));
459 # endif
/* Spread 24 input bytes (12 per 128-bit lane) into 32 bytes that each carry
 * one 6-bit value. The result still has to be mapped to ASCII by
 * php_base64_encode_avx2_translate(). */
static __m256i php_base64_encode_avx2_reshuffle(__m256i in)
{
	/* This one works with shifted (4 bytes) input in order to
	 * be able to work efficiently in the 2 128-bit lanes */

	/* input, bytes MSB to LSB:
	 * 0 0 0 0 x w v u t s r q p o n m
	 * l k j i h g f e d c b a 0 0 0 0 */
	const __m256i byte_order = _mm256_set_epi8(
		10, 11,  9, 10,
		 7,  8,  6,  7,
		 4,  5,  3,  4,
		 1,  2,  0,  1,

		14, 15, 13, 14,
		11, 12, 10, 11,
		 8,  9,  7,  8,
		 5,  6,  4,  5);
	const __m256i spread = _mm256_shuffle_epi8(in, byte_order);

	/* isolate the two groups of bit fields ... */
	const __m256i fields_hi = _mm256_and_si256(spread, _mm256_set1_epi32(0x0fc0fc00));
	const __m256i fields_lo = _mm256_and_si256(spread, _mm256_set1_epi32(0x003f03f0));

	/* ... and move them into place; the multiplications act as
	 * per-16-bit-element shifts (high product: right, low product: left) */
	const __m256i moved_down = _mm256_mulhi_epu16(fields_hi, _mm256_set1_epi32(0x04000040));
	const __m256i moved_up = _mm256_mullo_epi16(fields_lo, _mm256_set1_epi32(0x01000010));

	/* output (upper case are MSB, lower case are LSB):
	 * 00xxxxxx 00wwwwXX 00vvWWWW 00VVVVVV
	 * 00uuuuuu 00ttttUU 00ssTTTT 00SSSSSS
	 * 00rrrrrr 00qqqqRR 00ppQQQQ 00PPPPPP
	 * 00oooooo 00nnnnOO 00mmNNNN 00MMMMMM
	 * 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
	 * 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
	 * 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
	 * 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA */
	return _mm256_or_si256(moved_down, moved_up);
}
498
/* Map 32 6-bit values (one per byte) to their Base64 ASCII characters by
 * adding a per-range offset:
 *   [0..25]  +65,  [26..51] +71,  [52..61] -4,  [62] -19,  [63] -16 */
static __m256i php_base64_encode_avx2_translate(__m256i in)
{
	/* offsets by range index, repeated for both 128-bit lanes */
	const __m256i offsets = _mm256_setr_epi8(
		65, 71, -4, -4, -4, -4, -4, -4,
		-4, -4, -4, -4, -19, -16, 0, 0,
		65, 71, -4, -4, -4, -4, -4, -4,
		-4, -4, -4, -4, -19, -16, 0, 0);

	/* 0 for values up to 51, value - 51 above that */
	const __m256i above_51 = _mm256_subs_epu8(in, _mm256_set1_epi8(51));

	/* 0xFF (-1) for every value greater than 25 */
	const __m256i above_25 = _mm256_cmpgt_epi8(in, _mm256_set1_epi8(25));

	/* subtracting -1 bumps the index of everything but the first range */
	const __m256i range_index = _mm256_sub_epi8(above_51, above_25);

	return _mm256_add_epi8(in, _mm256_shuffle_epi8(offsets, range_index));
}
518 #endif /* ZEND_INTRIN_AVX2_NATIVE || (ZEND_INTRIN_AVX2_RESOLVER && !ZEND_INTRIN_SSSE3_NATIVE) */
519
520 #if ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER
521
522 # if ZEND_INTRIN_SSSE3_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
523 static __m128i php_base64_encode_ssse3_reshuffle(__m128i in) __attribute__((target("ssse3")));
524 static __m128i php_base64_encode_ssse3_translate(__m128i in) __attribute__((target("ssse3")));
525 # endif
526
/* Spread the low 12 input bytes into 16 bytes that each carry one 6-bit
 * value. The result still has to be mapped to ASCII by
 * php_base64_encode_ssse3_translate(). */
static __m128i php_base64_encode_ssse3_reshuffle(__m128i in)
{
	/* input, bytes MSB to LSB:
	 * 0 0 0 0 l k j i h g f e d c b a */
	const __m128i byte_order = _mm_set_epi8(
		10, 11,  9, 10,
		 7,  8,  6,  7,
		 4,  5,  3,  4,
		 1,  2,  0,  1);
	const __m128i spread = _mm_shuffle_epi8(in, byte_order);

	/* isolate the two groups of bit fields ... */
	const __m128i fields_hi = _mm_and_si128(spread, _mm_set1_epi32(0x0fc0fc00));
	const __m128i fields_lo = _mm_and_si128(spread, _mm_set1_epi32(0x003f03f0));

	/* ... and move them into place; the multiplications act as
	 * per-16-bit-element shifts (high product: right, low product: left) */
	const __m128i moved_down = _mm_mulhi_epu16(fields_hi, _mm_set1_epi32(0x04000040));
	const __m128i moved_up = _mm_mullo_epi16(fields_lo, _mm_set1_epi32(0x01000010));

	/* output (upper case are MSB, lower case are LSB):
	 * 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
	 * 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
	 * 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
	 * 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA */
	return _mm_or_si128(moved_down, moved_up);
}
554
/* Map 16 6-bit values (one per byte) to their Base64 ASCII characters. */
static __m128i php_base64_encode_ssse3_translate(__m128i in)
{
	/* Translate values 0..63 to the Base64 alphabet. There are five sets:
	 * #  From      To         Abs  Index    Characters
	 * 0  [0..25]   [65..90]   +65  0        ABCDEFGHIJKLMNOPQRSTUVWXYZ
	 * 1  [26..51]  [97..122]  +71  1        abcdefghijklmnopqrstuvwxyz
	 * 2  [52..61]  [48..57]    -4  [2..11]  0123456789
	 * 3  [62]      [43]       -19  12       +
	 * 4  [63]      [47]       -16  13       / */
	const __m128i offsets = _mm_setr_epi8(
		65, 71, -4, -4,
		-4, -4, -4, -4,
		-4, -4, -4, -4,
		-19, -16, 0, 0
	);

	/* First approximation of the index: correct for set #0, one too low
	 * for all other sets. */
	const __m128i above_51 = _mm_subs_epu8(in, _mm_set1_epi8(51));

	/* 0xFF (-1) for sets #1..#4, 0x00 for set #0 */
	const __m128i not_set0 = _mm_cmpgt_epi8(in, _mm_set1_epi8(25));

	/* Subtracting -1 adds the missing 1 for sets #1..#4. */
	const __m128i set_index = _mm_sub_epi8(above_51, not_set0);

	/* Apply the offset of each value's set. */
	return _mm_add_epi8(in, _mm_shuffle_epi8(offsets, set_index));
}
586
/* SSSE3 encode loop, expanded inside the SSSE3 encoder entry points.
 * The expanding function must provide:
 *   c      - input cursor (advanced by 12 per iteration)
 *   o      - output cursor (advanced by 16 per iteration)
 *   length - input bytes remaining (reduced by 12 per iteration)
 * Each iteration loads 16 bytes from c, of which the reshuffle step uses the
 * first 12, and stores 16 output characters at o. Iterating only while
 * length > 15 keeps the 16-byte load within the remaining input. */
#define PHP_BASE64_ENCODE_SSSE3_LOOP				\
	while (length > 15) {							\
		__m128i s = _mm_loadu_si128((__m128i *)c);	\
													\
		s = php_base64_encode_ssse3_reshuffle(s);	\
													\
		s = php_base64_encode_ssse3_translate(s);	\
													\
		_mm_storeu_si128((__m128i *)o, s);			\
		c += 12;									\
		o += 16;									\
		length -= 12;								\
	}
600
601 #endif /* ZEND_INTRIN_SSSE3_NATIVE || (ZEND_INTRIN_SSSE3_RESOLVER && !ZEND_INTRIN_AVX2_NATIVE) */
602
603 #if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER
604 # if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_SSSE3_NATIVE
php_base64_encode(const unsigned char * str,size_t length)605 PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length)
606 # elif ZEND_INTRIN_AVX2_RESOLVER
607 zend_string *php_base64_encode_avx2(const unsigned char *str, size_t length)
608 # else /* ZEND_INTRIN_SSSE3_RESOLVER */
609 zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length)
610 # endif
611 {
612 const unsigned char *c = str;
613 unsigned char *o;
614 zend_string *result;
615
616 result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
617 o = (unsigned char *)ZSTR_VAL(result);
618 # if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER
619 if (length > 31) {
620 __m256i s = _mm256_loadu_si256((__m256i *)c);
621
622 s = _mm256_permutevar8x32_epi32(s, _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6));
623
624 for (;;) {
625 s = php_base64_encode_avx2_reshuffle(s);
626
627 s = php_base64_encode_avx2_translate(s);
628
629 _mm256_storeu_si256((__m256i *)o, s);
630 c += 24;
631 o += 32;
632 length -= 24;
633 if (length < 28) {
634 break;
635 }
636 s = _mm256_loadu_si256((__m256i *)(c - 4));
637 }
638 }
639 # else
640 PHP_BASE64_ENCODE_SSSE3_LOOP;
641 # endif
642
643 o = php_base64_encode_impl(c, length, o);
644
645 ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result));
646
647 return result;
648 }
649
650 # if ZEND_INTRIN_SSSE3_RESOLVER && ZEND_INTRIN_AVX2_RESOLVER
php_base64_encode_ssse3(const unsigned char * str,size_t length)651 zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length)
652 {
653 const unsigned char *c = str;
654 unsigned char *o;
655 zend_string *result;
656
657 result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
658 o = (unsigned char *)ZSTR_VAL(result);
659
660 PHP_BASE64_ENCODE_SSSE3_LOOP;
661
662 o = php_base64_encode_impl(c, length, o);
663
664 ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result));
665
666 return result;
667 }
668 # endif
669 #endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */
670
671 /* }}} */
672
673 #if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER
674 # if ZEND_INTRIN_AVX2_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
675 static __m256i php_base64_decode_avx2_reshuffle(__m256i in) __attribute__((target("avx2")));
676 # endif
677
/* Pack 32 bytes that each hold one 6-bit value into 24 consecutive bytes.
 * The first three steps match php_base64_decode_ssse3_reshuffle() applied
 * to each 128-bit lane; the final permute moves the three result dwords of
 * the upper lane directly behind those of the lower lane. */
static __m256i php_base64_decode_avx2_reshuffle(__m256i in)
{
	/* combine neighbouring 6-bit values into 12-bit values ... */
	const __m256i pairs = _mm256_maddubs_epi16(in, _mm256_set1_epi32(0x01400140));

	/* ... and neighbouring 12-bit values into 24-bit values */
	const __m256i triples = _mm256_madd_epi16(pairs, _mm256_set1_epi32(0x00011000));

	/* reverse the three bytes of every dword and close the gaps inside
	 * each lane; the last four bytes of a lane use index -1 */
	const __m256i packed = _mm256_shuffle_epi8(triples, _mm256_setr_epi8(
		2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1,
		2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1));

	/* dwords 0..2 and 4..6 carry the data; the last two slots use index -1 */
	return _mm256_permutevar8x32_epi32(packed, _mm256_setr_epi32(0, 1, 2, 4, 5, 6, -1, -1));
}
692 #endif
693
694 #if ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER
695 # if ZEND_INTRIN_SSSE3_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
696 static __m128i php_base64_decode_ssse3_reshuffle(__m128i in) __attribute__((target("ssse3")));
697 # endif
698
/* Pack 16 bytes that each hold one 6-bit value into 12 consecutive bytes
 * at the low end of the result. */
static __m128i php_base64_decode_ssse3_reshuffle(__m128i in)
{
	/* combine neighbouring 6-bit values into 12-bit values:
	 * 0000kkkk LLllllll 0000JJJJ JJjjKKKK
	 * 0000hhhh IIiiiiii 0000GGGG GGggHHHH
	 * 0000eeee FFffffff 0000DDDD DDddEEEE
	 * 0000bbbb CCcccccc 0000AAAA AAaaBBBB */
	const __m128i pairs = _mm_maddubs_epi16(in, _mm_set1_epi32(0x01400140));

	/* combine neighbouring 12-bit values into 24-bit values:
	 * 00000000 JJJJJJjj KKKKkkkk LLllllll
	 * 00000000 GGGGGGgg HHHHhhhh IIiiiiii
	 * 00000000 DDDDDDdd EEEEeeee FFffffff
	 * 00000000 AAAAAAaa BBBBbbbb CCcccccc */
	const __m128i triples = _mm_madd_epi16(pairs, _mm_set1_epi32(0x00011000));

	/* reverse the three bytes of every dword and close the gaps:
	 * 00000000 00000000 00000000 00000000
	 * LLllllll KKKKkkkk JJJJJJjj IIiiiiii
	 * HHHHhhhh GGGGGGgg FFffffff EEEEeeee
	 * DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa */
	const __m128i pack_order = _mm_setr_epi8(
		 2,  1,  0,
		 6,  5,  4,
		10,  9,  8,
		14, 13, 12,
		-1, -1, -1, -1);
	return _mm_shuffle_epi8(triples, pack_order);
}
726
/* SSSE3 decode loop, expanded inside the SSSE3 decoder entry points.
 * The expanding function must provide:
 *   c      - input cursor (advanced by 16 per accepted chunk)
 *   o      - output cursor (advanced by 12 per accepted chunk)
 *   outl   - number of bytes decoded so far (increased by 12 per chunk)
 *   length - input bytes remaining (reduced by 16 per chunk)
 * Each iteration loads 16 characters and classifies them with two lookups,
 * lut_hi and lut_lo. If any byte of (lo & hi) compares greater than zero
 * the loop stops without consuming the chunk, so whatever follows is left
 * to the code after the macro. Otherwise the per-character offset from
 * lut_roll is added (0x2f gets its own entry through the eq_2f adjustment),
 * the values are packed by php_base64_decode_ssse3_reshuffle(), and a full
 * 16-byte vector is stored at o even though o advances by only 12.
 * The loop runs only while length > 15 + 6 + 2. */
#define PHP_BASE64_DECODE_SSSE3_LOOP								\
	while (length > 15 + 6 + 2) {									\
		__m128i lut_lo, lut_hi, lut_roll;							\
		__m128i hi_nibbles, lo_nibbles, hi, lo;						\
		__m128i s = _mm_loadu_si128((__m128i *)c);					\
																	\
		lut_lo = _mm_setr_epi8(										\
				0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,		\
				0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A);	\
		lut_hi = _mm_setr_epi8(										\
				0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,		\
				0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10);	\
		lut_roll = _mm_setr_epi8(									\
				0, 16, 19, 4, -65, -65, -71, -71,					\
				0, 0, 0, 0, 0, 0, 0, 0);							\
																	\
		hi_nibbles = _mm_and_si128(									\
						_mm_srli_epi32(s, 4), _mm_set1_epi8(0x2f));	\
		lo_nibbles = _mm_and_si128(s, _mm_set1_epi8(0x2f));			\
		hi = _mm_shuffle_epi8(lut_hi, hi_nibbles);					\
		lo = _mm_shuffle_epi8(lut_lo, lo_nibbles);					\
																	\
																	\
		if (UNEXPECTED(												\
			_mm_movemask_epi8(										\
				_mm_cmpgt_epi8(										\
					_mm_and_si128(lo, hi), _mm_set1_epi8(0))))) {	\
			break;													\
		} else {													\
			__m128i eq_2f, roll;									\
																	\
			eq_2f = _mm_cmpeq_epi8(s, _mm_set1_epi8(0x2f));			\
			roll = _mm_shuffle_epi8(								\
					lut_roll, _mm_add_epi8(eq_2f, hi_nibbles));		\
																	\
			s = _mm_add_epi8(s, roll);								\
			s = php_base64_decode_ssse3_reshuffle(s);				\
																	\
			_mm_storeu_si128((__m128i *)o, s);						\
																	\
			c += 16;												\
			o += 12;												\
			outl += 12;												\
			length -= 16;											\
		}															\
	}
773
774 #endif
775
776 #if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER
777 # if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_SSSE3_NATIVE
php_base64_decode_ex(const unsigned char * str,size_t length,zend_bool strict)778 PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict)
779 # elif ZEND_INTRIN_AVX2_RESOLVER
780 zend_string *php_base64_decode_ex_avx2(const unsigned char *str, size_t length, zend_bool strict)
781 # else
782 zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, zend_bool strict)
783 # endif
784 {
785 const unsigned char *c = str;
786 unsigned char *o;
787 size_t outl = 0;
788 zend_string *result;
789
790 result = zend_string_alloc(length, 0);
791 o = (unsigned char *)ZSTR_VAL(result);
792
793 /* See: "Faster Base64 Encoding and Decoding using AVX2 Instructions"
794 * https://arxiv.org/pdf/1704.00605.pdf */
795 # if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER
796 while (length > 31 + 11 + 2) {
797 __m256i lut_lo, lut_hi, lut_roll;
798 __m256i hi_nibbles, lo_nibbles, hi, lo;
799 __m256i str = _mm256_loadu_si256((__m256i *)c);
800
801 lut_lo = _mm256_setr_epi8(
802 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
803 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A,
804 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
805 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A);
806
807 lut_hi = _mm256_setr_epi8(
808 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
809 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
810 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
811 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10);
812
813 lut_roll = _mm256_setr_epi8(
814 0, 16, 19, 4, -65, -65, -71, -71,
815 0, 0, 0, 0, 0, 0, 0, 0,
816 0, 16, 19, 4, -65, -65, -71, -71,
817 0, 0, 0, 0, 0, 0, 0, 0);
818
819 hi_nibbles = _mm256_and_si256(_mm256_srli_epi32(str, 4), _mm256_set1_epi8(0x2f));
820 lo_nibbles = _mm256_and_si256(str, _mm256_set1_epi8(0x2f));
821 hi = _mm256_shuffle_epi8(lut_hi, hi_nibbles);
822 lo = _mm256_shuffle_epi8(lut_lo, lo_nibbles);
823
824 if (!_mm256_testz_si256(lo, hi)) {
825 break;
826 } else {
827 __m256i eq_2f, roll;
828 eq_2f = _mm256_cmpeq_epi8(str, _mm256_set1_epi8(0x2f));
829 roll = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2f, hi_nibbles));
830
831
832 str = _mm256_add_epi8(str, roll);
833
834 str = php_base64_decode_avx2_reshuffle(str);
835
836 _mm256_storeu_si256((__m256i *)o, str);
837
838 c += 32;
839 o += 24;
840 outl += 24;
841 length -= 32;
842 }
843 }
844 # else
845 PHP_BASE64_DECODE_SSSE3_LOOP;
846 # endif
847
848 if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) {
849 zend_string_efree(result);
850 return NULL;
851 }
852
853 ZSTR_LEN(result) = outl;
854
855 return result;
856 }
857
858 # if ZEND_INTRIN_SSSE3_RESOLVER && ZEND_INTRIN_AVX2_RESOLVER
php_base64_decode_ex_ssse3(const unsigned char * str,size_t length,zend_bool strict)859 zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, zend_bool strict)
860 {
861 const unsigned char *c = str;
862 unsigned char *o;
863 size_t outl = 0;
864 zend_string *result;
865
866 result = zend_string_alloc(length, 0);
867 o = (unsigned char *)ZSTR_VAL(result);
868
869 PHP_BASE64_DECODE_SSSE3_LOOP;
870
871 if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) {
872 zend_string_efree(result);
873 return NULL;
874 }
875
876 ZSTR_LEN(result) = outl;
877
878 return result;
879 }
880 # endif
881 #endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */
882
883 #if !ZEND_INTRIN_AVX2_NATIVE && !ZEND_INTRIN_SSSE3_NATIVE
884 #if ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_RESOLVER
php_base64_encode_default(const unsigned char * str,size_t length)885 zend_string *php_base64_encode_default(const unsigned char *str, size_t length)
886 #else
887 PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length)
888 #endif
889 {
890 unsigned char *p;
891 zend_string *result;
892
893 result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
894 p = (unsigned char *)ZSTR_VAL(result);
895
896 p = php_base64_encode_impl(str, length, p);
897
898 ZSTR_LEN(result) = (p - (unsigned char *)ZSTR_VAL(result));
899
900 return result;
901 }
902 #endif
903
904 #if !ZEND_INTRIN_AVX2_NATIVE && !ZEND_INTRIN_SSSE3_NATIVE
905 #if ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_RESOLVER
php_base64_decode_ex_default(const unsigned char * str,size_t length,zend_bool strict)906 zend_string *php_base64_decode_ex_default(const unsigned char *str, size_t length, zend_bool strict)
907 #else
908 PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict)
909 #endif
910 {
911 zend_string *result;
912 size_t outl = 0;
913
914 result = zend_string_alloc(length, 0);
915
916 if (!php_base64_decode_impl(str, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) {
917 zend_string_efree(result);
918 return NULL;
919 }
920
921 ZSTR_LEN(result) = outl;
922
923 return result;
924 }
925 #endif
926 /* }}} */
927
928 /* {{{ Encodes string using MIME base64 algorithm */
PHP_FUNCTION(base64_encode)929 PHP_FUNCTION(base64_encode)
930 {
931 char *str;
932 size_t str_len;
933 zend_string *result;
934
935 ZEND_PARSE_PARAMETERS_START(1, 1)
936 Z_PARAM_STRING(str, str_len)
937 ZEND_PARSE_PARAMETERS_END();
938
939 result = php_base64_encode((unsigned char*)str, str_len);
940 RETURN_STR(result);
941 }
942 /* }}} */
943
944 /* {{{ Decodes string using MIME base64 algorithm */
PHP_FUNCTION(base64_decode)945 PHP_FUNCTION(base64_decode)
946 {
947 char *str;
948 zend_bool strict = 0;
949 size_t str_len;
950 zend_string *result;
951
952 ZEND_PARSE_PARAMETERS_START(1, 2)
953 Z_PARAM_STRING(str, str_len)
954 Z_PARAM_OPTIONAL
955 Z_PARAM_BOOL(strict)
956 ZEND_PARSE_PARAMETERS_END();
957
958 result = php_base64_decode_ex((unsigned char*)str, str_len, strict);
959 if (result != NULL) {
960 RETURN_STR(result);
961 } else {
962 RETURN_FALSE;
963 }
964 }
965 /* }}} */
966