1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 7 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Jim Winstead <jimw@php.net> |
16 | Xinchen Hui <laruence@php.net> |
17 +----------------------------------------------------------------------+
18 */
19
20 #include <string.h>
21
22 #include "php.h"
23 #include "base64.h"
24
/* {{{ base64 tables */
/* Forward table: maps a 6-bit value (0..63) to its Base64 alphabet
 * character. The trailing '\0' only terminates the array for safety;
 * it is never reached by a 6-bit index. */
static const char base64_table[] = {
	'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
	'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
	'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
	'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
	'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', '\0'
};

static const char base64_pad = '=';

/* Reverse table: maps an input byte back to its 6-bit value.
 * -2 marks bytes outside the Base64 alphabet (rejected in strict mode),
 * -1 marks whitespace (HT, LF, CR, SP), which is skipped even in
 * strict mode (see php_base64_decode_impl). */
static const short base64_reverse_table[256] = {
	-2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -2, -2, -1, -2, -2,
	-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
	-1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 62, -2, -2, -2, 63,
	52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -2, -2, -2, -2, -2, -2,
	-2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
	15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -2, -2, -2, -2, -2,
	-2, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
	41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -2, -2, -2, -2, -2,
	-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
	-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
	-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
	-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
	-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
	-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
	-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
	-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2
};
/* }}} */
55
56 #ifdef __aarch64__
57 #include <arm_neon.h>
58
/* Translate 16 six-bit values (0..63) to Base64 ASCII by adding a
 * per-range offset looked up in shift_LUT (built in neon_base64_encode:
 * index 13 holds the offset for 'A'..'Z', index 0 for 'a'..'z',
 * indices 1..10 for digits, 11 for '+', 12 for '/'). */
static zend_always_inline uint8x16_t encode_toascii(const uint8x16_t input, const uint8x16x2_t shift_LUT)
{
	/* reduce  0..51 -> 0
	 *        52..61 -> 1 .. 10
	 *            62 -> 11
	 *            63 -> 12 */
	uint8x16_t result = vqsubq_u8(input, vdupq_n_u8(51));
	/* distinguish between ranges 0..25 and 26..51:
	 *   0 .. 25 -> becomes 13 (vcgtq is true for input < 26),
	 *              selecting shift_LUT[13] == 'A'
	 *  26 .. 51 -> remains 0, selecting shift_LUT[0] == 'a' - 26 */
	const uint8x16_t less = vcgtq_u8(vdupq_n_u8(26), input);
	result = vorrq_u8(result, vandq_u8(less, vdupq_n_u8(13)));
	/* read shift offset from the LUT and apply it */
	result = vqtbl2q_u8(shift_LUT, result);
	return vaddq_u8(result, input);
}
75
/* Encode full 48-byte blocks (16 lanes x 3 octets) into 64 Base64 chars
 * per iteration using NEON. Caller must guarantee inl >= 48; the number
 * of unprocessed trailing bytes (< 48) is returned via *left and the
 * advanced output pointer is returned. */
static zend_always_inline unsigned char *neon_base64_encode(const unsigned char *in, size_t inl, unsigned char *out, size_t *left)
{
	/* Offset table consumed by encode_toascii(); duplicated so both
	 * halves of the uint8x16x2_t hold identical data. */
	const uint8_t shift_LUT_[32] = {'a' - 26, '0' - 52, '0' - 52, '0' - 52,
					'0' - 52, '0' - 52, '0' - 52, '0' - 52,
					'0' - 52, '0' - 52, '0' - 52, '+' - 62,
					'/' - 63, 'A', 0, 0,
					'a' - 26, '0' - 52, '0' - 52, '0' - 52,
					'0' - 52, '0' - 52, '0' - 52, '0' - 52,
					'0' - 52, '0' - 52, '0' - 52, '+' - 62,
					'/' - 63, 'A', 0, 0};
	const uint8x16x2_t shift_LUT = *((const uint8x16x2_t *)shift_LUT_);
	do {
		/* De-interleaving load: 3 octets per lane.
		 * [ccdddddd | bbbbcccc | aaaaaabb]
		 *  x.val[2] | x.val[1] | x.val[0] */
		const uint8x16x3_t x = vld3q_u8((const uint8_t *)(in));

		/* [00aa_aaaa] */
		const uint8x16_t field_a = vshrq_n_u8(x.val[0], 2);

		const uint8x16_t field_b =			/* [00bb_bbbb] */
			vbslq_u8(vdupq_n_u8(0x30),		/* [0011_0000] */
				vshlq_n_u8(x.val[0], 4),	/* [aabb_0000] */
				vshrq_n_u8(x.val[1], 4));	/* [0000_bbbb] */

		const uint8x16_t field_c =			/* [00cc_cccc] */
			vbslq_u8(vdupq_n_u8(0x3c),		/* [0011_1100] */
				vshlq_n_u8(x.val[1], 2),	/* [bbcc_cc00] */
				vshrq_n_u8(x.val[2], 6));	/* [0000_00cc] */

		/* [00dd_dddd] */
		const uint8x16_t field_d = vandq_u8(x.val[2], vdupq_n_u8(0x3f));

		/* Translate the four 6-bit fields and store them interleaved,
		 * which restores the natural output character order. */
		uint8x16x4_t result;
		result.val[0] = encode_toascii(field_a, shift_LUT);
		result.val[1] = encode_toascii(field_b, shift_LUT);
		result.val[2] = encode_toascii(field_c, shift_LUT);
		result.val[3] = encode_toascii(field_d, shift_LUT);

		vst4q_u8((uint8_t *)out, result);
		out += 64;
		in += 16 * 3;
		inl -= 16 * 3;
	} while (inl >= 16 * 3);

	*left = inl;
	return out;
}
123 #endif /* __aarch64__ */
124
php_base64_encode_impl(const unsigned char * in,size_t inl,unsigned char * out)125 static zend_always_inline unsigned char *php_base64_encode_impl(const unsigned char *in, size_t inl, unsigned char *out) /* {{{ */
126 {
127 #ifdef __aarch64__
128 if (inl >= 16 * 3) {
129 size_t left = 0;
130 out = neon_base64_encode(in, inl, out, &left);
131 in += inl - left;
132 inl = left;
133 }
134 #endif
135
136 while (inl > 2) { /* keep going until we have less than 24 bits */
137 *out++ = base64_table[in[0] >> 2];
138 *out++ = base64_table[((in[0] & 0x03) << 4) + (in[1] >> 4)];
139 *out++ = base64_table[((in[1] & 0x0f) << 2) + (in[2] >> 6)];
140 *out++ = base64_table[in[2] & 0x3f];
141
142 in += 3;
143 inl -= 3; /* we just handle 3 octets of data */
144 }
145
146 /* now deal with the tail end of things */
147 if (inl != 0) {
148 *out++ = base64_table[in[0] >> 2];
149 if (inl > 1) {
150 *out++ = base64_table[((in[0] & 0x03) << 4) + (in[1] >> 4)];
151 *out++ = base64_table[(in[1] & 0x0f) << 2];
152 *out++ = base64_pad;
153 } else {
154 *out++ = base64_table[(in[0] & 0x03) << 4];
155 *out++ = base64_pad;
156 *out++ = base64_pad;
157 }
158 }
159
160 *out = '\0';
161
162 return out;
163 }
164 /* }}} */
165
166 #ifdef __aarch64__
/* Translate 16 Base64 ASCII bytes back to their 6-bit values. The shift
 * is selected by the byte's high nibble (e.g. -65 for 'A'..'Z', -71 for
 * 'a'..'z', +4 for digits); '/' (0x2f) shares a high nibble with '+' and
 * is special-cased with shift 16 so 0x2f + 16 == 63. Validity is checked
 * with a bitmask scheme: maskLUT[low nibble] has the bit bitposLUT[high
 * nibble] set iff the (high, low) nibble pair is a Base64 character.
 * *error gets 0xFF in every lane holding an invalid byte, 0x00 otherwise. */
static zend_always_inline uint8x16_t decode_fromascii(const uint8x16_t input, uint8x16_t *error, const uint8x16x2_t shiftLUT, const uint8x16x2_t maskLUT, const uint8x16x2_t bitposLUT) {
	const uint8x16_t higher_nibble = vshrq_n_u8(input, 4);
	const uint8x16_t lower_nibble = vandq_u8(input, vdupq_n_u8(0x0f));
	const uint8x16_t sh = vqtbl2q_u8(shiftLUT, higher_nibble);
	const uint8x16_t eq_2f = vceqq_u8(input, vdupq_n_u8(0x2f));
	const uint8x16_t shift = vbslq_u8(eq_2f, vdupq_n_u8(16), sh);
	const uint8x16_t M = vqtbl2q_u8(maskLUT, lower_nibble);
	const uint8x16_t bit = vqtbl2q_u8(bitposLUT, higher_nibble);
	*error = vceqq_u8(vandq_u8(M, bit), vdupq_n_u8(0));
	return vaddq_u8(input, shift);
}
178
neon_base64_decode(const unsigned char * in,size_t inl,unsigned char * out,size_t * left)179 static zend_always_inline size_t neon_base64_decode(const unsigned char *in, size_t inl, unsigned char *out, size_t *left) {
180 unsigned char *out_orig = out;
181 const uint8_t shiftLUT_[32] = {
182 0, 0, 19, 4, (uint8_t)-65, (uint8_t)-65, (uint8_t)-71, (uint8_t)-71,
183 0, 0, 0, 0, 0, 0, 0, 0,
184 0, 0, 19, 4, (uint8_t)-65, (uint8_t)-65, (uint8_t)-71, (uint8_t)-71,
185 0, 0, 0, 0, 0, 0, 0, 0};
186 const uint8_t maskLUT_[32] = {
187 /* 0 : 0b1010_1000*/ 0xa8,
188 /* 1 .. 9 : 0b1111_1000*/ 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
189 /* 10 : 0b1111_0000*/ 0xf0,
190 /* 11 : 0b0101_0100*/ 0x54,
191 /* 12 .. 14 : 0b0101_0000*/ 0x50, 0x50, 0x50,
192 /* 15 : 0b0101_0100*/ 0x54,
193
194 /* 0 : 0b1010_1000*/ 0xa8,
195 /* 1 .. 9 : 0b1111_1000*/ 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
196 /* 10 : 0b1111_0000*/ 0xf0,
197 /* 11 : 0b0101_0100*/ 0x54,
198 /* 12 .. 14 : 0b0101_0000*/ 0x50, 0x50, 0x50,
199 /* 15 : 0b0101_0100*/ 0x54
200 };
201 const uint8_t bitposLUT_[32] = {
202 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
203 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
204
205 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
206 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
207 };
208 const uint8x16x2_t shiftLUT = *((const uint8x16x2_t *)shiftLUT_);
209 const uint8x16x2_t maskLUT = *((const uint8x16x2_t *)maskLUT_);
210 const uint8x16x2_t bitposLUT = *((const uint8x16x2_t *)bitposLUT_);;
211
212 do {
213 const uint8x16x4_t x = vld4q_u8((const unsigned char *)in);
214 uint8x16_t error_a;
215 uint8x16_t error_b;
216 uint8x16_t error_c;
217 uint8x16_t error_d;
218 uint8x16_t field_a = decode_fromascii(x.val[0], &error_a, shiftLUT, maskLUT, bitposLUT);
219 uint8x16_t field_b = decode_fromascii(x.val[1], &error_b, shiftLUT, maskLUT, bitposLUT);
220 uint8x16_t field_c = decode_fromascii(x.val[2], &error_c, shiftLUT, maskLUT, bitposLUT);
221 uint8x16_t field_d = decode_fromascii(x.val[3], &error_d, shiftLUT, maskLUT, bitposLUT);
222
223 const uint8x16_t err = vorrq_u8(vorrq_u8(error_a, error_b), vorrq_u8(error_c, error_d));
224 union {uint8_t mem[16]; uint64_t dw[2]; } error;
225 vst1q_u8(error.mem, err);
226
227 /* Check that the input only contains bytes belonging to the alphabet of
228 Base64. If there are errors, decode the rest of the string with the
229 scalar decoder. */
230 if (error.dw[0] | error.dw[1])
231 break;
232
233 uint8x16x3_t result;
234 result.val[0] = vorrq_u8(vshrq_n_u8(field_b, 4), vshlq_n_u8(field_a, 2));
235 result.val[1] = vorrq_u8(vshrq_n_u8(field_c, 2), vshlq_n_u8(field_b, 4));
236 result.val[2] = vorrq_u8(field_d, vshlq_n_u8(field_c, 6));
237
238 vst3q_u8((unsigned char *)out, result);
239 out += 16 * 3;
240 in += 16 * 4;
241 inl -= 16 * 4;
242 } while (inl >= 16 * 4);
243 *left = inl;
244 return out - out_orig;
245 }
246 #endif /* __aarch64__ */
247
/* Core scalar decoder: decodes inl Base64 characters from in, appending
 * the raw bytes to out starting at offset *outl. Returns 1 on success
 * (with *outl updated to the total output length and out NUL-terminated)
 * or 0 on failure. Failure is only possible when strict is true: invalid
 * characters, data after padding, a truncated final group, or malformed
 * padding all reject the input. In non-strict mode every byte outside
 * the alphabet is silently skipped. */
static zend_always_inline int php_base64_decode_impl(const unsigned char *in, size_t inl, unsigned char *out, size_t *outl, zend_bool strict) /* {{{ */
{
	int ch;
	/* i counts accepted alphabet characters (drives the 4-char group
	 * state machine), padding counts '=' seen, j is the write offset. */
	size_t i = 0, padding = 0, j = *outl;

#ifdef __aarch64__
	/* Bulk-decode 64-char chunks with NEON; it stops at the first chunk
	 * containing a non-alphabet byte and the scalar loop below picks up
	 * from there. The NEON-consumed count is a multiple of 64, so i stays
	 * consistent modulo 4. */
	if (inl >= 16 * 4) {
		size_t left = 0;
		j += neon_base64_decode(in, inl, out, &left);
		i = inl - left;
		in += i;
		inl = left;
	}
#endif

	/* run through the whole string, converting as we go */
	while (inl-- > 0) {
		ch = *in++;
		if (ch == base64_pad) {
			padding++;
			continue;
		}

		ch = base64_reverse_table[ch];
		if (!strict) {
			/* skip unknown characters and whitespace */
			if (ch < 0) {
				continue;
			}
		} else {
			/* skip whitespace */
			if (ch == -1) {
				continue;
			}
			/* fail on bad characters or if any data follows padding */
			if (ch == -2 || padding) {
				goto fail;
			}
		}

		/* Accumulate 6 bits per character; each group of 4 characters
		 * produces 3 output bytes. */
		switch (i % 4) {
		case 0:
			out[j] = ch << 2;
			break;
		case 1:
			out[j++] |= ch >> 4;
			out[j] = (ch & 0x0f) << 4;
			break;
		case 2:
			out[j++] |= ch >>2;
			out[j] = (ch & 0x03) << 6;
			break;
		case 3:
			out[j++] |= ch;
			break;
		}
		i++;
	}

	/* fail if the input is truncated (only one char in last group) */
	if (strict && i % 4 == 1) {
		goto fail;
	}

	/* fail if the padding length is wrong (not VV==, VVV=), but accept zero padding
	 * RFC 4648: "In some circumstances, the use of padding [--] is not required" */
	if (strict && padding && (padding > 2 || (i + padding) % 4 != 0)) {
		goto fail;
	}

	*outl = j;
	out[j] = '\0';

	return 1;

fail:
	return 0;
}
/* }}} */
327
328 /* {{{ php_base64_encode */
329
330 #if ZEND_INTRIN_AVX2_NATIVE
331 # undef ZEND_INTRIN_SSSE3_NATIVE
332 # undef ZEND_INTRIN_SSSE3_RESOLVER
333 # undef ZEND_INTRIN_SSSE3_FUNC_PROTO
334 # undef ZEND_INTRIN_SSSE3_FUNC_PTR
335 #elif ZEND_INTRIN_AVX2_FUNC_PROTO && ZEND_INTRIN_SSSE3_NATIVE
336 # undef ZEND_INTRIN_SSSE3_NATIVE
337 # undef ZEND_INTRIN_SSSE3_RESOLVER
338 # define ZEND_INTRIN_SSSE3_RESOLVER 1
339 # define ZEND_INTRIN_SSSE3_FUNC_PROTO 1
340 # undef ZEND_INTRIN_SSSE3_FUNC_DECL
341 # ifdef HAVE_FUNC_ATTRIBUTE_TARGET
342 # define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3")))
343 # else
344 # define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func
345 # endif
346 #elif ZEND_INTRIN_AVX2_FUNC_PTR && ZEND_INTRIN_SSSE3_NATIVE
347 # undef ZEND_INTRIN_SSSE3_NATIVE
348 # undef ZEND_INTRIN_SSSE3_RESOLVER
349 # define ZEND_INTRIN_SSSE3_RESOLVER 1
350 # define ZEND_INTRIN_SSSE3_FUNC_PTR 1
351 # undef ZEND_INTRIN_SSSE3_FUNC_DECL
352 # ifdef HAVE_FUNC_ATTRIBUTE_TARGET
353 # define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3")))
354 # else
355 # define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func
356 # endif
357 #endif
358
359 #if ZEND_INTRIN_AVX2_NATIVE
360 # include <immintrin.h>
361 #elif ZEND_INTRIN_SSSE3_NATIVE
362 # include <tmmintrin.h>
363 #elif (ZEND_INTRIN_SSSE3_RESOLVER || ZEND_INTRIN_AVX2_RESOLVER)
364 # if ZEND_INTRIN_AVX2_RESOLVER
365 # include <immintrin.h>
366 # else
367 # include <tmmintrin.h>
368 # endif /* (ZEND_INTRIN_SSSE3_RESOLVER || ZEND_INTRIN_AVX2_RESOLVER) */
369 # include "Zend/zend_cpuinfo.h"
370
371 # if ZEND_INTRIN_AVX2_RESOLVER
372 ZEND_INTRIN_AVX2_FUNC_DECL(zend_string *php_base64_encode_avx2(const unsigned char *str, size_t length));
373 ZEND_INTRIN_AVX2_FUNC_DECL(zend_string *php_base64_decode_ex_avx2(const unsigned char *str, size_t length, zend_bool strict));
374 # endif
375
376 # if ZEND_INTRIN_SSSE3_RESOLVER
377 ZEND_INTRIN_SSSE3_FUNC_DECL(zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length));
378 ZEND_INTRIN_SSSE3_FUNC_DECL(zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, zend_bool strict));
379 # endif
380
381 zend_string *php_base64_encode_default(const unsigned char *str, size_t length);
382 zend_string *php_base64_decode_ex_default(const unsigned char *str, size_t length, zend_bool strict);
383
384 # if (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO)
385 PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length) __attribute__((ifunc("resolve_base64_encode")));
386 PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict) __attribute__((ifunc("resolve_base64_decode")));
387
388 ZEND_NO_SANITIZE_ADDRESS
389 ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */
resolve_base64_encode()390 static void *resolve_base64_encode() {
391 # if ZEND_INTRIN_AVX2_FUNC_PROTO
392 if (zend_cpu_supports_avx2()) {
393 return php_base64_encode_avx2;
394 } else
395 # endif
396 #if ZEND_INTRIN_SSSE3_FUNC_PROTO
397 if (zend_cpu_supports_ssse3()) {
398 return php_base64_encode_ssse3;
399 }
400 #endif
401 return php_base64_encode_default;
402 }
403
404 ZEND_NO_SANITIZE_ADDRESS
405 ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */
resolve_base64_decode()406 static void *resolve_base64_decode() {
407 # if ZEND_INTRIN_AVX2_FUNC_PROTO
408 if (zend_cpu_supports_avx2()) {
409 return php_base64_decode_ex_avx2;
410 } else
411 # endif
412 #if ZEND_INTRIN_SSSE3_FUNC_PROTO
413 if (zend_cpu_supports_ssse3()) {
414 return php_base64_decode_ex_ssse3;
415 }
416 #endif
417 return php_base64_decode_ex_default;
418 }
419 # else /* (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO) */
420
/* Function-pointer dispatch, used when ifunc resolvers are not available.
 * The pointers are installed by PHP_MINIT_FUNCTION(base64_intrin) below. */
PHPAPI zend_string *(*php_base64_encode_ptr)(const unsigned char *str, size_t length) = NULL;
PHPAPI zend_string *(*php_base64_decode_ex_ptr)(const unsigned char *str, size_t length, zend_bool strict) = NULL;

/* Public entry points forward to the implementation selected at MINIT. */
PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length) {
	return php_base64_encode_ptr(str, length);
}
PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict) {
	return php_base64_decode_ex_ptr(str, length, strict);
}
430
/* Module init: runtime CPU detection selects the best available
 * implementation and stores it in the dispatch pointers above.
 * Falls back to the portable scalar versions. */
PHP_MINIT_FUNCTION(base64_intrin)
{
# if ZEND_INTRIN_AVX2_FUNC_PTR
	if (zend_cpu_supports_avx2()) {
		php_base64_encode_ptr = php_base64_encode_avx2;
		php_base64_decode_ex_ptr = php_base64_decode_ex_avx2;
	} else
# endif
#if ZEND_INTRIN_SSSE3_FUNC_PTR
	if (zend_cpu_supports_ssse3()) {
		php_base64_encode_ptr = php_base64_encode_ssse3;
		php_base64_decode_ex_ptr = php_base64_decode_ex_ssse3;
	} else
#endif
	{
		php_base64_encode_ptr = php_base64_encode_default;
		php_base64_decode_ex_ptr = php_base64_decode_ex_default;
	}
	return SUCCESS;
}
451 # endif /* (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO) */
452 #endif /* ZEND_INTRIN_AVX2_NATIVE */
453
454 #if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER
455 # if ZEND_INTRIN_AVX2_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
456 static __m256i php_base64_encode_avx2_reshuffle(__m256i in) __attribute__((target("avx2")));
457 static __m256i php_base64_encode_avx2_translate(__m256i in) __attribute__((target("avx2")));
458 # endif
/* Split 24 payload bytes (12 per 128-bit lane) into 32 six-bit fields,
 * one per output byte, each in the low 6 bits. AVX2 twin of
 * php_base64_encode_ssse3_reshuffle. */
static __m256i php_base64_encode_avx2_reshuffle(__m256i in)
{
	/* This one works with shifted (4 bytes) input in order to
	 * be able to work efficiently in the 2 128-bit lanes */
	__m256i t0, t1, t2, t3;

	/* input, bytes MSB to LSB:
	 * 0 0 0 0 x w v u t s r q p o n m
	 * l k j i h g f e d c b a 0 0 0 0 */
	in = _mm256_shuffle_epi8(in, _mm256_set_epi8(
			10, 11,  9, 10,
			 7,  8,  6,  7,
			 4,  5,  3,  4,
			 1,  2,  0,  1,

			14, 15, 13, 14,
			11, 12, 10, 11,
			 8,  9,  7,  8,
			 5,  6,  4,  5));

	/* isolate the two 6-bit fields that sit in the high halves... */
	t0 = _mm256_and_si256(in, _mm256_set1_epi32(0x0fc0fc00));

	/* ...and shift them down into position with a multiply-high */
	t1 = _mm256_mulhi_epu16(t0, _mm256_set1_epi32(0x04000040));

	/* isolate the two low 6-bit fields... */
	t2 = _mm256_and_si256(in, _mm256_set1_epi32(0x003f03f0));

	/* ...and shift them up into position with a multiply-low */
	t3 = _mm256_mullo_epi16(t2, _mm256_set1_epi32(0x01000010));

	return _mm256_or_si256(t1, t3);
	/* 00xxxxxx 00wwwwXX 00vvWWWW 00VVVVVV
	 * 00uuuuuu 00ttttUU 00ssTTTT 00SSSSSS
	 * 00rrrrrr 00qqqqRR 00ppQQQQ 00PPPPPP
	 * 00oooooo 00nnnnOO 00mmNNNN 00MMMMMM
	 * 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
	 * 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
	 * 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
	 * 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA */
}
497
/* Translate 32 six-bit values (0..63) to Base64 ASCII. AVX2 twin of
 * php_base64_encode_ssse3_translate; see that function for the full
 * derivation of the LUT offsets:
 *   [0..25]->+65 'A'-'Z', [26..51]->+71 'a'-'z', [52..61]->-4 digits,
 *   [62]->-19 '+', [63]->-16 '/'. */
static __m256i php_base64_encode_avx2_translate(__m256i in)
{
	__m256i lut, indices, mask;

	lut = _mm256_setr_epi8(
			65, 71, -4, -4, -4, -4, -4, -4,
			-4, -4, -4, -4, -19, -16, 0, 0,
			65, 71, -4, -4, -4, -4, -4, -4,
			-4, -4, -4, -4, -19, -16, 0, 0);

	/* saturating subtract: index is right for range #0, one less for the rest */
	indices = _mm256_subs_epu8(in, _mm256_set1_epi8(51));

	/* mask is -1 for values > 25; subtracting it bumps those indices by one */
	mask = _mm256_cmpgt_epi8(in, _mm256_set1_epi8(25));

	indices = _mm256_sub_epi8(indices, mask);

	/* add the per-range ASCII offset looked up from the LUT */
	return _mm256_add_epi8(in, _mm256_shuffle_epi8(lut, indices));

}
517 #endif /* ZEND_INTRIN_AVX2_NATIVE || (ZEND_INTRIN_AVX2_RESOLVER && !ZEND_INTRIN_SSSE3_NATIVE) */
518
519 #if ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER
520
521 # if ZEND_INTRIN_SSSE3_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
522 static __m128i php_base64_encode_ssse3_reshuffle(__m128i in) __attribute__((target("ssse3")));
523 static __m128i php_base64_encode_ssse3_translate(__m128i in) __attribute__((target("ssse3")));
524 # endif
525
/* Split 12 payload bytes into 16 six-bit fields, one per output byte,
 * each in the low 6 bits, ready for translation to ASCII. */
static __m128i php_base64_encode_ssse3_reshuffle(__m128i in)
{
	__m128i t0, t1, t2, t3;

	/* input, bytes MSB to LSB:
	 * 0 0 0 0 l k j i h g f e d c b a */
	in = _mm_shuffle_epi8(in, _mm_set_epi8(
			10, 11,  9, 10,
			 7,  8,  6,  7,
			 4,  5,  3,  4,
			 1,  2,  0,  1));

	/* isolate the two 6-bit fields that sit in the high halves... */
	t0 = _mm_and_si128(in, _mm_set1_epi32(0x0fc0fc00));

	/* ...and shift them down into position with a multiply-high */
	t1 = _mm_mulhi_epu16(t0, _mm_set1_epi32(0x04000040));

	/* isolate the two low 6-bit fields... */
	t2 = _mm_and_si128(in, _mm_set1_epi32(0x003f03f0));

	/* ...and shift them up into position with a multiply-low */
	t3 = _mm_mullo_epi16(t2, _mm_set1_epi32(0x01000010));

	/* output (upper case are MSB, lower case are LSB):
	 * 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
	 * 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
	 * 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
	 * 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA */
	return _mm_or_si128(t1, t3);
}
553
/* Translate 16 six-bit values (0..63) to their Base64 ASCII characters
 * by adding a per-range offset selected via pshufb. */
static __m128i php_base64_encode_ssse3_translate(__m128i in)
{
	__m128i mask, indices;
	__m128i lut = _mm_setr_epi8(
			 65,  71, -4, -4,
			 -4,  -4, -4, -4,
			 -4,  -4, -4, -4,
			-19, -16,  0,  0
		);

	/* Translate values 0..63 to the Base64 alphabet. There are five sets:
	 * #  From      To         Abs    Index  Characters
	 * 0  [0..25]   [65..90]   +65        0  ABCDEFGHIJKLMNOPQRSTUVWXYZ
	 * 1  [26..51]  [97..122]  +71        1  abcdefghijklmnopqrstuvwxyz
	 * 2  [52..61]  [48..57]    -4  [2..11]  0123456789
	 * 3  [62]      [43]       -19       12  +
	 * 4  [63]      [47]       -16       13  / */

	/* Create LUT indices from input:
	 * the index for range #0 is right, others are 1 less than expected: */
	indices = _mm_subs_epu8(in, _mm_set1_epi8(51));

	/* mask is 0xFF (-1) for range #[1..4] and 0x00 for range #0: */
	mask = _mm_cmpgt_epi8(in, _mm_set1_epi8(25));

	/* subtract -1, so add 1 to indices for range #[1..4], All indices are now correct: */
	indices = _mm_sub_epi8(indices, mask);

	/* Add offsets to input values: */
	return _mm_add_epi8(in, _mm_shuffle_epi8(lut, indices));
}
585
/* Encode 12 input bytes into 16 Base64 characters per iteration. Each
 * 16-byte load only uses its first 12 bytes, so the loop requires at
 * least 16 readable bytes at c — hence the "length > 15" guard. The
 * leftover tail (< 16 bytes) is handled by php_base64_encode_impl. */
#define PHP_BASE64_ENCODE_SSSE3_LOOP				\
	while (length > 15) {							\
		__m128i s = _mm_loadu_si128((__m128i *)c);	\
													\
		s = php_base64_encode_ssse3_reshuffle(s);	\
													\
		s = php_base64_encode_ssse3_translate(s);	\
													\
		_mm_storeu_si128((__m128i *)o, s);			\
		c += 12;									\
		o += 16;									\
		length -= 12;								\
	}
599
600 #endif /* ZEND_INTRIN_SSSE3_NATIVE || (ZEND_INTRIN_SSSE3_RESOLVER && !ZEND_INTRIN_AVX2_NATIVE) */
601
602 #if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER
# if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_SSSE3_NATIVE
PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length)
# elif ZEND_INTRIN_AVX2_RESOLVER
zend_string *php_base64_encode_avx2(const unsigned char *str, size_t length)
# else /* ZEND_INTRIN_SSSE3_RESOLVER */
zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length)
# endif
{
	const unsigned char *c = str;
	unsigned char *o;
	zend_string *result;

	/* 4 output characters for every started 3-byte group, plus NUL. */
	result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
	o = (unsigned char *)ZSTR_VAL(result);
# if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER
	if (length > 31) {
		__m256i s = _mm256_loadu_si256((__m256i *)c);

		/* The reshuffle expects 24 payload bytes at byte offsets 4..15 of
		 * each 128-bit lane; the first iteration fakes the 4-byte shift
		 * with this permute, later iterations simply load from c - 4. */
		s = _mm256_permutevar8x32_epi32(s, _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6));

		for (;;) {
			s = php_base64_encode_avx2_reshuffle(s);

			s = php_base64_encode_avx2_translate(s);

			_mm256_storeu_si256((__m256i *)o, s);
			c += 24;
			o += 32;
			length -= 24;
			/* the next 32-byte load at c - 4 needs length + 4 >= 32 */
			if (length < 28) {
				break;
			}
			s = _mm256_loadu_si256((__m256i *)(c - 4));
		}
	}
# else
	PHP_BASE64_ENCODE_SSSE3_LOOP;
# endif

	/* scalar code finishes the remaining (< one vector) tail */
	o = php_base64_encode_impl(c, length, o);

	ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result));

	return result;
}
648
649 # if ZEND_INTRIN_SSSE3_RESOLVER && ZEND_INTRIN_AVX2_RESOLVER
/* SSSE3 variant built alongside the AVX2 one when both resolvers are
 * active; selected at runtime by resolve_base64_encode(). */
zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length)
{
	const unsigned char *c = str;
	unsigned char *o;
	zend_string *result;

	/* 4 output characters for every started 3-byte group, plus NUL. */
	result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
	o = (unsigned char *)ZSTR_VAL(result);

	PHP_BASE64_ENCODE_SSSE3_LOOP;

	/* scalar code finishes the remaining (< 16 byte) tail */
	o = php_base64_encode_impl(c, length, o);

	ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result));

	return result;
}
667 # endif
668 #endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */
669
670 /* }}} */
671
672 #if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER
673 # if ZEND_INTRIN_AVX2_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
674 static __m256i php_base64_decode_avx2_reshuffle(__m256i in) __attribute__((target("avx2")));
675 # endif
676
/* Pack 32 six-bit values into 24 contiguous output bytes (upper 8 bytes
 * of the result are don't-care). AVX2 twin of
 * php_base64_decode_ssse3_reshuffle — see that function for the
 * bit-layout commentary of the two multiply-add steps. */
static __m256i php_base64_decode_avx2_reshuffle(__m256i in)
{
	__m256i merge_ab_and_bc, out;

	/* merge adjacent 6-bit fields into 12-bit values */
	merge_ab_and_bc = _mm256_maddubs_epi16(in, _mm256_set1_epi32(0x01400140));

	/* merge adjacent 12-bit values into 24-bit values */
	out = _mm256_madd_epi16(merge_ab_and_bc, _mm256_set1_epi32(0x00011000));

	/* drop the zero byte of each 32-bit word and fix the byte order */
	out = _mm256_shuffle_epi8(out, _mm256_setr_epi8(
		 2,  1,  0,  6,  5,  4, 10,  9,  8, 14, 13, 12, -1, -1, -1, -1,
		 2,  1,  0,  6,  5,  4, 10,  9,  8, 14, 13, 12, -1, -1, -1, -1));

	/* compact the two 12-byte lane halves into the low 24 bytes */
	return _mm256_permutevar8x32_epi32(out, _mm256_setr_epi32(0, 1, 2, 4, 5, 6, -1, -1));
}
691 #endif
692
693 #if ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER
694 # if ZEND_INTRIN_SSSE3_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
695 static __m128i php_base64_decode_ssse3_reshuffle(__m128i in) __attribute__((target("ssse3")));
696 # endif
697
/* Pack 16 six-bit values into 12 contiguous output bytes (the top 4
 * bytes of the result are don't-care). */
static __m128i php_base64_decode_ssse3_reshuffle(__m128i in)
{
	__m128i merge_ab_and_bc, out;

	/* merge adjacent 6-bit fields into 12-bit values */
	merge_ab_and_bc = _mm_maddubs_epi16(in, _mm_set1_epi32(0x01400140));
	/* 0000kkkk LLllllll 0000JJJJ JJjjKKKK
	 * 0000hhhh IIiiiiii 0000GGGG GGggHHHH
	 * 0000eeee FFffffff 0000DDDD DDddEEEE
	 * 0000bbbb CCcccccc 0000AAAA AAaaBBBB */

	/* merge adjacent 12-bit values into 24-bit values */
	out = _mm_madd_epi16(merge_ab_and_bc, _mm_set1_epi32(0x00011000));
	/* 00000000 JJJJJJjj KKKKkkkk LLllllll
	 * 00000000 GGGGGGgg HHHHhhhh IIiiiiii
	 * 00000000 DDDDDDdd EEEEeeee FFffffff
	 * 00000000 AAAAAAaa BBBBbbbb CCcccccc */

	/* drop the zero byte of each 32-bit word and fix the byte order */
	return _mm_shuffle_epi8(out, _mm_setr_epi8(
		 2,  1,  0,
		 6,  5,  4,
		10,  9,  8,
		14, 13, 12,
		-1, -1, -1, -1));
	/* 00000000 00000000 00000000 00000000
	 * LLllllll KKKKkkkk JJJJJJjj IIiiiiii
	 * HHHHhhhh GGGGGGgg FFffffff EEEEeeee
	 * DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa */
}
725
/* Decode 16 Base64 characters into 12 bytes per iteration. lut_lo/lut_hi
 * validate each byte via a nibble-indexed bitmask (a nonzero AND flags a
 * byte outside the strict alphabet — then we break out and let the
 * scalar decoder handle the rest, including '=' padding and whitespace);
 * lut_roll holds the per-high-nibble ASCII->value shifts, with '/'
 * steered to a distinct slot through the eq_2f adjustment. The guard
 * "length > 15 + 6 + 2" keeps enough tail for the scalar decoder to
 * consume valid padding. */
#define PHP_BASE64_DECODE_SSSE3_LOOP								\
	while (length > 15 + 6 + 2) {									\
		__m128i lut_lo, lut_hi, lut_roll;							\
		__m128i hi_nibbles, lo_nibbles, hi, lo;						\
		__m128i s = _mm_loadu_si128((__m128i *)c);					\
																	\
		lut_lo = _mm_setr_epi8(										\
				0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,		\
				0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A);	\
		lut_hi = _mm_setr_epi8(										\
				0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,		\
				0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10);	\
		lut_roll = _mm_setr_epi8(									\
				0, 16, 19, 4, -65, -65, -71, -71,					\
				0, 0, 0, 0, 0, 0, 0, 0);							\
																	\
		hi_nibbles = _mm_and_si128(									\
				_mm_srli_epi32(s, 4), _mm_set1_epi8(0x2f));			\
		lo_nibbles = _mm_and_si128(s, _mm_set1_epi8(0x2f));			\
		hi = _mm_shuffle_epi8(lut_hi, hi_nibbles);					\
		lo = _mm_shuffle_epi8(lut_lo, lo_nibbles);					\
																	\
																	\
		if (UNEXPECTED(												\
			_mm_movemask_epi8(										\
				_mm_cmpgt_epi8(										\
					_mm_and_si128(lo, hi), _mm_set1_epi8(0))))) {	\
			break;													\
		} else {													\
			__m128i eq_2f, roll;									\
																	\
			eq_2f = _mm_cmpeq_epi8(s, _mm_set1_epi8(0x2f));			\
			roll = _mm_shuffle_epi8(								\
					lut_roll, _mm_add_epi8(eq_2f, hi_nibbles));		\
																	\
			s = _mm_add_epi8(s, roll);								\
			s = php_base64_decode_ssse3_reshuffle(s);				\
																	\
			_mm_storeu_si128((__m128i *)o, s);						\
																	\
			c += 16;												\
			o += 12;												\
			outl += 12;												\
			length -= 16;											\
		}															\
	}
772
773 #endif
774
775 #if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER
# if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_SSSE3_NATIVE
PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict)
# elif ZEND_INTRIN_AVX2_RESOLVER
zend_string *php_base64_decode_ex_avx2(const unsigned char *str, size_t length, zend_bool strict)
# else
zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, zend_bool strict)
# endif
{
	const unsigned char *c = str;
	unsigned char *o;
	size_t outl = 0;
	zend_string *result;

	/* decoded output never exceeds the input length */
	result = zend_string_alloc(length, 0);
	o = (unsigned char *)ZSTR_VAL(result);

	/* See: "Faster Base64 Encoding and Decoding using AVX2 Instructions"
	 * https://arxiv.org/pdf/1704.00605.pdf */
# if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER
	/* Decode 32 chars -> 24 bytes per iteration. The guard leaves a tail
	 * for the scalar decoder to consume padding; the break on invalid
	 * bytes (including '=' and whitespace) falls back to it too. */
	while (length > 31 + 11 + 2) {
		__m256i lut_lo, lut_hi, lut_roll;
		__m256i hi_nibbles, lo_nibbles, hi, lo;
		__m256i str = _mm256_loadu_si256((__m256i *)c);

		/* validity bitmasks indexed by low nibble (see SSSE3 macro) */
		lut_lo = _mm256_setr_epi8(
				0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
				0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A,
				0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
				0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A);

		/* single bit per high nibble, ANDed against lut_lo's mask */
		lut_hi = _mm256_setr_epi8(
				0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
				0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
				0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
				0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10);

		/* per-high-nibble ASCII -> 6-bit value shifts; slot 1 (16) is
		 * reached only for '/' via the eq_2f adjustment below */
		lut_roll = _mm256_setr_epi8(
				0, 16, 19, 4, -65, -65, -71, -71,
				0, 0, 0, 0, 0, 0, 0, 0,
				0, 16, 19, 4, -65, -65, -71, -71,
				0, 0, 0, 0, 0, 0, 0, 0);

		hi_nibbles = _mm256_and_si256(_mm256_srli_epi32(str, 4), _mm256_set1_epi8(0x2f));
		lo_nibbles = _mm256_and_si256(str, _mm256_set1_epi8(0x2f));
		hi = _mm256_shuffle_epi8(lut_hi, hi_nibbles);
		lo = _mm256_shuffle_epi8(lut_lo, lo_nibbles);

		/* any nonzero AND means a byte outside the strict alphabet:
		 * hand the remainder to the scalar decoder */
		if (!_mm256_testz_si256(lo, hi)) {
			break;
		} else {
			__m256i eq_2f, roll;
			eq_2f = _mm256_cmpeq_epi8(str, _mm256_set1_epi8(0x2f));
			roll = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2f, hi_nibbles));


			str = _mm256_add_epi8(str, roll);

			str = php_base64_decode_avx2_reshuffle(str);

			_mm256_storeu_si256((__m256i *)o, str);

			c += 32;
			o += 24;
			outl += 24;
			length -= 32;
		}
	}
# else
	PHP_BASE64_DECODE_SSSE3_LOOP;
# endif

	/* scalar decoder handles the tail, padding, whitespace and errors */
	if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) {
		zend_string_efree(result);
		return NULL;
	}

	ZSTR_LEN(result) = outl;

	return result;
}
856
857 # if ZEND_INTRIN_SSSE3_RESOLVER && ZEND_INTRIN_AVX2_RESOLVER
/* SSSE3 variant built alongside the AVX2 one when both resolvers are
 * active; selected at runtime by resolve_base64_decode(). */
zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, zend_bool strict)
{
	const unsigned char *c = str;
	unsigned char *o;
	size_t outl = 0;
	zend_string *result;

	/* decoded output never exceeds the input length */
	result = zend_string_alloc(length, 0);
	o = (unsigned char *)ZSTR_VAL(result);

	PHP_BASE64_DECODE_SSSE3_LOOP;

	/* scalar decoder handles the tail, padding, whitespace and errors */
	if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) {
		zend_string_efree(result);
		return NULL;
	}

	ZSTR_LEN(result) = outl;

	return result;
}
879 # endif
880 #endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */
881
882 #if !ZEND_INTRIN_AVX2_NATIVE && !ZEND_INTRIN_SSSE3_NATIVE
883 #if ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_RESOLVER
php_base64_encode_default(const unsigned char * str,size_t length)884 zend_string *php_base64_encode_default(const unsigned char *str, size_t length)
885 #else
886 PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length)
887 #endif
888 {
889 unsigned char *p;
890 zend_string *result;
891
892 result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
893 p = (unsigned char *)ZSTR_VAL(result);
894
895 p = php_base64_encode_impl(str, length, p);
896
897 ZSTR_LEN(result) = (p - (unsigned char *)ZSTR_VAL(result));
898
899 return result;
900 }
901 #endif
902
903 #if !ZEND_INTRIN_AVX2_NATIVE && !ZEND_INTRIN_SSSE3_NATIVE
904 #if ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_RESOLVER
php_base64_decode_ex_default(const unsigned char * str,size_t length,zend_bool strict)905 zend_string *php_base64_decode_ex_default(const unsigned char *str, size_t length, zend_bool strict)
906 #else
907 PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict)
908 #endif
909 {
910 zend_string *result;
911 size_t outl = 0;
912
913 result = zend_string_alloc(length, 0);
914
915 if (!php_base64_decode_impl(str, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) {
916 zend_string_efree(result);
917 return NULL;
918 }
919
920 ZSTR_LEN(result) = outl;
921
922 return result;
923 }
924 #endif
925 /* }}} */
926
927 /* {{{ proto string base64_encode(string str)
928 Encodes string using MIME base64 algorithm */
PHP_FUNCTION(base64_encode)929 PHP_FUNCTION(base64_encode)
930 {
931 char *str;
932 size_t str_len;
933 zend_string *result;
934
935 ZEND_PARSE_PARAMETERS_START(1, 1)
936 Z_PARAM_STRING(str, str_len)
937 ZEND_PARSE_PARAMETERS_END();
938
939 result = php_base64_encode((unsigned char*)str, str_len);
940 RETURN_STR(result);
941 }
942 /* }}} */
943
944 /* {{{ proto string base64_decode(string str[, bool strict])
945 Decodes string using MIME base64 algorithm */
PHP_FUNCTION(base64_decode)946 PHP_FUNCTION(base64_decode)
947 {
948 char *str;
949 zend_bool strict = 0;
950 size_t str_len;
951 zend_string *result;
952
953 ZEND_PARSE_PARAMETERS_START(1, 2)
954 Z_PARAM_STRING(str, str_len)
955 Z_PARAM_OPTIONAL
956 Z_PARAM_BOOL(strict)
957 ZEND_PARSE_PARAMETERS_END();
958
959 result = php_base64_decode_ex((unsigned char*)str, str_len, strict);
960 if (result != NULL) {
961 RETURN_STR(result);
962 } else {
963 RETURN_FALSE;
964 }
965 }
966 /* }}} */
967