1 /*
2  * "streamable kanji code filter and converter"
3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4  *
5  * LICENSE NOTICES
6  *
7  * This file is part of "streamable kanji code filter and converter",
8  * which is distributed under the terms of GNU Lesser General Public
9  * License (version 2) as published by the Free Software Foundation.
10  *
11  * This software is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with "streamable kanji code filter and converter";
18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19  * Suite 330, Boston, MA  02111-1307  USA
20  *
21  * The author of this file:
22  *
23  */
24 /*
25  * The source code included in this file was separated from mbfilter.c
26  * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
27  *
28  */
29 
30 #include "zend_bitset.h"
31 #include "mbfilter.h"
32 #include "mbfilter_utf16.h"
33 
34 #ifdef ZEND_INTRIN_AVX2_NATIVE
35 
36 /* We are building AVX2-only binary */
37 # include <immintrin.h>
38 # define mb_utf16be_to_wchar mb_utf16be_to_wchar_avx2
39 # define mb_utf16le_to_wchar mb_utf16le_to_wchar_avx2
40 # define mb_wchar_to_utf16be mb_wchar_to_utf16be_avx2
41 # define mb_wchar_to_utf16le mb_wchar_to_utf16le_avx2
42 
/* Forward declarations of the AVX2 implementations. In this configuration the
 * generic converter names are #defined to these functions. */
static size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
47 
48 #elif defined(ZEND_INTRIN_AVX2_RESOLVER)
49 
50 /* We are building binary which works with or without AVX2; whether or not to use
51  * AVX2-accelerated functions will be determined at runtime */
52 # include <immintrin.h>
53 # include "Zend/zend_cpuinfo.h"
54 
/* Forward declarations of the non-AVX2 implementations; these are what gets
 * selected at runtime when zend_cpu_supports_avx2() reports no AVX2 support */
static size_t mb_utf16be_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16be_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static size_t mb_utf16le_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16le_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
59 
60 # ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
61 /* Dynamic linker will decide whether or not to use AVX2-based functions and
62  * resolve symbols accordingly */
63 
/* AVX2 implementations, declared through ZEND_INTRIN_AVX2_FUNC_DECL */
ZEND_INTRIN_AVX2_FUNC_DECL(size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state));
ZEND_INTRIN_AVX2_FUNC_DECL(void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end));
ZEND_INTRIN_AVX2_FUNC_DECL(size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state));
ZEND_INTRIN_AVX2_FUNC_DECL(void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end));

/* Generic converter entry points, declared as indirect functions (ifunc);
 * each attribute names the resolver function which chooses the implementation */
size_t mb_utf16be_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) __attribute__((ifunc("resolve_utf16be_wchar")));
void mb_wchar_to_utf16be(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) __attribute__((ifunc("resolve_wchar_utf16be")));
size_t mb_utf16le_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) __attribute__((ifunc("resolve_utf16le_wchar")));
void mb_wchar_to_utf16le(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) __attribute__((ifunc("resolve_wchar_utf16le")));
73 
74 ZEND_NO_SANITIZE_ADDRESS
75 ZEND_ATTRIBUTE_UNUSED
resolve_utf16be_wchar(void)76 static mb_to_wchar_fn resolve_utf16be_wchar(void)
77 {
78 	return zend_cpu_supports_avx2() ? mb_utf16be_to_wchar_avx2 : mb_utf16be_to_wchar_default;
79 }
80 
81 ZEND_NO_SANITIZE_ADDRESS
82 ZEND_ATTRIBUTE_UNUSED
resolve_wchar_utf16be(void)83 static mb_from_wchar_fn resolve_wchar_utf16be(void)
84 {
85 	return zend_cpu_supports_avx2() ? mb_wchar_to_utf16be_avx2 : mb_wchar_to_utf16be_default;
86 }
87 
88 ZEND_NO_SANITIZE_ADDRESS
89 ZEND_ATTRIBUTE_UNUSED
resolve_utf16le_wchar(void)90 static mb_to_wchar_fn resolve_utf16le_wchar(void)
91 {
92 	return zend_cpu_supports_avx2() ? mb_utf16le_to_wchar_avx2 : mb_utf16le_to_wchar_default;
93 }
94 
95 ZEND_NO_SANITIZE_ADDRESS
96 ZEND_ATTRIBUTE_UNUSED
resolve_wchar_utf16le(void)97 static mb_from_wchar_fn resolve_wchar_utf16le(void)
98 {
99 	return zend_cpu_supports_avx2() ? mb_wchar_to_utf16le_avx2 : mb_wchar_to_utf16le_default;
100 }
101 
102 # else /* ZEND_INTRIN_AVX2_FUNC_PTR */
103 /* We are compiling for a target where the dynamic linker will not be able to
104  * resolve symbols according to whether the host supports AVX2 or not; so instead,
105  * we can make calls go through a function pointer and set the function pointer
106  * on module load */
107 
/* Forward declarations of the AVX2 implementations. When the compiler supports
 * the `target` function attribute, they are tagged with target("avx2");
 * otherwise they are declared as ordinary static functions. */
#ifdef HAVE_FUNC_ATTRIBUTE_TARGET
static size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) __attribute__((target("avx2")));
static void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) __attribute__((target("avx2")));
static size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) __attribute__((target("avx2")));
static void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) __attribute__((target("avx2")));
#else
static size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
#endif
119 
/* Function pointers through which the generic converter functions dispatch.
 * They start out NULL and are assigned by init_convert_utf16(). */
static mb_to_wchar_fn utf16be_to_wchar_ptr = NULL;
static mb_from_wchar_fn wchar_to_utf16be_ptr = NULL;
static mb_to_wchar_fn utf16le_to_wchar_ptr = NULL;
static mb_from_wchar_fn wchar_to_utf16le_ptr = NULL;
124 
mb_utf16be_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)125 static size_t mb_utf16be_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
126 {
127 	return utf16be_to_wchar_ptr(in, in_len, buf, bufsize, NULL);
128 }
129 
/* wchar -> UTF-16BE entry point for builds which dispatch through a function
 * pointer. Forwards to whichever implementation init_convert_utf16() installed. */
static void mb_wchar_to_utf16be(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	mb_from_wchar_fn convert = wchar_to_utf16be_ptr;

	(*convert)(in, len, buf, end);
}
134 
mb_utf16le_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)135 static size_t mb_utf16le_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
136 {
137 	return utf16le_to_wchar_ptr(in, in_len, buf, bufsize, NULL);
138 }
139 
/* wchar -> UTF-16LE entry point for builds which dispatch through a function
 * pointer. Forwards to whichever implementation init_convert_utf16() installed. */
static void mb_wchar_to_utf16le(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	mb_from_wchar_fn convert = wchar_to_utf16le_ptr;

	(*convert)(in, len, buf, end);
}
144 
init_convert_utf16(void)145 void init_convert_utf16(void)
146 {
147 	if (zend_cpu_supports_avx2()) {
148 		utf16be_to_wchar_ptr = mb_utf16be_to_wchar_avx2;
149 		wchar_to_utf16be_ptr = mb_wchar_to_utf16be_avx2;
150 		utf16le_to_wchar_ptr = mb_utf16le_to_wchar_avx2;
151 		wchar_to_utf16le_ptr = mb_wchar_to_utf16le_avx2;
152 	} else {
153 		utf16be_to_wchar_ptr = mb_utf16be_to_wchar_default;
154 		wchar_to_utf16be_ptr = mb_wchar_to_utf16be_default;
155 		utf16le_to_wchar_ptr = mb_utf16le_to_wchar_default;
156 		wchar_to_utf16le_ptr = mb_wchar_to_utf16le_default;
157 	}
158 }
159 # endif
160 
161 #else
162 
163 /* No AVX2 support */
164 # define mb_utf16be_to_wchar mb_utf16be_to_wchar_default
165 # define mb_utf16le_to_wchar mb_utf16le_to_wchar_default
166 # define mb_wchar_to_utf16be mb_wchar_to_utf16be_default
167 # define mb_wchar_to_utf16le mb_wchar_to_utf16le_default
168 
/* Forward declarations of the portable implementations, which the generic
 * converter names are #defined to in this configuration */
static size_t mb_utf16be_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16be_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static size_t mb_utf16le_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16le_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
173 
174 #endif
175 
/* Forward declarations for functions referenced by the tables below
 * before their definitions appear */
static int mbfl_filt_conv_utf16_wchar_flush(mbfl_convert_filter *filter);
static size_t mb_utf16_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
178 
/* NULL-terminated list of alternative names for UTF-16 */
static const char *mbfl_encoding_utf16_aliases[] = {"utf16", NULL};

/* Encoding descriptor for "UTF-16" (no byte order in the name).
 * Decoding uses the BOM-detecting converters (vtbl_utf16_wchar, mb_utf16_to_wchar);
 * encoding uses the big-endian converters (vtbl_wchar_utf16, mb_wchar_to_utf16be).
 * NOTE(review): positional initializers follow the field order of mbfl_encoding,
 * which is declared outside this file -- confirm there before reordering. */
const mbfl_encoding mbfl_encoding_utf16 = {
	mbfl_no_encoding_utf16,
	"UTF-16",
	"UTF-16",
	mbfl_encoding_utf16_aliases,
	NULL,
	0,
	&vtbl_utf16_wchar,
	&vtbl_wchar_utf16,
	mb_utf16_to_wchar,
	mb_wchar_to_utf16be,
	NULL
};
194 
/* Encoding descriptor for "UTF-16BE"; it has no alias list (NULL) and uses the
 * big-endian converters in both directions.
 * NOTE(review): positional initializers follow the field order of mbfl_encoding,
 * which is declared outside this file. */
const mbfl_encoding mbfl_encoding_utf16be = {
	mbfl_no_encoding_utf16be,
	"UTF-16BE",
	"UTF-16BE",
	NULL,
	NULL,
	0,
	&vtbl_utf16be_wchar,
	&vtbl_wchar_utf16be,
	mb_utf16be_to_wchar,
	mb_wchar_to_utf16be,
	NULL
};
208 
/* Encoding descriptor for "UTF-16LE"; it has no alias list (NULL) and uses the
 * little-endian converters in both directions.
 * NOTE(review): positional initializers follow the field order of mbfl_encoding,
 * which is declared outside this file. */
const mbfl_encoding mbfl_encoding_utf16le = {
	mbfl_no_encoding_utf16le,
	"UTF-16LE",
	"UTF-16LE",
	NULL,
	NULL,
	0,
	&vtbl_utf16le_wchar,
	&vtbl_wchar_utf16le,
	mb_utf16le_to_wchar,
	mb_wchar_to_utf16le,
	NULL
};
222 
/* Filter table for UTF-16 -> wchar: byte filter mbfl_filt_conv_utf16_wchar
 * (which looks for a BOM) together with the truncated-input flush handler.
 * NOTE(review): the meaning of the NULL slots depends on struct mbfl_convert_vtbl,
 * which is declared outside this file. */
const struct mbfl_convert_vtbl vtbl_utf16_wchar = {
	mbfl_no_encoding_utf16,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_utf16_wchar,
	mbfl_filt_conv_utf16_wchar_flush,
	NULL,
};
232 
/* Filter table for wchar -> UTF-16. Note that it uses the big-endian output
 * filter (mbfl_filt_conv_wchar_utf16be), so plain "UTF-16" output is big-endian.
 * NOTE(review): the meaning of the NULL slots depends on struct mbfl_convert_vtbl,
 * which is declared outside this file. */
const struct mbfl_convert_vtbl vtbl_wchar_utf16 = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_utf16,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_utf16be,
	mbfl_filt_conv_common_flush,
	NULL,
};
242 
/* Filter table for UTF-16BE -> wchar: byte filter mbfl_filt_conv_utf16be_wchar
 * together with the truncated-input flush handler.
 * NOTE(review): the meaning of the NULL slots depends on struct mbfl_convert_vtbl,
 * which is declared outside this file. */
const struct mbfl_convert_vtbl vtbl_utf16be_wchar = {
	mbfl_no_encoding_utf16be,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_utf16be_wchar,
	mbfl_filt_conv_utf16_wchar_flush,
	NULL,
};
252 
/* Filter table for wchar -> UTF-16BE: output filter mbfl_filt_conv_wchar_utf16be
 * with the common flush handler.
 * NOTE(review): the meaning of the NULL slots depends on struct mbfl_convert_vtbl,
 * which is declared outside this file. */
const struct mbfl_convert_vtbl vtbl_wchar_utf16be = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_utf16be,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_utf16be,
	mbfl_filt_conv_common_flush,
	NULL,
};
262 
/* Filter table for UTF-16LE -> wchar: byte filter mbfl_filt_conv_utf16le_wchar
 * together with the truncated-input flush handler.
 * NOTE(review): the meaning of the NULL slots depends on struct mbfl_convert_vtbl,
 * which is declared outside this file. */
const struct mbfl_convert_vtbl vtbl_utf16le_wchar = {
	mbfl_no_encoding_utf16le,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_utf16le_wchar,
	mbfl_filt_conv_utf16_wchar_flush,
	NULL,
};
272 
/* Filter table for wchar -> UTF-16LE: output filter mbfl_filt_conv_wchar_utf16le
 * with the common flush handler.
 * NOTE(review): the meaning of the NULL slots depends on struct mbfl_convert_vtbl,
 * which is declared outside this file. */
const struct mbfl_convert_vtbl vtbl_wchar_utf16le = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_utf16le,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_utf16le,
	mbfl_filt_conv_common_flush,
	NULL,
};
282 
/* Evaluate `statement`; if the result is negative, return -1 from the enclosing function */
#define CK(statement)	do { if ((statement) < 0) return (-1); } while (0)
284 
/* Byte filter for "UTF-16" input whose byte order is not yet known.
 *
 * The first two bytes are read as one big-endian code unit. If that unit is the
 * little-endian BOM (0xFFFE), the filter replaces itself with the UTF-16LE
 * filter; otherwise it replaces itself with the UTF-16BE filter and treats the
 * unit as the first code unit of big-endian text (a big-endian BOM, 0xFEFF, is
 * dropped rather than emitted).
 *
 * c: next input byte; filter: conversion filter state
 * Returns 0, or -1 if the output function reports failure. */
int mbfl_filt_conv_utf16_wchar(int c, mbfl_convert_filter *filter)
{
	int unit;

	if (filter->status == 0) {
		/* First byte; remember it and wait for the second one */
		filter->status = 1;
		filter->cache = c & 0xFF;
		return 0;
	}

	unit = (filter->cache << 8) | (c & 0xFF);
	filter->status = 0;
	filter->cache = 0;

	if (unit == 0xFFFE) {
		/* Little-endian BOM; all further bytes go to the little-endian filter */
		filter->filter_function = mbfl_filt_conv_utf16le_wchar;
		return 0;
	}

	/* Anything else means we stay with the big-endian assumption */
	filter->filter_function = mbfl_filt_conv_utf16be_wchar;

	if (unit >= 0xD800 && unit <= 0xDBFF) {
		/* First half of a surrogate pair; keep its 10 data bits and let the
		 * big-endian filter pick up from its 'expecting second half' state */
		filter->cache = unit & 0x3FF;
		filter->status = 2;
	} else if (unit >= 0xDC00 && unit <= 0xDFFF) {
		/* Second half of a surrogate pair with no first half before it */
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
	} else if (unit != 0xFEFF) {
		/* Ordinary codepoint (a big-endian BOM is swallowed silently) */
		CK((*filter->output_function)(unit, filter->data));
	}

	return 0;
}
315 
/* Byte filter converting UTF-16BE input to wchars, one byte per call.
 *
 * filter->status tracks the position within the current character:
 *   0 = expecting first byte of a code unit
 *   1 = expecting second byte of a code unit
 *   2 = saw a lead surrogate, expecting first byte of the trail surrogate
 *   3 = expecting second byte of the trail surrogate
 * filter->cache holds the bytes/bits collected so far.
 *
 * Returns 0, or -1 if the output function reports failure. */
int mbfl_filt_conv_utf16be_wchar(int c, mbfl_convert_filter *filter)
{
	int status = filter->status;
	int unit;

	if (status == 0) {
		filter->status = 1;
		filter->cache = c & 0xFF;
	} else if (status == 1) {
		unit = (filter->cache << 8) | (c & 0xFF);

		if (unit >= 0xD800 && unit <= 0xDBFF) {
			/* Lead surrogate; retain its 10 data bits */
			filter->cache = unit & 0x3FF;
			filter->status = 2;
		} else {
			filter->status = 0;
			if (unit >= 0xDC00 && unit <= 0xDFFF) {
				/* Trail surrogate showed up without a lead surrogate */
				CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			} else {
				CK((*filter->output_function)(unit, filter->data));
			}
		}
	} else if (status == 2) {
		/* Lead surrogate's data bits move up; new byte goes in the low 8 bits */
		filter->cache = (filter->cache << 8) | (c & 0xFF);
		filter->status = 3;
	} else if (status == 3) {
		unit = ((filter->cache & 0xFF) << 8) | (c & 0xFF);

		if (unit >= 0xD800 && unit <= 0xDBFF) {
			/* Another lead surrogate where a trail surrogate was expected;
			 * report the earlier one as bad and start over with this one */
			filter->status = 2;
			filter->cache = unit & 0x3FF;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		} else {
			filter->status = 0;
			if (unit >= 0xDC00 && unit <= 0xDFFF) {
				/* Valid pair; combine both 10-bit halves */
				int codepoint = ((filter->cache & 0x3FF00) << 2) + (unit & 0x3FF) + 0x10000;
				CK((*filter->output_function)(codepoint, filter->data));
			} else {
				/* Lead surrogate followed by an ordinary code unit */
				CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
				CK((*filter->output_function)(unit, filter->data));
			}
		}
	}

	return 0;
}
366 
/* Output filter converting one wchar to UTF-16BE bytes.
 *
 * Values in [0, MBFL_WCSPLANE_UCS2MAX) are emitted as one code unit (high byte
 * first); values in [MBFL_WCSPLANE_SUPMIN, MBFL_WCSPLANE_SUPMAX) are emitted as
 * a surrogate pair; anything else is passed to mbfl_filt_conv_illegal_output.
 *
 * Returns 0, or -1 if any output call reports failure. */
int mbfl_filt_conv_wchar_utf16be(int c, mbfl_convert_filter *filter)
{
	if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
		CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
		CK((*filter->output_function)(c & 0xff, filter->data));
		return 0;
	}

	if (c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX) {
		int lead = ((c >> 10) - 0x40) | 0xd800;
		int trail = (c & 0x3ff) | 0xdc00;

		CK((*filter->output_function)((lead >> 8) & 0xff, filter->data));
		CK((*filter->output_function)(lead & 0xff, filter->data));
		CK((*filter->output_function)((trail >> 8) & 0xff, filter->data));
		CK((*filter->output_function)(trail & 0xff, filter->data));
		return 0;
	}

	CK(mbfl_filt_conv_illegal_output(c, filter));
	return 0;
}
387 
/* Byte filter converting UTF-16LE input to wchars, one byte per call.
 *
 * filter->status tracks the position within the current character:
 *   0 = expecting low byte of a code unit
 *   1 = expecting high byte of a code unit
 *   2 = saw a lead surrogate, expecting low byte of the trail surrogate
 *   3 = expecting high byte of the trail surrogate
 * filter->cache holds the bytes/bits collected so far.
 *
 * Returns 0, or -1 if the output function reports failure. */
int mbfl_filt_conv_utf16le_wchar(int c, mbfl_convert_filter *filter)
{
	int status = filter->status;

	if (status == 0) {
		filter->status = 1;
		filter->cache = c & 0xff;
	} else if (status == 1) {
		int top_bits = c & 0xfc;

		if (top_bits == 0xd8) {
			/* High byte marks a lead surrogate; add its 2 data bits to the 8 we have */
			filter->cache += ((c & 0x3) << 8);
			filter->status = 2;
		} else {
			filter->status = 0;
			if (top_bits == 0xdc) {
				/* Trail surrogate showed up without a lead surrogate */
				CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			} else {
				CK((*filter->output_function)(filter->cache + ((c & 0xff) << 8), filter->data));
			}
		}
	} else if (status == 2) {
		/* Lead surrogate's 10 data bits move up; new low byte goes underneath */
		filter->cache = (filter->cache << 10) + (c & 0xff);
		filter->status = 3;
	} else if (status == 3) {
		int unit = (filter->cache & 0xFF) | ((c & 0xFF) << 8);

		if (unit >= 0xD800 && unit <= 0xDBFF) {
			/* Another lead surrogate where a trail surrogate was expected;
			 * report the earlier one as bad and start over with this one */
			filter->status = 2;
			filter->cache = unit & 0x3FF;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		} else {
			filter->status = 0;
			if (unit >= 0xDC00 && unit <= 0xDFFF) {
				/* Valid pair; the cache already holds everything except the
				 * top 2 data bits of the trail surrogate */
				int codepoint = filter->cache + ((c & 0x3) << 8) + 0x10000;
				CK((*filter->output_function)(codepoint, filter->data));
			} else {
				/* Lead surrogate followed by an ordinary code unit */
				CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
				CK((*filter->output_function)(unit, filter->data));
			}
		}
	}

	return 0;
}
442 
/* Output filter converting one wchar to UTF-16LE bytes.
 *
 * Values in [0, MBFL_WCSPLANE_UCS2MAX) are emitted as one code unit (low byte
 * first); values in [MBFL_WCSPLANE_SUPMIN, MBFL_WCSPLANE_SUPMAX) are emitted as
 * a surrogate pair; anything else is passed to mbfl_filt_conv_illegal_output.
 *
 * Returns 0, or -1 if any output call reports failure. */
int mbfl_filt_conv_wchar_utf16le(int c, mbfl_convert_filter *filter)
{
	if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
		CK((*filter->output_function)(c & 0xff, filter->data));
		CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
		return 0;
	}

	if (c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX) {
		int lead = ((c >> 10) - 0x40) | 0xd800;
		int trail = (c & 0x3ff) | 0xdc00;

		CK((*filter->output_function)(lead & 0xff, filter->data));
		CK((*filter->output_function)((lead >> 8) & 0xff, filter->data));
		CK((*filter->output_function)(trail & 0xff, filter->data));
		CK((*filter->output_function)((trail >> 8) & 0xff, filter->data));
		return 0;
	}

	CK(mbfl_filt_conv_illegal_output(c, filter));
	return 0;
}
463 
/* Flush handler shared by the UTF-16 input filters.
 *
 * A non-zero status means the input stopped in the middle of a character, so
 * the status is reset and one MBFL_BAD_INPUT marker is emitted. Afterwards the
 * downstream flush function, if there is one, is invoked.
 *
 * Returns 0, or -1 if emitting the error marker fails (in which case the
 * downstream flush function is not called). */
static int mbfl_filt_conv_utf16_wchar_flush(mbfl_convert_filter *filter)
{
	int pending = filter->status;

	if (pending) {
		/* Input string was truncated */
		filter->status = 0;
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
	}

	if (!filter->flush_function) {
		return 0;
	}

	(*filter->flush_function)(filter->data);
	return 0;
}
478 
/* Values stored in `*state` by mb_utf16_to_wchar once the byte order of the
 * input has been decided; any other value means 'not decided yet' */
#define DETECTED_BE 1
#define DETECTED_LE 2
481 
mb_utf16_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)482 static size_t mb_utf16_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
483 {
484 	if (*state == DETECTED_BE) {
485 		return mb_utf16be_to_wchar(in, in_len, buf, bufsize, NULL);
486 	} else if (*state == DETECTED_LE) {
487 		return mb_utf16le_to_wchar(in, in_len, buf, bufsize, NULL);
488 	} else if (*in_len >= 2) {
489 		unsigned char *p = *in;
490 		unsigned char c1 = *p++;
491 		unsigned char c2 = *p++;
492 		uint16_t n = (c1 << 8) | c2;
493 
494 		if (n == 0xFFFE) {
495 			/* Little-endian BOM */
496 			*in = p;
497 			*in_len -= 2;
498 			*state = DETECTED_LE;
499 			return mb_utf16le_to_wchar(in, in_len, buf, bufsize, NULL);
500 		} if (n == 0xFEFF) {
501 			/* Big-endian BOM; don't send to output */
502 			*in = p;
503 			*in_len -= 2;
504 		}
505 	}
506 
507 	*state = DETECTED_BE;
508 	return mb_utf16be_to_wchar(in, in_len, buf, bufsize, NULL);
509 }
510 
mb_utf16be_to_wchar_default(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)511 static size_t mb_utf16be_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
512 {
513 	/* We only want to read 16-bit words out of `str`; any trailing byte will be handled at the end */
514 	unsigned char *p = *in, *e = p + (*in_len & ~1);
515 	/* Set `limit` to one less than the actual amount of space in the buffer; this is because
516 	 * on some iterations of the below loop, we might produce two output words */
517 	uint32_t *out = buf, *limit = buf + bufsize - 1;
518 
519 	while (p < e && out < limit) {
520 		unsigned char c1 = *p++;
521 		unsigned char c2 = *p++;
522 		uint16_t n = (c1 << 8) | c2;
523 
524 		if (n >= 0xD800 && n <= 0xDBFF) {
525 			/* Handle surrogate */
526 			if (p < e) {
527 				unsigned char c3 = *p++;
528 				unsigned char c4 = *p++;
529 				uint16_t n2 = (c3 << 8) | c4;
530 
531 				if (n2 >= 0xD800 && n2 <= 0xDBFF) {
532 					/* Wrong; that's the first half of a surrogate pair, when we were expecting the second */
533 					*out++ = MBFL_BAD_INPUT;
534 					p -= 2;
535 				} else if (n2 >= 0xDC00 && n2 <= 0xDFFF) {
536 					*out++ = (((n & 0x3FF) << 10) | (n2 & 0x3FF)) + 0x10000;
537 				} else {
538 					/* The first half of a surrogate pair was followed by a 'normal' codepoint */
539 					*out++ = MBFL_BAD_INPUT;
540 					*out++ = n2;
541 				}
542 			} else {
543 				*out++ = MBFL_BAD_INPUT;
544 			}
545 		} else if (n >= 0xDC00 && n <= 0xDFFF) {
546 			/* This is wrong; second part of surrogate pair has come first */
547 			*out++ = MBFL_BAD_INPUT;
548 		} else {
549 			*out++ = n;
550 		}
551 	}
552 
553 	if (p == e && (*in_len & 0x1) && out < limit) {
554 		/* There is an extra trailing byte (which shouldn't be there) */
555 		*out++ = MBFL_BAD_INPUT;
556 		p++;
557 	}
558 
559 	*in_len -= (p - *in);
560 	*in = p;
561 	return out - buf;
562 }
563 
/* Portable wchar -> UTF-16BE converter.
 *
 * in/len: array of wchars to convert and its element count
 * buf: conversion buffer which receives the output bytes
 * end: not used by this function
 *
 * Space for 2 bytes per remaining wchar is reserved up front; 4-byte surrogate
 * pairs and the error path re-check the reservation as they go. */
static void mb_wchar_to_utf16be_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

	while (len > 0) {
		uint32_t codepoint = *in;
		in++;
		len--; /* From here on, `len` counts the wchars still to come */

		if (codepoint < MBFL_WCSPLANE_UCS2MAX) {
			out = mb_convert_buf_add2(out, (codepoint >> 8) & 0xFF, codepoint & 0xFF);
		} else if (codepoint < MBFL_WCSPLANE_UTF32MAX) {
			uint16_t lead = ((codepoint >> 10) - 0x40) | 0xD800;
			uint16_t trail = (codepoint & 0x3FF) | 0xDC00;
			MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
			out = mb_convert_buf_add4(out, (lead >> 8) & 0xFF, lead & 0xFF, (trail >> 8) & 0xFF, trail & 0xFF);
		} else {
			MB_CONVERT_ERROR(buf, out, limit, codepoint, mb_wchar_to_utf16be_default);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
588 
mb_utf16le_to_wchar_default(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)589 static size_t mb_utf16le_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
590 {
591 	/* We only want to read 16-bit words out of `str`; any trailing byte will be handled at the end */
592 	unsigned char *p = *in, *e = p + (*in_len & ~1);
593 	/* Set `limit` to one less than the actual amount of space in the buffer; this is because
594 	 * on some iterations of the below loop, we might produce two output words */
595 	uint32_t *out = buf, *limit = buf + bufsize - 1;
596 
597 	while (p < e && out < limit) {
598 		unsigned char c1 = *p++;
599 		unsigned char c2 = *p++;
600 		uint16_t n = (c2 << 8) | c1;
601 
602 		if (n >= 0xD800 && n <= 0xDBFF) {
603 			/* Handle surrogate */
604 			if (p < e) {
605 				unsigned char c3 = *p++;
606 				unsigned char c4 = *p++;
607 				uint16_t n2 = (c4 << 8) | c3;
608 
609 				if (n2 >= 0xD800 && n2 <= 0xDBFF) {
610 					/* Wrong; that's the first half of a surrogate pair, when we were expecting the second */
611 					*out++ = MBFL_BAD_INPUT;
612 					p -= 2;
613 				} else if (n2 >= 0xDC00 && n2 <= 0xDFFF) {
614 					*out++ = (((n & 0x3FF) << 10) | (n2 & 0x3FF)) + 0x10000;
615 				} else {
616 					/* The first half of a surrogate pair was followed by a 'normal' codepoint */
617 					*out++ = MBFL_BAD_INPUT;
618 					*out++ = n2;
619 				}
620 			} else {
621 				*out++ = MBFL_BAD_INPUT;
622 			}
623 		} else if (n >= 0xDC00 && n <= 0xDFFF) {
624 			/* This is wrong; second part of surrogate pair has come first */
625 			*out++ = MBFL_BAD_INPUT;
626 		} else {
627 			*out++ = n;
628 		}
629 	}
630 
631 	if (p == e && (*in_len & 0x1) && out < limit) {
632 		/* There is an extra trailing byte (which shouldn't be there) */
633 		*out++ = MBFL_BAD_INPUT;
634 		p++;
635 	}
636 
637 	*in_len -= (p - *in);
638 	*in = p;
639 	return out - buf;
640 }
641 
/* Portable wchar -> UTF-16LE converter.
 *
 * in/len: array of wchars to convert and its element count
 * buf: conversion buffer which receives the output bytes
 * end: not used by this function
 *
 * Space for 2 bytes per remaining wchar is reserved up front; 4-byte surrogate
 * pairs and the error path re-check the reservation as they go. */
static void mb_wchar_to_utf16le_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

	while (len > 0) {
		uint32_t codepoint = *in;
		in++;
		len--; /* From here on, `len` counts the wchars still to come */

		if (codepoint < MBFL_WCSPLANE_UCS2MAX) {
			out = mb_convert_buf_add2(out, codepoint & 0xFF, (codepoint >> 8) & 0xFF);
		} else if (codepoint < MBFL_WCSPLANE_UTF32MAX) {
			uint16_t lead = ((codepoint >> 10) - 0x40) | 0xD800;
			uint16_t trail = (codepoint & 0x3FF) | 0xDC00;
			MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
			out = mb_convert_buf_add4(out, lead & 0xFF, (lead >> 8) & 0xFF, trail & 0xFF, (trail >> 8) & 0xFF);
		} else {
			MB_CONVERT_ERROR(buf, out, limit, codepoint, mb_wchar_to_utf16le_default);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
666 
667 #if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER)
668 
/* AVX2-accelerated UTF-16BE -> wchar converter.
 *
 * in/in_len: remaining input and its length in bytes; both are updated to
 *            reflect the bytes consumed
 * buf/bufsize: output array of 32-bit wchars and its capacity in elements
 * state: not used by this function
 * Returns the number of wchars written to buf.
 *
 * The vectorized loop only runs while at least 32 input bytes and 16 output
 * slots remain. Shorter inputs, and whatever is left over after the loop, are
 * handed to mb_utf16be_to_wchar_default. */
#ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
#else
static size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
#endif
{
	size_t len = *in_len;

	if (len >= 32 && bufsize >= 16) {
		unsigned char *p = *in;
		uint32_t *out = buf;

		/* Used to determine if a block of input bytes contains any surrogates */
		const __m256i _f8 = _mm256_set1_epi16(0xF8);
		const __m256i _d8 = _mm256_set1_epi16(0xD8);
		/* wchars must be in host byte order, which is little-endian on x86;
		 * Since we are reading in (big-endian) UTF-16BE, use this vector to swap byte order for output */
		const __m256i swap_bytes = _mm256_set_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);

		do {
			__m256i operand = _mm256_loadu_si256((__m256i*)p); /* Load 32 bytes */

			/* Two mask bits per 16-bit input unit; both bits are set for a unit
			 * whose first byte has 0xD8 in its top 5 bits (i.e. any surrogate) */
			uint32_t surrogate_bitvec = _mm256_movemask_epi8(_mm256_cmpeq_epi16(_mm256_and_si256(operand, _f8), _d8));
			if (surrogate_bitvec == 0) {
				/* There are no surrogates among these 16 characters
				 * So converting the UTF-16 input to wchars is very simple; just extend each 16-bit value
				 * to a 32-bit value, filling in zero bits in the high end */
				operand = _mm256_shuffle_epi8(operand, swap_bytes);
				_mm256_storeu_si256((__m256i*)out, _mm256_cvtepu16_epi32(_mm256_castsi256_si128(operand)));
				_mm256_storeu_si256((__m256i*)(out + 8), _mm256_cvtepu16_epi32(_mm256_extracti128_si256(operand, 1)));
				out += 16;
				bufsize -= 16;
				p += sizeof(__m256i);
				len -= sizeof(__m256i);
			} else if ((surrogate_bitvec & 1) == 0) {
				/* Some prefix of the current block is non-surrogates; output those */
				/* Trailing zero bits / 2 = number of leading non-surrogate units */
				uint8_t n_chars = zend_ulong_ntz(surrogate_bitvec) >> 1;
				operand = _mm256_shuffle_epi8(operand, swap_bytes);
				/* We know that the output buffer has at least 64 bytes of space available
				 * So don't bother trimming the output down to only include the non-surrogate prefix;
				 * rather, write out an entire block of 64 (or 32) bytes, then bump our output pointer
				 * forward just past the 'good part', so the 'bad part' will be overwritten on the next
				 * iteration of this loop */
				_mm256_storeu_si256((__m256i*)out, _mm256_cvtepu16_epi32(_mm256_castsi256_si128(operand)));
				if (n_chars > 8) {
					_mm256_storeu_si256((__m256i*)(out + 8), _mm256_cvtepu16_epi32(_mm256_extracti128_si256(operand, 1)));
				}
				out += n_chars;
				bufsize -= n_chars;
				p += n_chars * 2;
				len -= n_chars * 2;
			} else {
				/* Some prefix of the current block is (valid or invalid) surrogates
				 * Handle those using non-vectorized code */
				surrogate_bitvec = ~surrogate_bitvec;
				/* After inversion, a zero vector means all 16 units were surrogates */
				unsigned int n_chars = surrogate_bitvec ? zend_ulong_ntz(surrogate_bitvec) >> 1 : 16;
				do {
					unsigned char c1 = *p++;
					unsigned char c2 = *p++;

					if (c1 & 0x4 || len < 4) {
						/* 2nd part of surrogate pair has come first OR string ended abruptly
						 * after 1st part of surrogate pair */
						*out++ = MBFL_BAD_INPUT;
						bufsize--;
						n_chars--;
						len -= 2;
						continue;
					}

					uint16_t n = (c1 << 8) | c2;
					unsigned char c3 = *p++;
					unsigned char c4 = *p++;

					if ((c3 & 0xFC) == 0xDC) {
						/* Valid surrogate pair */
						uint16_t n2 = (c3 << 8) | c4;
						*out++ = (((n & 0x3FF) << 10) | (n2 & 0x3FF)) + 0x10000;
						bufsize--;
						len -= 4;
#if defined(PHP_HAVE_BUILTIN_USUB_OVERFLOW) && PHP_HAVE_BUILTIN_USUB_OVERFLOW
						/* Subtracting 2 from `n_chars` will automatically set the CPU's flags;
						 * branch directly off the appropriate flag (CF on x86) rather than using
						 * another instruction (CMP on x86) to check for underflow */
						if (__builtin_usub_overflow(n_chars, 2, &n_chars)) {
							/* The last 2 bytes of this block and the first 2 bytes of the following
							 * block form a valid surrogate pair; now just make sure we don't get
							 * stuck in this loop due to underflow of the loop index */
							break;
						}
#else
						/* Same underflow check without the builtin: going from 1 to
						 * UINT_MAX means the pair straddled the end of the surrogate run */
						n_chars -= 2;
						if (n_chars == UINT_MAX) {
							break;
						}
#endif
					} else {
						/* First half of surrogate pair was followed by another first half
						 * OR by a non-surrogate character */
						*out++ = MBFL_BAD_INPUT;
						bufsize--;
						n_chars--;
						len -= 2;
						p -= 2; /* Back up so the last 2 bytes will be processed again */
					}
				} while (n_chars);
			}
		} while (len >= 32 && bufsize >= 16);

		/* The leftover input is only converted here if at least 4 output slots
		 * remain; otherwise it is left unconsumed */
		if (len && bufsize >= 4) {
			/* Finish up trailing bytes which don't fill a 32-byte block */
			out += mb_utf16be_to_wchar_default(&p, &len, out, bufsize, NULL);
		}

		*in = p;
		*in_len = len;
		return out - buf;
	} else if (len) {
		/* Too little input or output space for the vectorized path */
		return mb_utf16be_to_wchar_default(in, in_len, buf, bufsize, NULL);
	} else {
		return 0;
	}
}
792 
793 #ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
mb_wchar_to_utf16be_avx2(uint32_t * in,size_t len,mb_convert_buf * buf,bool end)794 void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
795 #else
796 static void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
797 #endif
798 {
799 	if (len >= 8) {
800 		unsigned char *out, *limit;
801 		MB_CONVERT_BUF_LOAD(buf, out, limit);
802 		MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
803 
804 		/* Used to find wchars which are outside the Unicode BMP (Basic Multilingual Plane) */
805 		const __m256i bmp_mask = _mm256_set1_epi32(0xFFFF);
806 		/* Used to extract 16 bits which we want from each of eight 32-bit values */
807 		const __m256i pack_8x16 = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 12, 13, 8, 9, 4, 5, 0, 1, 12, 13, 8, 9, 4, 5, 0, 1, -1, -1, -1, -1, -1, -1, -1, -1);
808 
809 		do {
810 			__m256i operand = _mm256_loadu_si256((__m256i*)in); /* Load 32 bytes */
811 
812 			uint32_t bmp_bitvec = _mm256_movemask_epi8(_mm256_cmpeq_epi32(_mm256_and_si256(operand, bmp_mask), operand));
813 			if (bmp_bitvec == 0xFFFFFFFF) {
814 				/* All eight wchars are in the BMP
815 				 * Shuffle bytes around to get the 16 bytes we want into the low 16 bytes of YMM register
816 				 * (which is equivalent to an XMM register) */
817 				operand = _mm256_shuffle_epi8(operand, pack_8x16);
818 				__m256i operand2 = _mm256_permute2x128_si256(operand, operand, 1);
819 				operand = _mm256_alignr_epi8(operand2, operand, 8);
820 				_mm_storeu_si128((__m128i*)out, _mm256_castsi256_si128(operand)); /* Store 16 bytes */
821 				out += 16;
822 				len -= 8;
823 				in += 8;
824 			} else if (bmp_bitvec & 1) {
825 				/* Some prefix of this block are codepoints in the BMP */
826 				unsigned int n_bytes = zend_ulong_ntz(~bmp_bitvec);
827 				operand = _mm256_shuffle_epi8(operand, pack_8x16);
828 				__m256i operand2 = _mm256_permute2x128_si256(operand, operand, 1);
829 				operand = _mm256_alignr_epi8(operand2, operand, 8);
830 				/* Store 16 bytes, but bump output pointer forward just past the 'good part',
831 				 * so the 'bad part' will be overwritten on the next iteration of this loop */
832 				_mm_storeu_si128((__m128i*)out, _mm256_castsi256_si128(operand));
833 				out += n_bytes >> 1;
834 				len -= n_bytes >> 2;
835 				in += n_bytes >> 2;
836 			} else {
837 				/* Some prefix of this block is codepoints outside the BMP OR error markers
838 				 * Handle them using non-vectorized code */
839 				unsigned int n_words = bmp_bitvec ? zend_ulong_ntz(bmp_bitvec) >> 2 : 8;
840 				do {
841 					uint32_t w = *in++;
842 					n_words--;
843 					len--;
844 
845 					if (w < MBFL_WCSPLANE_UTF32MAX) {
846 						uint16_t n1 = ((w >> 10) - 0x40) | 0xD800;
847 						uint16_t n2 = (w & 0x3FF) | 0xDC00;
848 						MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
849 						out = mb_convert_buf_add4(out, (n1 >> 8) & 0xFF, n1 & 0xFF, (n2 >> 8) & 0xFF, n2 & 0xFF);
850 					} else {
851 						MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_utf16be_default);
852 						MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
853 					}
854 				} while (n_words);
855 			}
856 		} while (len >= 8);
857 
858 		MB_CONVERT_BUF_STORE(buf, out, limit);
859 	}
860 
861 	if (len) {
862 		mb_wchar_to_utf16be_default(in, len, buf, end);
863 	}
864 }
865 
866 #ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
mb_utf16le_to_wchar_avx2(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)867 size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
868 #else
869 static size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
870 #endif
871 {
872 	/* Most of this function is the same as `mb_utf16be_to_wchar_avx2`, above;
873 	 * See it for more detailed code comments */
874 
875 	size_t len = *in_len;
876 
877 	if (len >= 32 && bufsize >= 16) {
878 		unsigned char *p = *in;
879 		uint32_t *out = buf;
880 
881 		const __m256i _f8 = _mm256_set1_epi16(0xF800);
882 		const __m256i _d8 = _mm256_set1_epi16(0xD800);
883 
884 		do {
885 			__m256i operand = _mm256_loadu_si256((__m256i*)p);
886 
887 			uint32_t surrogate_bitvec = _mm256_movemask_epi8(_mm256_cmpeq_epi16(_mm256_and_si256(operand, _f8), _d8));
888 			if (surrogate_bitvec == 0) {
889 				/* There are no surrogates among these 16 characters */
890 				_mm256_storeu_si256((__m256i*)out, _mm256_cvtepu16_epi32(_mm256_castsi256_si128(operand)));
891 				_mm256_storeu_si256((__m256i*)(out + 8), _mm256_cvtepu16_epi32(_mm256_extracti128_si256(operand, 1)));
892 				out += 16;
893 				bufsize -= 16;
894 				p += sizeof(__m256i);
895 				len -= sizeof(__m256i);
896 			} else if ((surrogate_bitvec & 1) == 0) {
897 				/* Some prefix of the current block is non-surrogates */
898 				uint8_t n_chars = zend_ulong_ntz(surrogate_bitvec) >> 1;
899 				_mm256_storeu_si256((__m256i*)out, _mm256_cvtepu16_epi32(_mm256_castsi256_si128(operand)));
900 				if (n_chars > 8) {
901 					_mm256_storeu_si256((__m256i*)(out + 8), _mm256_cvtepu16_epi32(_mm256_extracti128_si256(operand, 1)));
902 				}
903 				out += n_chars;
904 				bufsize -= n_chars;
905 				p += n_chars * 2;
906 				len -= n_chars * 2;
907 			} else {
908 				/* Some prefix of the current block is (valid or invalid) surrogates */
909 				surrogate_bitvec = ~surrogate_bitvec;
910 				unsigned int n_chars = surrogate_bitvec ? zend_ulong_ntz(surrogate_bitvec) >> 1 : 16;
911 				do {
912 					unsigned char c1 = *p++;
913 					unsigned char c2 = *p++;
914 
915 					if (c2 & 0x4 || len < 4) {
916 						/* 2nd part of surrogate pair has come first OR string ended abruptly
917 						 * after 1st part of surrogate pair */
918 						*out++ = MBFL_BAD_INPUT;
919 						bufsize--;
920 						n_chars--;
921 						len -= 2;
922 						continue;
923 					}
924 
925 					uint16_t n = (c2 << 8) | c1;
926 					unsigned char c3 = *p++;
927 					unsigned char c4 = *p++;
928 
929 					if ((c4 & 0xFC) == 0xDC) {
930 						/* Valid surrogate pair */
931 						uint16_t n2 = (c4 << 8) | c3;
932 						*out++ = (((n & 0x3FF) << 10) | (n2 & 0x3FF)) + 0x10000;
933 						bufsize--;
934 						len -= 4;
935 #if defined(PHP_HAVE_BUILTIN_USUB_OVERFLOW) && PHP_HAVE_BUILTIN_USUB_OVERFLOW
936 						if (__builtin_usub_overflow(n_chars, 2, &n_chars)) {
937 							break;
938 						}
939 #else
940 						n_chars -= 2;
941 						if (n_chars == UINT_MAX) {
942 							break;
943 						}
944 #endif
945 					} else {
946 						/* First half of surrogate pair was followed by another first half
947 						 * OR by a non-surrogate character */
948 						*out++ = MBFL_BAD_INPUT;
949 						bufsize--;
950 						n_chars--;
951 						len -= 2;
952 						p -= 2; /* Back up so the last 2 bytes will be processed again */
953 					}
954 				} while (n_chars);
955 			}
956 		} while (len >= 32 && bufsize >= 16);
957 
958 		if (len && bufsize >= 4) {
959 			out += mb_utf16le_to_wchar_default(&p, &len, out, bufsize, NULL);
960 		}
961 
962 		*in = p;
963 		*in_len = len;
964 		return out - buf;
965 	} else if (len) {
966 		return mb_utf16le_to_wchar_default(in, in_len, buf, bufsize, NULL);
967 	} else {
968 		return 0;
969 	}
970 }
971 
972 #ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
mb_wchar_to_utf16le_avx2(uint32_t * in,size_t len,mb_convert_buf * buf,bool end)973 void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
974 #else
975 static void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
976 #endif
977 {
978 	if (len >= 8) {
979 		unsigned char *out, *limit;
980 		MB_CONVERT_BUF_LOAD(buf, out, limit);
981 		MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
982 
983 		/* Used to find wchars which are outside the Unicode BMP (Basic Multilingual Plane) */
984 		const __m256i bmp_mask = _mm256_set1_epi32(0xFFFF);
985 		/* Used to extract 16 bits which we want from each of eight 32-bit values */
986 		const __m256i pack_8x16 = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 13, 12, 9, 8, 5, 4, 1, 0, 13, 12, 9, 8, 5, 4, 1, 0, -1, -1, -1, -1, -1, -1, -1, -1);
987 
988 		do {
989 			__m256i operand = _mm256_loadu_si256((__m256i*)in);
990 
991 			uint32_t bmp_bitvec = _mm256_movemask_epi8(_mm256_cmpeq_epi32(_mm256_and_si256(operand, bmp_mask), operand));
992 			if (bmp_bitvec == 0xFFFFFFFF) {
993 				/* All eight wchars are in the BMP
994 				 * Shuffle bytes around to get the 16 bytes we want into the low 16 bytes of YMM register
995 				 * (which is equivalent to an XMM register) */
996 				operand = _mm256_shuffle_epi8(operand, pack_8x16);
997 				__m256i operand2 = _mm256_permute2x128_si256(operand, operand, 1);
998 				operand = _mm256_alignr_epi8(operand2, operand, 8);
999 				_mm_storeu_si128((__m128i*)out, _mm256_castsi256_si128(operand));
1000 				out += 16;
1001 				len -= 8;
1002 				in += 8;
1003 			} else if (bmp_bitvec & 1) {
1004 				/* Some prefix of this block are codepoints in the BMP */
1005 				unsigned int n_bytes = zend_ulong_ntz(~bmp_bitvec);
1006 				operand = _mm256_shuffle_epi8(operand, pack_8x16);
1007 				__m256i operand2 = _mm256_permute2x128_si256(operand, operand, 1);
1008 				operand = _mm256_alignr_epi8(operand2, operand, 8);
1009 				_mm_storeu_si128((__m128i*)out, _mm256_castsi256_si128(operand));
1010 				out += n_bytes >> 1;
1011 				len -= n_bytes >> 2;
1012 				in += n_bytes >> 2;
1013 			} else {
1014 				/* Some prefix of this block is codepoints outside the BMP OR error markers */
1015 				unsigned int n_words = bmp_bitvec ? zend_ulong_ntz(bmp_bitvec) >> 2 : 8;
1016 				do {
1017 					uint32_t w = *in++;
1018 					n_words--;
1019 					len--;
1020 
1021 					if (w < MBFL_WCSPLANE_UTF32MAX) {
1022 						uint16_t n1 = ((w >> 10) - 0x40) | 0xD800;
1023 						uint16_t n2 = (w & 0x3FF) | 0xDC00;
1024 						MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
1025 						out = mb_convert_buf_add4(out, n1 & 0xFF, (n1 >> 8) & 0xFF, n2 & 0xFF, (n2 >> 8) & 0xFF);
1026 					} else {
1027 						MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_utf16le_default);
1028 						MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
1029 					}
1030 				} while (n_words);
1031 			}
1032 		} while (len >= 8);
1033 
1034 		MB_CONVERT_BUF_STORE(buf, out, limit);
1035 	}
1036 
1037 	if (len) {
1038 		mb_wchar_to_utf16le_default(in, len, buf, end);
1039 	}
1040 }
1041 
1042 #endif /* defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER) */
1043