1 /*
2  * "streamable kanji code filter and converter"
3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4  *
5  * LICENSE NOTICES
6  *
7  * This file is part of "streamable kanji code filter and converter",
8  * which is distributed under the terms of GNU Lesser General Public
9  * License (version 2) as published by the Free Software Foundation.
10  *
11  * This software is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with "streamable kanji code filter and converter";
18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19  * Suite 330, Boston, MA  02111-1307  USA
20  *
21  * The author of this file:
22  *
23  */
24 /*
25  * The source code included in this file was separated from mbfilter.c
26  * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
27  *
28  */
29 
30 #include "zend_bitset.h"
31 #include "mbfilter.h"
32 #include "mbfilter_utf16.h"
33 
#ifdef ZEND_INTRIN_AVX2_NATIVE

/* We are building an AVX2-only binary: the generic converter names used by the
 * encoding tables in this file are aliased straight to the AVX2 implementations,
 * so no dispatch is needed */
# include <immintrin.h>
# define mb_utf16be_to_wchar mb_utf16be_to_wchar_avx2
# define mb_utf16le_to_wchar mb_utf16le_to_wchar_avx2
# define mb_wchar_to_utf16be mb_wchar_to_utf16be_avx2
# define mb_wchar_to_utf16le mb_wchar_to_utf16le_avx2

/* Forward declarations; the encoding tables refer to these functions (through the
 * aliases above) before the point where they are defined */
static size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);

#elif defined(ZEND_INTRIN_AVX2_RESOLVER)

/* We are building a binary which works with or without AVX2; whether or not to use
 * the AVX2-accelerated functions is determined at runtime */
# include <immintrin.h>
# include "Zend/zend_cpuinfo.h"

/* Portable (non-AVX2) implementations; selected when the CPU lacks AVX2 and also
 * called by the AVX2 decoder in this file for input it does not handle itself */
static size_t mb_utf16be_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16be_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static size_t mb_utf16le_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16le_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
59 
# ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
/* The dynamic linker will decide whether or not to use the AVX2-based functions
 * and resolve symbols accordingly */

/* Declarations of the AVX2 variants */
ZEND_INTRIN_AVX2_FUNC_DECL(size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state));
ZEND_INTRIN_AVX2_FUNC_DECL(void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end));
ZEND_INTRIN_AVX2_FUNC_DECL(size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state));
ZEND_INTRIN_AVX2_FUNC_DECL(void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end));

/* The generic entry points are indirect functions (ifunc); each one names the
 * resolver below that picks its implementation */
size_t mb_utf16be_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) __attribute__((ifunc("resolve_utf16be_wchar")));
void mb_wchar_to_utf16be(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) __attribute__((ifunc("resolve_wchar_utf16be")));
size_t mb_utf16le_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) __attribute__((ifunc("resolve_utf16le_wchar")));
void mb_wchar_to_utf16le(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) __attribute__((ifunc("resolve_wchar_utf16le")));
73 
74 ZEND_NO_SANITIZE_ADDRESS
75 ZEND_ATTRIBUTE_UNUSED
resolve_utf16be_wchar(void)76 static mb_to_wchar_fn resolve_utf16be_wchar(void)
77 {
78 	return zend_cpu_supports_avx2() ? mb_utf16be_to_wchar_avx2 : mb_utf16be_to_wchar_default;
79 }
80 
81 ZEND_NO_SANITIZE_ADDRESS
82 ZEND_ATTRIBUTE_UNUSED
resolve_wchar_utf16be(void)83 static mb_from_wchar_fn resolve_wchar_utf16be(void)
84 {
85 	return zend_cpu_supports_avx2() ? mb_wchar_to_utf16be_avx2 : mb_wchar_to_utf16be_default;
86 }
87 
88 ZEND_NO_SANITIZE_ADDRESS
89 ZEND_ATTRIBUTE_UNUSED
resolve_utf16le_wchar(void)90 static mb_to_wchar_fn resolve_utf16le_wchar(void)
91 {
92 	return zend_cpu_supports_avx2() ? mb_utf16le_to_wchar_avx2 : mb_utf16le_to_wchar_default;
93 }
94 
95 ZEND_NO_SANITIZE_ADDRESS
96 ZEND_ATTRIBUTE_UNUSED
resolve_wchar_utf16le(void)97 static mb_from_wchar_fn resolve_wchar_utf16le(void)
98 {
99 	return zend_cpu_supports_avx2() ? mb_wchar_to_utf16le_avx2 : mb_wchar_to_utf16le_default;
100 }
101 
# else /* ZEND_INTRIN_AVX2_FUNC_PTR */
/* We are compiling for a target where the dynamic linker will not be able to
 * resolve symbols according to whether the host supports AVX2 or not; so instead,
 * we make calls go through a function pointer and set the function pointer
 * on module load */

/* When the compiler supports it, mark the AVX2 variants with target("avx2") */
#ifdef HAVE_FUNC_ATTRIBUTE_TARGET
static size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) __attribute__((target("avx2")));
static void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) __attribute__((target("avx2")));
static size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) __attribute__((target("avx2")));
static void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) __attribute__((target("avx2")));
#else
static size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
#endif

/* Dispatch pointers; they stay NULL until init_convert_utf16() assigns them, so the
 * trampolines below must not be called before that */
static mb_to_wchar_fn utf16be_to_wchar_ptr = NULL;
static mb_from_wchar_fn wchar_to_utf16be_ptr = NULL;
static mb_to_wchar_fn utf16le_to_wchar_ptr = NULL;
static mb_from_wchar_fn wchar_to_utf16le_ptr = NULL;
124 
mb_utf16be_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)125 static size_t mb_utf16be_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
126 {
127 	return utf16be_to_wchar_ptr(in, in_len, buf, bufsize, NULL);
128 }
129 
/* Trampoline: forwards to whichever UTF-16BE encoder init_convert_utf16() installed */
static void mb_wchar_to_utf16be(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	const mb_from_wchar_fn encode = wchar_to_utf16be_ptr;

	encode(in, len, buf, end);
}
134 
mb_utf16le_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)135 static size_t mb_utf16le_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
136 {
137 	return utf16le_to_wchar_ptr(in, in_len, buf, bufsize, NULL);
138 }
139 
/* Trampoline: forwards to whichever UTF-16LE encoder init_convert_utf16() installed */
static void mb_wchar_to_utf16le(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	const mb_from_wchar_fn encode = wchar_to_utf16le_ptr;

	encode(in, len, buf, end);
}
144 
init_convert_utf16(void)145 void init_convert_utf16(void)
146 {
147 	if (zend_cpu_supports_avx2()) {
148 		utf16be_to_wchar_ptr = mb_utf16be_to_wchar_avx2;
149 		wchar_to_utf16be_ptr = mb_wchar_to_utf16be_avx2;
150 		utf16le_to_wchar_ptr = mb_utf16le_to_wchar_avx2;
151 		wchar_to_utf16le_ptr = mb_wchar_to_utf16le_avx2;
152 	} else {
153 		utf16be_to_wchar_ptr = mb_utf16be_to_wchar_default;
154 		wchar_to_utf16be_ptr = mb_wchar_to_utf16be_default;
155 		utf16le_to_wchar_ptr = mb_utf16le_to_wchar_default;
156 		wchar_to_utf16le_ptr = mb_wchar_to_utf16le_default;
157 	}
158 }
# endif

#else

/* No AVX2 support: the generic converter names are aliased straight to the
 * portable implementations */
# define mb_utf16be_to_wchar mb_utf16be_to_wchar_default
# define mb_utf16le_to_wchar mb_utf16le_to_wchar_default
# define mb_wchar_to_utf16be mb_wchar_to_utf16be_default
# define mb_wchar_to_utf16le mb_wchar_to_utf16le_default

/* Forward declarations; the encoding tables refer to these functions (through the
 * aliases above) before the point where they are defined */
static size_t mb_utf16be_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16be_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static size_t mb_utf16le_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16le_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);

#endif
175 
/* Forward declarations of file-local helpers referenced by the tables below */
static int mbfl_filt_conv_utf16_wchar_flush(mbfl_convert_filter *filter);
static size_t mb_utf16_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static zend_string* mb_cut_utf16(unsigned char *str, size_t from, size_t len, unsigned char *end);
static zend_string* mb_cut_utf16be(unsigned char *str, size_t from, size_t len, unsigned char *end);
static zend_string* mb_cut_utf16le(unsigned char *str, size_t from, size_t len, unsigned char *end);

/* NULL-terminated list of alternative names for the "UTF-16" encoding */
static const char *mbfl_encoding_utf16_aliases[] = {"utf16", NULL};
183 
/* Descriptor for "UTF-16" without an explicit byte order.
 * Decoding goes through mb_utf16_to_wchar, which looks for a BOM and otherwise
 * assumes big-endian; encoding always uses the big-endian encoder.
 * NOTE(review): the initializer is positional; the meaning of the NULL and 0
 * entries depends on the mbfl_encoding declaration, which is not in this file */
const mbfl_encoding mbfl_encoding_utf16 = {
	mbfl_no_encoding_utf16,
	"UTF-16",
	"UTF-16",
	mbfl_encoding_utf16_aliases,
	NULL,
	0,
	&vtbl_utf16_wchar,
	&vtbl_wchar_utf16,
	mb_utf16_to_wchar,
	mb_wchar_to_utf16be,
	NULL,
	mb_cut_utf16
};
198 
/* Descriptor for "UTF-16BE" (explicit big-endian byte order; no alias list).
 * NOTE(review): the initializer is positional; the meaning of the NULL and 0
 * entries depends on the mbfl_encoding declaration, which is not in this file */
const mbfl_encoding mbfl_encoding_utf16be = {
	mbfl_no_encoding_utf16be,
	"UTF-16BE",
	"UTF-16BE",
	NULL,
	NULL,
	0,
	&vtbl_utf16be_wchar,
	&vtbl_wchar_utf16be,
	mb_utf16be_to_wchar,
	mb_wchar_to_utf16be,
	NULL,
	mb_cut_utf16be
};
213 
/* Descriptor for "UTF-16LE" (explicit little-endian byte order; no alias list).
 * NOTE(review): the initializer is positional; the meaning of the NULL and 0
 * entries depends on the mbfl_encoding declaration, which is not in this file */
const mbfl_encoding mbfl_encoding_utf16le = {
	mbfl_no_encoding_utf16le,
	"UTF-16LE",
	"UTF-16LE",
	NULL,
	NULL,
	0,
	&vtbl_utf16le_wchar,
	&vtbl_wchar_utf16le,
	mb_utf16le_to_wchar,
	mb_wchar_to_utf16le,
	NULL,
	mb_cut_utf16le
};
228 
/* Byte-at-a-time filter table: UTF-16 (byte order not yet known) -> wchar.
 * The initial filter function, mbfl_filt_conv_utf16_wchar, replaces itself with
 * the BE or LE filter once the first 16-bit unit has been read.
 * NOTE(review): positional initializer; the roles of the NULL slots are not
 * visible from this file */
const struct mbfl_convert_vtbl vtbl_utf16_wchar = {
	mbfl_no_encoding_utf16,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_utf16_wchar,
	mbfl_filt_conv_utf16_wchar_flush,
	NULL,
};
238 
/* Byte-at-a-time filter table: wchar -> UTF-16.
 * Uses the big-endian output filter, so plain "UTF-16" output is written in
 * big-endian byte order.
 * NOTE(review): positional initializer; the roles of the NULL slots are not
 * visible from this file */
const struct mbfl_convert_vtbl vtbl_wchar_utf16 = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_utf16,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_utf16be,
	mbfl_filt_conv_common_flush,
	NULL,
};
248 
/* Byte-at-a-time filter table: UTF-16BE -> wchar.
 * The flush handler reports input that ended in the middle of a character.
 * NOTE(review): positional initializer; the roles of the NULL slots are not
 * visible from this file */
const struct mbfl_convert_vtbl vtbl_utf16be_wchar = {
	mbfl_no_encoding_utf16be,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_utf16be_wchar,
	mbfl_filt_conv_utf16_wchar_flush,
	NULL,
};
258 
/* Byte-at-a-time filter table: wchar -> UTF-16BE.
 * NOTE(review): positional initializer; the roles of the NULL slots are not
 * visible from this file */
const struct mbfl_convert_vtbl vtbl_wchar_utf16be = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_utf16be,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_utf16be,
	mbfl_filt_conv_common_flush,
	NULL,
};
268 
/* Byte-at-a-time filter table: UTF-16LE -> wchar.
 * The flush handler reports input that ended in the middle of a character.
 * NOTE(review): positional initializer; the roles of the NULL slots are not
 * visible from this file */
const struct mbfl_convert_vtbl vtbl_utf16le_wchar = {
	mbfl_no_encoding_utf16le,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_utf16le_wchar,
	mbfl_filt_conv_utf16_wchar_flush,
	NULL,
};
278 
/* Byte-at-a-time filter table: wchar -> UTF-16LE.
 * NOTE(review): positional initializer; the roles of the NULL slots are not
 * visible from this file */
const struct mbfl_convert_vtbl vtbl_wchar_utf16le = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_utf16le,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_utf16le,
	mbfl_filt_conv_common_flush,
	NULL,
};
288 
/* Evaluate `statement`; if its value is negative, return -1 from the enclosing
 * function. Used to propagate failures from the output callbacks */
#define CK(statement)	do { if ((statement) < 0) return (-1); } while (0)
290 
/* Initial filter for "UTF-16" input whose byte order is not yet known.
 * Collects the first two bytes, reads them as one big-endian 16-bit unit and
 * then installs the BE or LE filter for the rest of the stream:
 *   - 0xFFFE (little-endian BOM): switch to the LE filter, emit nothing
 *   - 0xFEFF (big-endian BOM): switch to the BE filter, emit nothing
 *   - anything else: switch to the BE filter and treat the unit as data
 * Returns 0, or -1 if the output callback fails */
int mbfl_filt_conv_utf16_wchar(int c, mbfl_convert_filter *filter)
{
	int unit;

	if (filter->status == 0) {
		/* First byte; just remember it */
		filter->cache = c & 0xFF;
		filter->status = 1;
		return 0;
	}

	unit = (filter->cache << 8) | (c & 0xFF);
	filter->status = 0;
	filter->cache = 0;

	if (unit == 0xFFFE) {
		/* Little-endian BOM */
		filter->filter_function = mbfl_filt_conv_utf16le_wchar;
		return 0;
	}

	/* No little-endian BOM, so stay with the big-endian assumption */
	filter->filter_function = mbfl_filt_conv_utf16be_wchar;

	if (unit == 0xFEFF) {
		/* Big-endian BOM; not passed on to the output */
		return 0;
	}

	if (unit >= 0xD800 && unit <= 0xDBFF) {
		/* First half of a surrogate pair; keep its 10 data bits and let the
		 * BE filter continue from its 'expecting second half' state */
		filter->cache = unit & 0x3FF;
		filter->status = 2;
		return 0;
	}

	if (unit >= 0xDC00 && unit <= 0xDFFF) {
		/* Second half of a surrogate pair with no first half before it */
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		return 0;
	}

	CK((*filter->output_function)(unit, filter->data));
	return 0;
}
321 
/* Byte-at-a-time UTF-16BE -> wchar filter.
 * filter->status tracks the position within the current character:
 *   0 = expecting 1st byte of a unit
 *   1 = expecting 2nd byte of a unit
 *   2 = saw a high surrogate, expecting 1st byte of the low surrogate
 *   3 = saw a high surrogate, expecting 2nd byte of the low surrogate
 * Returns 0, or -1 if the output callback fails */
int mbfl_filt_conv_utf16be_wchar(int c, mbfl_convert_filter *filter)
{
	const int byte = c & 0xFF;
	int unit;

	if (filter->status == 0) {
		filter->cache = byte;
		filter->status = 1;
	} else if (filter->status == 1) {
		unit = (filter->cache << 8) | byte;
		if (unit >= 0xD800 && unit <= 0xDBFF) {
			/* High surrogate; keep its 10 data bits */
			filter->cache = unit & 0x3FF;
			filter->status = 2;
		} else if (unit >= 0xDC00 && unit <= 0xDFFF) {
			/* Low surrogate with no high surrogate before it */
			filter->status = 0;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		} else {
			filter->status = 0;
			CK((*filter->output_function)(unit, filter->data));
		}
	} else if (filter->status == 2) {
		/* Cache now holds: 10 high-surrogate data bits, then this byte */
		filter->cache = (filter->cache << 8) | byte;
		filter->status = 3;
	} else if (filter->status == 3) {
		unit = ((filter->cache & 0xFF) << 8) | byte;
		if (unit >= 0xD800 && unit <= 0xDBFF) {
			/* Another high surrogate where a low one was expected; report the
			 * first one as bad and start over with this one */
			filter->cache = unit & 0x3FF;
			filter->status = 2;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		} else if (unit >= 0xDC00 && unit <= 0xDFFF) {
			/* Valid pair: combine the two 10-bit halves */
			filter->status = 0;
			unit = ((filter->cache & 0x3FF00) << 2) + (unit & 0x3FF) + 0x10000;
			CK((*filter->output_function)(unit, filter->data));
		} else {
			/* High surrogate followed by an ordinary codepoint */
			filter->status = 0;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			CK((*filter->output_function)(unit, filter->data));
		}
	}

	return 0;
}
372 
/* wchar -> UTF-16BE filter: writes `c` to the output callback one byte at a time,
 * most significant byte first. Codepoints in the supplementary range are written
 * as a surrogate pair; anything else outside the UCS-2 range is handed to
 * mbfl_filt_conv_illegal_output. Returns 0, or -1 if a callback fails */
int mbfl_filt_conv_wchar_utf16be(int c, mbfl_convert_filter *filter)
{
	if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
		CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
		CK((*filter->output_function)(c & 0xff, filter->data));
		return 0;
	}

	if (c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX) {
		const int high = ((c >> 10) - 0x40) | 0xd800;
		const int low = (c & 0x3ff) | 0xdc00;

		CK((*filter->output_function)((high >> 8) & 0xff, filter->data));
		CK((*filter->output_function)(high & 0xff, filter->data));
		CK((*filter->output_function)((low >> 8) & 0xff, filter->data));
		CK((*filter->output_function)(low & 0xff, filter->data));
		return 0;
	}

	CK(mbfl_filt_conv_illegal_output(c, filter));
	return 0;
}
393 
/* Byte-at-a-time UTF-16LE -> wchar filter.
 * filter->status tracks the position within the current character:
 *   0 = expecting 1st (low) byte of a unit
 *   1 = expecting 2nd (high) byte of a unit
 *   2 = saw a high surrogate, expecting 1st byte of the low surrogate
 *   3 = saw a high surrogate, expecting 2nd byte of the low surrogate
 * Returns 0, or -1 if the output callback fails */
int mbfl_filt_conv_utf16le_wchar(int c, mbfl_convert_filter *filter)
{
	int unit;

	if (filter->status == 0) {
		filter->cache = c & 0xff;
		filter->status = 1;
	} else if (filter->status == 1) {
		if ((c & 0xfc) == 0xd8) {
			/* High surrogate; cache ends up holding its 10 data bits */
			filter->cache += ((c & 0x3) << 8);
			filter->status = 2;
		} else if ((c & 0xfc) == 0xdc) {
			/* Low surrogate with no high surrogate before it */
			filter->status = 0;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		} else {
			filter->status = 0;
			CK((*filter->output_function)(filter->cache + ((c & 0xff) << 8), filter->data));
		}
	} else if (filter->status == 2) {
		/* Move the high-surrogate bits up by 10 and add the low byte of the next unit */
		filter->cache = (filter->cache << 10) + (c & 0xff);
		filter->status = 3;
	} else if (filter->status == 3) {
		unit = (filter->cache & 0xFF) | ((c & 0xFF) << 8);
		if (unit >= 0xD800 && unit <= 0xDBFF) {
			/* Another high surrogate where a low one was expected; report the
			 * first one as bad and start over with this one */
			filter->cache = unit & 0x3FF;
			filter->status = 2;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		} else if (unit >= 0xDC00 && unit <= 0xDFFF) {
			/* Valid pair: cache already has everything except the top 2 data
			 * bits of the low surrogate, which come from this byte */
			unit = filter->cache + ((c & 0x3) << 8) + 0x10000;
			filter->status = 0;
			CK((*filter->output_function)(unit, filter->data));
		} else {
			/* High surrogate followed by an ordinary codepoint */
			filter->status = 0;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			CK((*filter->output_function)(unit, filter->data));
		}
	}

	return 0;
}
448 
/* wchar -> UTF-16LE filter: writes `c` to the output callback one byte at a time,
 * least significant byte first. Codepoints in the supplementary range are written
 * as a surrogate pair; anything else outside the UCS-2 range is handed to
 * mbfl_filt_conv_illegal_output. Returns 0, or -1 if a callback fails */
int mbfl_filt_conv_wchar_utf16le(int c, mbfl_convert_filter *filter)
{
	if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
		CK((*filter->output_function)(c & 0xff, filter->data));
		CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
		return 0;
	}

	if (c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX) {
		const int high = ((c >> 10) - 0x40) | 0xd800;
		const int low = (c & 0x3ff) | 0xdc00;

		CK((*filter->output_function)(high & 0xff, filter->data));
		CK((*filter->output_function)((high >> 8) & 0xff, filter->data));
		CK((*filter->output_function)(low & 0xff, filter->data));
		CK((*filter->output_function)((low >> 8) & 0xff, filter->data));
		return 0;
	}

	CK(mbfl_filt_conv_illegal_output(c, filter));
	return 0;
}
469 
/* Flush handler shared by the UTF-16 -> wchar filters.
 * A non-zero status means the input stopped part-way through a character, so an
 * error marker is emitted and the state is reset. The downstream flush callback,
 * if one is set, is then invoked. Returns 0, or -1 if the output callback fails */
static int mbfl_filt_conv_utf16_wchar_flush(mbfl_convert_filter *filter)
{
	const int truncated = (filter->status != 0);

	if (truncated) {
		filter->status = 0;
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
	}

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
484 
/* Values stored in `*state` by mb_utf16_to_wchar once the byte order has been
 * decided; any other value is treated as 'not decided yet' */
#define DETECTED_BE 1
#define DETECTED_LE 2
487 
mb_utf16_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)488 static size_t mb_utf16_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
489 {
490 	if (*state == DETECTED_BE) {
491 		return mb_utf16be_to_wchar(in, in_len, buf, bufsize, NULL);
492 	} else if (*state == DETECTED_LE) {
493 		return mb_utf16le_to_wchar(in, in_len, buf, bufsize, NULL);
494 	} else if (*in_len >= 2) {
495 		unsigned char *p = *in;
496 		unsigned char c1 = *p++;
497 		unsigned char c2 = *p++;
498 		uint16_t n = (c1 << 8) | c2;
499 
500 		if (n == 0xFFFE) {
501 			/* Little-endian BOM */
502 			*in = p;
503 			*in_len -= 2;
504 			*state = DETECTED_LE;
505 			return mb_utf16le_to_wchar(in, in_len, buf, bufsize, NULL);
506 		} if (n == 0xFEFF) {
507 			/* Big-endian BOM; don't send to output */
508 			*in = p;
509 			*in_len -= 2;
510 		}
511 	}
512 
513 	*state = DETECTED_BE;
514 	return mb_utf16be_to_wchar(in, in_len, buf, bufsize, NULL);
515 }
516 
mb_utf16be_to_wchar_default(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)517 static size_t mb_utf16be_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
518 {
519 	/* We only want to read 16-bit words out of `str`; any trailing byte will be handled at the end */
520 	unsigned char *p = *in, *e = p + (*in_len & ~1);
521 	/* Set `limit` to one less than the actual amount of space in the buffer; this is because
522 	 * on some iterations of the below loop, we might produce two output words */
523 	uint32_t *out = buf, *limit = buf + bufsize - 1;
524 
525 	while (p < e && out < limit) {
526 		unsigned char c1 = *p++;
527 		unsigned char c2 = *p++;
528 		uint16_t n = (c1 << 8) | c2;
529 
530 		if (n >= 0xD800 && n <= 0xDBFF) {
531 			/* Handle surrogate */
532 			if (p < e) {
533 				unsigned char c3 = *p++;
534 				unsigned char c4 = *p++;
535 				uint16_t n2 = (c3 << 8) | c4;
536 
537 				if (n2 >= 0xD800 && n2 <= 0xDBFF) {
538 					/* Wrong; that's the first half of a surrogate pair, when we were expecting the second */
539 					*out++ = MBFL_BAD_INPUT;
540 					p -= 2;
541 				} else if (n2 >= 0xDC00 && n2 <= 0xDFFF) {
542 					*out++ = (((n & 0x3FF) << 10) | (n2 & 0x3FF)) + 0x10000;
543 				} else {
544 					/* The first half of a surrogate pair was followed by a 'normal' codepoint */
545 					*out++ = MBFL_BAD_INPUT;
546 					*out++ = n2;
547 				}
548 			} else {
549 				*out++ = MBFL_BAD_INPUT;
550 			}
551 		} else if (n >= 0xDC00 && n <= 0xDFFF) {
552 			/* This is wrong; second part of surrogate pair has come first */
553 			*out++ = MBFL_BAD_INPUT;
554 		} else {
555 			*out++ = n;
556 		}
557 	}
558 
559 	if (p == e && (*in_len & 0x1) && out < limit) {
560 		/* There is an extra trailing byte (which shouldn't be there) */
561 		*out++ = MBFL_BAD_INPUT;
562 		p++;
563 	}
564 
565 	*in_len -= (p - *in);
566 	*in = p;
567 	return out - buf;
568 }
569 
/* Portable wchar -> UTF-16BE encoder.
 * Appends `len` codepoints from `in` to `buf`: two bytes for values below
 * MBFL_WCSPLANE_UCS2MAX, a four-byte surrogate pair for values below
 * MBFL_WCSPLANE_UTF32MAX, and MB_CONVERT_ERROR handling for anything larger.
 * `end` is not used */
static void mb_wchar_to_utf16be_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Reserve two bytes per codepoint up front */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

	while (len--) {
		uint32_t cp = *in++;

		if (cp < MBFL_WCSPLANE_UCS2MAX) {
			out = mb_convert_buf_add2(out, (cp >> 8) & 0xFF, cp & 0xFF);
			continue;
		}

		if (cp >= MBFL_WCSPLANE_UTF32MAX) {
			MB_CONVERT_ERROR(buf, out, limit, cp, mb_wchar_to_utf16be_default);
			/* Re-reserve room for the codepoints still to come */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
			continue;
		}

		/* Needs a surrogate pair: four bytes now plus two for each remaining codepoint */
		uint16_t high = ((cp >> 10) - 0x40) | 0xD800;
		uint16_t low = (cp & 0x3FF) | 0xDC00;
		MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
		out = mb_convert_buf_add4(out, (high >> 8) & 0xFF, high & 0xFF, (low >> 8) & 0xFF, low & 0xFF);
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
594 
mb_utf16le_to_wchar_default(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)595 static size_t mb_utf16le_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
596 {
597 	/* We only want to read 16-bit words out of `str`; any trailing byte will be handled at the end */
598 	unsigned char *p = *in, *e = p + (*in_len & ~1);
599 	/* Set `limit` to one less than the actual amount of space in the buffer; this is because
600 	 * on some iterations of the below loop, we might produce two output words */
601 	uint32_t *out = buf, *limit = buf + bufsize - 1;
602 
603 	while (p < e && out < limit) {
604 		unsigned char c1 = *p++;
605 		unsigned char c2 = *p++;
606 		uint16_t n = (c2 << 8) | c1;
607 
608 		if (n >= 0xD800 && n <= 0xDBFF) {
609 			/* Handle surrogate */
610 			if (p < e) {
611 				unsigned char c3 = *p++;
612 				unsigned char c4 = *p++;
613 				uint16_t n2 = (c4 << 8) | c3;
614 
615 				if (n2 >= 0xD800 && n2 <= 0xDBFF) {
616 					/* Wrong; that's the first half of a surrogate pair, when we were expecting the second */
617 					*out++ = MBFL_BAD_INPUT;
618 					p -= 2;
619 				} else if (n2 >= 0xDC00 && n2 <= 0xDFFF) {
620 					*out++ = (((n & 0x3FF) << 10) | (n2 & 0x3FF)) + 0x10000;
621 				} else {
622 					/* The first half of a surrogate pair was followed by a 'normal' codepoint */
623 					*out++ = MBFL_BAD_INPUT;
624 					*out++ = n2;
625 				}
626 			} else {
627 				*out++ = MBFL_BAD_INPUT;
628 			}
629 		} else if (n >= 0xDC00 && n <= 0xDFFF) {
630 			/* This is wrong; second part of surrogate pair has come first */
631 			*out++ = MBFL_BAD_INPUT;
632 		} else {
633 			*out++ = n;
634 		}
635 	}
636 
637 	if (p == e && (*in_len & 0x1) && out < limit) {
638 		/* There is an extra trailing byte (which shouldn't be there) */
639 		*out++ = MBFL_BAD_INPUT;
640 		p++;
641 	}
642 
643 	*in_len -= (p - *in);
644 	*in = p;
645 	return out - buf;
646 }
647 
/* Portable wchar -> UTF-16LE encoder.
 * Appends `len` codepoints from `in` to `buf`: two bytes for values below
 * MBFL_WCSPLANE_UCS2MAX, a four-byte surrogate pair for values below
 * MBFL_WCSPLANE_UTF32MAX, and MB_CONVERT_ERROR handling for anything larger.
 * `end` is not used */
static void mb_wchar_to_utf16le_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Reserve two bytes per codepoint up front */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

	while (len--) {
		uint32_t cp = *in++;

		if (cp < MBFL_WCSPLANE_UCS2MAX) {
			out = mb_convert_buf_add2(out, cp & 0xFF, (cp >> 8) & 0xFF);
			continue;
		}

		if (cp >= MBFL_WCSPLANE_UTF32MAX) {
			MB_CONVERT_ERROR(buf, out, limit, cp, mb_wchar_to_utf16le_default);
			/* Re-reserve room for the codepoints still to come */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
			continue;
		}

		/* Needs a surrogate pair: four bytes now plus two for each remaining codepoint */
		uint16_t high = ((cp >> 10) - 0x40) | 0xD800;
		uint16_t low = (cp & 0x3FF) | 0xDC00;
		MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
		out = mb_convert_buf_add4(out, high & 0xFF, (high >> 8) & 0xFF, low & 0xFF, (low >> 8) & 0xFF);
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
672 
#if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER)

/* AVX2-accelerated UTF-16BE -> wchar decoder.
 *
 * While at least 32 input bytes and 16 output slots remain, the input is handled
 * in 32-byte blocks (16 UTF-16 units):
 *   - a block without surrogates is byte-swapped and widened with vector code
 *   - a block starting with non-surrogates has that prefix converted with
 *     vector code, and the loop then continues from the first surrogate
 *   - a block starting with surrogates has that prefix decoded by scalar code
 * Whatever is left is handed to mb_utf16be_to_wchar_default, which is also used
 * for the whole input when it is too short for the vector path.
 *
 * `*in` and `*in_len` are advanced past the consumed bytes; `state` is not used.
 * Returns the number of wchars written to `buf` */
#ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
#else
static size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
#endif
{
	size_t len = *in_len;

	if (len >= 32 && bufsize >= 16) {
		unsigned char *p = *in;
		uint32_t *out = buf;

		/* Used to determine if a block of input bytes contains any surrogates:
		 * in each 16-bit lane, the byte at the lower address (the first byte of the
		 * big-endian unit) is masked with 0xF8 and compared against 0xD8, which
		 * matches 0xD8..0xDF */
		const __m256i _f8 = _mm256_set1_epi16(0xF8);
		const __m256i _d8 = _mm256_set1_epi16(0xD8);
		/* wchars must be in host byte order, which is little-endian on x86;
		 * Since we are reading in (big-endian) UTF-16BE, use this vector to swap byte order for output */
		const __m256i swap_bytes = _mm256_set_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);

		do {
			__m256i operand = _mm256_loadu_si256((__m256i*)p); /* Load 32 bytes */

			/* One bit per input byte, so each surrogate unit contributes two adjacent set bits */
			uint32_t surrogate_bitvec = _mm256_movemask_epi8(_mm256_cmpeq_epi16(_mm256_and_si256(operand, _f8), _d8));
			if (surrogate_bitvec == 0) {
				/* There are no surrogates among these 16 characters
				 * So converting the UTF-16 input to wchars is very simple; just extend each 16-bit value
				 * to a 32-bit value, filling in zero bits in the high end */
				operand = _mm256_shuffle_epi8(operand, swap_bytes);
				_mm256_storeu_si256((__m256i*)out, _mm256_cvtepu16_epi32(_mm256_castsi256_si128(operand)));
				_mm256_storeu_si256((__m256i*)(out + 8), _mm256_cvtepu16_epi32(_mm256_extracti128_si256(operand, 1)));
				out += 16;
				bufsize -= 16;
				p += sizeof(__m256i);
				len -= sizeof(__m256i);
			} else if ((surrogate_bitvec & 1) == 0) {
				/* Some prefix of the current block is non-surrogates; output those */
				/* Trailing-zero count is a byte index; halve it to get a count of 16-bit units */
				uint8_t n_chars = zend_ulong_ntz(surrogate_bitvec) >> 1;
				operand = _mm256_shuffle_epi8(operand, swap_bytes);
				/* We know that the output buffer has at least 64 bytes of space available
				 * So don't bother trimming the output down to only include the non-surrogate prefix;
				 * rather, write out an entire block of 64 (or 32) bytes, then bump our output pointer
				 * forward just past the 'good part', so the 'bad part' will be overwritten on the next
				 * iteration of this loop */
				_mm256_storeu_si256((__m256i*)out, _mm256_cvtepu16_epi32(_mm256_castsi256_si128(operand)));
				if (n_chars > 8) {
					_mm256_storeu_si256((__m256i*)(out + 8), _mm256_cvtepu16_epi32(_mm256_extracti128_si256(operand, 1)));
				}
				out += n_chars;
				bufsize -= n_chars;
				p += n_chars * 2;
				len -= n_chars * 2;
			} else {
				/* Some prefix of the current block is (valid or invalid) surrogates
				 * Handle those using non-vectorized code */
				/* Invert the mask so that the trailing-zero count gives the length of the
				 * surrogate prefix; an all-zero result means all 16 units are surrogates */
				surrogate_bitvec = ~surrogate_bitvec;
				unsigned int n_chars = surrogate_bitvec ? zend_ulong_ntz(surrogate_bitvec) >> 1 : 16;
				do {
					unsigned char c1 = *p++;
					unsigned char c2 = *p++;

					/* c1 is known to be in 0xD8..0xDF here, so bit 2 tells a low surrogate
					 * (0xDC..0xDF) from a high one (0xD8..0xDB) */
					if (c1 & 0x4 || len < 4) {
						/* 2nd part of surrogate pair has come first OR string ended abruptly
						 * after 1st part of surrogate pair */
						*out++ = MBFL_BAD_INPUT;
						bufsize--;
						n_chars--;
						len -= 2;
						continue;
					}

					uint16_t n = (c1 << 8) | c2;
					unsigned char c3 = *p++;
					unsigned char c4 = *p++;

					if ((c3 & 0xFC) == 0xDC) {
						/* Valid surrogate pair */
						uint16_t n2 = (c3 << 8) | c4;
						*out++ = (((n & 0x3FF) << 10) | (n2 & 0x3FF)) + 0x10000;
						bufsize--;
						len -= 4;
#if defined(PHP_HAVE_BUILTIN_USUB_OVERFLOW) && PHP_HAVE_BUILTIN_USUB_OVERFLOW
						/* Subtracting 2 from `n_chars` will automatically set the CPU's flags;
						 * branch directly off the appropriate flag (CF on x86) rather than using
						 * another instruction (CMP on x86) to check for underflow */
						if (__builtin_usub_overflow(n_chars, 2, &n_chars)) {
							/* The last 2 bytes of this block and the first 2 bytes of the following
							 * block form a valid surrogate pair; now just make sure we don't get
							 * stuck in this loop due to underflow of the loop index */
							break;
						}
#else
						/* Same underflow check without the builtin: 1 - 2 wraps around to UINT_MAX */
						n_chars -= 2;
						if (n_chars == UINT_MAX) {
							break;
						}
#endif
					} else {
						/* First half of surrogate pair was followed by another first half
						 * OR by a non-surrogate character */
						*out++ = MBFL_BAD_INPUT;
						bufsize--;
						n_chars--;
						len -= 2;
						p -= 2; /* Back up so the last 2 bytes will be processed again */
					}
				} while (n_chars);
			}
		} while (len >= 32 && bufsize >= 16);

		if (len && bufsize >= 4) {
			/* Finish up trailing bytes which don't fill a 32-byte block */
			out += mb_utf16be_to_wchar_default(&p, &len, out, bufsize, NULL);
		}

		*in = p;
		*in_len = len;
		return out - buf;
	} else if (len) {
		/* Too little input or output space for the vector path */
		return mb_utf16be_to_wchar_default(in, in_len, buf, bufsize, NULL);
	} else {
		return 0;
	}
}
798 
/*
 * AVX2-accelerated conversion of wchars (32-bit codepoints) to UTF-16BE.
 *
 * in  - wchars to convert
 * len - number of wchars available at `in`
 * buf - conversion buffer which receives the output bytes
 * end - only used when fewer than 8 wchars are left over; it is then forwarded
 *       unchanged to mb_wchar_to_utf16be_default
 *
 * While at least 8 wchars remain, each group of 8 is loaded into one 256-bit
 * register and handled in one of three ways:
 *   - all 8 are <= 0xFFFF: pack them to 16 bytes and store them in one go
 *   - a leading run is <= 0xFFFF: pack and store 16 bytes, but only advance
 *     past the leading run
 *   - the leading run is > 0xFFFF: encode that run one wchar at a time, either
 *     as a surrogate pair or through MB_CONVERT_ERROR
 * Any remaining wchars (fewer than 8) are passed to mb_wchar_to_utf16be_default.
 */
#ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
#else
static void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
#endif
{
	if (len >= 8) {
		unsigned char *out, *limit;
		/* NOTE(review): these macros are defined elsewhere; presumably LOAD fetches the
		 * current write position/limit of `buf` and ENSURE guarantees room for the given
		 * number of bytes (2 per remaining wchar here) -- confirm against mbfilter.h */
		MB_CONVERT_BUF_LOAD(buf, out, limit);
		MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

		/* Used to find wchars which are outside the Unicode BMP (Basic Multilingual Plane) */
		const __m256i bmp_mask = _mm256_set1_epi32(0xFFFF);
		/* Used to extract 16 bits which we want from each of eight 32-bit values
		 * For each 32-bit element, byte 1 is selected before byte 0, which yields the
		 * big-endian byte order; indices of -1 produce zero bytes
		 * The packed bytes land in the upper 8 bytes of the low 128-bit lane and in the
		 * lower 8 bytes of the high 128-bit lane */
		const __m256i pack_8x16 = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 12, 13, 8, 9, 4, 5, 0, 1, 12, 13, 8, 9, 4, 5, 0, 1, -1, -1, -1, -1, -1, -1, -1, -1);

		do {
			__m256i operand = _mm256_loadu_si256((__m256i*)in); /* Load 32 bytes */

			/* 4 mask bits per wchar; all 4 are set if masking with 0xFFFF leaves the
			 * wchar unchanged, i.e. if it is no greater than 0xFFFF */
			uint32_t bmp_bitvec = _mm256_movemask_epi8(_mm256_cmpeq_epi32(_mm256_and_si256(operand, bmp_mask), operand));
			if (bmp_bitvec == 0xFFFFFFFF) {
				/* All eight wchars are in the BMP
				 * Shuffle bytes around to get the 16 bytes we want into the low 16 bytes of YMM register
				 * (which is equivalent to an XMM register) */
				operand = _mm256_shuffle_epi8(operand, pack_8x16);
				/* Swap the two 128-bit lanes, then shift each lane pair right by 8 bytes so
				 * that both 8-byte halves end up adjacent in the low lane */
				__m256i operand2 = _mm256_permute2x128_si256(operand, operand, 1);
				operand = _mm256_alignr_epi8(operand2, operand, 8);
				_mm_storeu_si128((__m128i*)out, _mm256_castsi256_si128(operand)); /* Store 16 bytes */
				out += 16;
				len -= 8;
				in += 8;
			} else if (bmp_bitvec & 1) {
				/* Some prefix of this block are codepoints in the BMP */
				/* Number of consecutive low 1 bits in the mask, which is 4x the number of
				 * leading BMP wchars (4 mask bits per wchar) */
				unsigned int n_bytes = zend_ulong_ntz(~bmp_bitvec);
				operand = _mm256_shuffle_epi8(operand, pack_8x16);
				__m256i operand2 = _mm256_permute2x128_si256(operand, operand, 1);
				operand = _mm256_alignr_epi8(operand2, operand, 8);
				/* Store 16 bytes, but bump output pointer forward just past the 'good part',
				 * so the 'bad part' will be overwritten on the next iteration of this loop */
				_mm_storeu_si128((__m128i*)out, _mm256_castsi256_si128(operand));
				out += n_bytes >> 1; /* 2 output bytes per BMP wchar */
				len -= n_bytes >> 2; /* 1 input wchar per 4 mask bits */
				in += n_bytes >> 2;
			} else {
				/* Some prefix of this block is codepoints outside the BMP OR error markers
				 * Handle them using non-vectorized code */
				/* Length of the leading run of wchars above 0xFFFF; all 8 if the mask is zero */
				unsigned int n_words = bmp_bitvec ? zend_ulong_ntz(bmp_bitvec) >> 2 : 8;
				do {
					uint32_t w = *in++;
					n_words--;
					len--;

					if (w < MBFL_WCSPLANE_UTF32MAX) {
						/* Split into a surrogate pair: `n1` carries the upper bits (offset
						 * by 0x10000 >> 10 == 0x40), `n2` carries the low 10 bits */
						uint16_t n1 = ((w >> 10) - 0x40) | 0xD800;
						uint16_t n2 = (w & 0x3FF) | 0xDC00;
						/* 4 bytes for this pair plus 2 bytes for each wchar still to come */
						MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
						out = mb_convert_buf_add4(out, (n1 >> 8) & 0xFF, n1 & 0xFF, (n2 >> 8) & 0xFF, n2 & 0xFF);
					} else {
						/* NOTE(review): MB_CONVERT_ERROR is defined elsewhere; presumably it emits
						 * the error output for `w` and may change `out`/`limit`, which is why
						 * space is re-ensured right after it -- confirm */
						MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_utf16be_default);
						MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
					}
				} while (n_words);
			}
		} while (len >= 8);

		MB_CONVERT_BUF_STORE(buf, out, limit);
	}

	/* Fewer than 8 wchars are left; let the non-vectorized converter finish them */
	if (len) {
		mb_wchar_to_utf16be_default(in, len, buf, end);
	}
}
871 
/*
 * AVX2-accelerated conversion of UTF-16LE bytes to wchars (32-bit codepoints).
 *
 * in      - points to the current input position; updated on the vectorized path
 * in_len  - points to the number of input bytes remaining; updated on the vectorized path
 * buf     - receives the decoded wchars
 * bufsize - number of wchars which fit in `buf`
 * state   - not referenced by this function (NULL is passed on to
 *           mb_utf16le_to_wchar_default)
 *
 * Returns the number of wchars written to `buf`.
 *
 * The vectorized path is used only while at least 32 input bytes and 16 output
 * slots are available; in all other cases the work is delegated to
 * mb_utf16le_to_wchar_default.
 */
#ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
#else
static size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
#endif
{
	/* Most of this function is the same as `mb_utf16be_to_wchar_avx2`, above;
	 * See it for more detailed code comments */

	size_t len = *in_len;

	if (len >= 32 && bufsize >= 16) {
		unsigned char *p = *in;
		uint32_t *out = buf;

		/* A 16-bit value `v` is a surrogate (0xD800..0xDFFF) iff (v & 0xF800) == 0xD800 */
		const __m256i _f8 = _mm256_set1_epi16(0xF800);
		const __m256i _d8 = _mm256_set1_epi16(0xD800);

		do {
			__m256i operand = _mm256_loadu_si256((__m256i*)p);

			/* 2 mask bits per 16-bit code unit; both are set if the unit is a surrogate */
			uint32_t surrogate_bitvec = _mm256_movemask_epi8(_mm256_cmpeq_epi16(_mm256_and_si256(operand, _f8), _d8));
			if (surrogate_bitvec == 0) {
				/* There are no surrogates among these 16 characters */
				/* Zero-extend each 16-bit unit to 32 bits: low 8 units first, then high 8 */
				_mm256_storeu_si256((__m256i*)out, _mm256_cvtepu16_epi32(_mm256_castsi256_si128(operand)));
				_mm256_storeu_si256((__m256i*)(out + 8), _mm256_cvtepu16_epi32(_mm256_extracti128_si256(operand, 1)));
				out += 16;
				bufsize -= 16;
				p += sizeof(__m256i);
				len -= sizeof(__m256i);
			} else if ((surrogate_bitvec & 1) == 0) {
				/* Some prefix of the current block is non-surrogates */
				/* Count of leading non-surrogate code units (2 mask bits per unit) */
				uint8_t n_chars = zend_ulong_ntz(surrogate_bitvec) >> 1;
				/* Always store the first 8 widened units; anything past `n_chars` is
				 * not counted below, since `out` only advances by `n_chars` */
				_mm256_storeu_si256((__m256i*)out, _mm256_cvtepu16_epi32(_mm256_castsi256_si128(operand)));
				if (n_chars > 8) {
					/* The good prefix reaches into the upper 8 units; store those as well */
					_mm256_storeu_si256((__m256i*)(out + 8), _mm256_cvtepu16_epi32(_mm256_extracti128_si256(operand, 1)));
				}
				out += n_chars;
				bufsize -= n_chars;
				p += n_chars * 2;
				len -= n_chars * 2;
			} else {
				/* Some prefix of the current block is (valid or invalid) surrogates */
				surrogate_bitvec = ~surrogate_bitvec;
				/* Count of leading surrogate code units; all 16 if the inverted mask is zero */
				unsigned int n_chars = surrogate_bitvec ? zend_ulong_ntz(surrogate_bitvec) >> 1 : 16;
				do {
					/* Little-endian: `c1` is the low byte and `c2` the high byte of the unit */
					unsigned char c1 = *p++;
					unsigned char c2 = *p++;

					/* For a surrogate code unit, bit 0x4 of the high byte is set for
					 * 0xDC..0xDF (2nd half of a pair) and clear for 0xD8..0xDB (1st half) */
					if (c2 & 0x4 || len < 4) {
						/* 2nd part of surrogate pair has come first OR string ended abruptly
						 * after 1st part of surrogate pair */
						*out++ = MBFL_BAD_INPUT;
						bufsize--;
						n_chars--;
						len -= 2;
						continue;
					}

					uint16_t n = (c2 << 8) | c1;
					unsigned char c3 = *p++;
					unsigned char c4 = *p++;

					if ((c4 & 0xFC) == 0xDC) {
						/* Valid surrogate pair */
						uint16_t n2 = (c4 << 8) | c3;
						/* Combine the 10 payload bits of each half, then add the 0x10000 offset */
						*out++ = (((n & 0x3FF) << 10) | (n2 & 0x3FF)) + 0x10000;
						bufsize--;
						len -= 4;
						/* A pair consumes 2 code units; if only 1 was left in the count, the
						 * subtraction wraps around and the loop must be left explicitly */
#if defined(PHP_HAVE_BUILTIN_USUB_OVERFLOW) && PHP_HAVE_BUILTIN_USUB_OVERFLOW
						if (__builtin_usub_overflow(n_chars, 2, &n_chars)) {
							break;
						}
#else
						n_chars -= 2;
						if (n_chars == UINT_MAX) {
							break;
						}
#endif
					} else {
						/* First half of surrogate pair was followed by another first half
						 * OR by a non-surrogate character */
						*out++ = MBFL_BAD_INPUT;
						bufsize--;
						n_chars--;
						len -= 2;
						p -= 2; /* Back up so the last 2 bytes will be processed again */
					}
				} while (n_chars);
			}
		} while (len >= 32 && bufsize >= 16);

		/* Hand any trailing bytes to the non-vectorized decoder, provided that at least
		 * 4 output slots are still free */
		if (len && bufsize >= 4) {
			out += mb_utf16le_to_wchar_default(&p, &len, out, bufsize, NULL);
		}

		*in = p;
		*in_len = len;
		return out - buf;
	} else if (len) {
		/* Too little input or output space for the vectorized loop */
		return mb_utf16le_to_wchar_default(in, in_len, buf, bufsize, NULL);
	} else {
		return 0;
	}
}
977 
/*
 * AVX2-accelerated conversion of wchars (32-bit codepoints) to UTF-16LE.
 *
 * in  - wchars to convert
 * len - number of wchars available at `in`
 * buf - conversion buffer which receives the output bytes
 * end - only used when fewer than 8 wchars are left over; it is then forwarded
 *       unchanged to mb_wchar_to_utf16le_default
 *
 * The structure mirrors mb_wchar_to_utf16be_avx2; the differences are the byte
 * order selected by the shuffle mask, the byte order passed to
 * mb_convert_buf_add4, and the `_default` function used for errors and the tail.
 */
#ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
#else
static void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
#endif
{
	if (len >= 8) {
		unsigned char *out, *limit;
		/* NOTE(review): these macros are defined elsewhere; presumably LOAD fetches the
		 * current write position/limit of `buf` and ENSURE guarantees room for the given
		 * number of bytes (2 per remaining wchar here) -- confirm against mbfilter.h */
		MB_CONVERT_BUF_LOAD(buf, out, limit);
		MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

		/* Used to find wchars which are outside the Unicode BMP (Basic Multilingual Plane) */
		const __m256i bmp_mask = _mm256_set1_epi32(0xFFFF);
		/* Used to extract 16 bits which we want from each of eight 32-bit values
		 * For each 32-bit element, byte 0 is selected before byte 1, which keeps the
		 * little-endian byte order; indices of -1 produce zero bytes */
		const __m256i pack_8x16 = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 13, 12, 9, 8, 5, 4, 1, 0, 13, 12, 9, 8, 5, 4, 1, 0, -1, -1, -1, -1, -1, -1, -1, -1);

		do {
			__m256i operand = _mm256_loadu_si256((__m256i*)in);

			/* 4 mask bits per wchar; all 4 are set if masking with 0xFFFF leaves the
			 * wchar unchanged, i.e. if it is no greater than 0xFFFF */
			uint32_t bmp_bitvec = _mm256_movemask_epi8(_mm256_cmpeq_epi32(_mm256_and_si256(operand, bmp_mask), operand));
			if (bmp_bitvec == 0xFFFFFFFF) {
				/* All eight wchars are in the BMP
				 * Shuffle bytes around to get the 16 bytes we want into the low 16 bytes of YMM register
				 * (which is equivalent to an XMM register) */
				operand = _mm256_shuffle_epi8(operand, pack_8x16);
				/* Swap the two 128-bit lanes, then shift each lane pair right by 8 bytes so
				 * that both 8-byte halves end up adjacent in the low lane */
				__m256i operand2 = _mm256_permute2x128_si256(operand, operand, 1);
				operand = _mm256_alignr_epi8(operand2, operand, 8);
				_mm_storeu_si128((__m128i*)out, _mm256_castsi256_si128(operand));
				out += 16;
				len -= 8;
				in += 8;
			} else if (bmp_bitvec & 1) {
				/* Some prefix of this block are codepoints in the BMP */
				/* Number of consecutive low 1 bits in the mask, which is 4x the number of
				 * leading BMP wchars (4 mask bits per wchar) */
				unsigned int n_bytes = zend_ulong_ntz(~bmp_bitvec);
				operand = _mm256_shuffle_epi8(operand, pack_8x16);
				__m256i operand2 = _mm256_permute2x128_si256(operand, operand, 1);
				operand = _mm256_alignr_epi8(operand2, operand, 8);
				/* All 16 bytes are stored, but `out` only advances past the bytes which
				 * belong to the leading BMP wchars; the rest is overwritten later */
				_mm_storeu_si128((__m128i*)out, _mm256_castsi256_si128(operand));
				out += n_bytes >> 1; /* 2 output bytes per BMP wchar */
				len -= n_bytes >> 2; /* 1 input wchar per 4 mask bits */
				in += n_bytes >> 2;
			} else {
				/* Some prefix of this block is codepoints outside the BMP OR error markers */
				/* Length of the leading run of wchars above 0xFFFF; all 8 if the mask is zero */
				unsigned int n_words = bmp_bitvec ? zend_ulong_ntz(bmp_bitvec) >> 2 : 8;
				do {
					uint32_t w = *in++;
					n_words--;
					len--;

					if (w < MBFL_WCSPLANE_UTF32MAX) {
						/* Split into a surrogate pair: `n1` carries the upper bits (offset
						 * by 0x10000 >> 10 == 0x40), `n2` carries the low 10 bits */
						uint16_t n1 = ((w >> 10) - 0x40) | 0xD800;
						uint16_t n2 = (w & 0x3FF) | 0xDC00;
						/* 4 bytes for this pair plus 2 bytes for each wchar still to come */
						MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
						/* Low byte first for each half (little-endian) */
						out = mb_convert_buf_add4(out, n1 & 0xFF, (n1 >> 8) & 0xFF, n2 & 0xFF, (n2 >> 8) & 0xFF);
					} else {
						/* NOTE(review): MB_CONVERT_ERROR is defined elsewhere; presumably it emits
						 * the error output for `w` and may change `out`/`limit`, which is why
						 * space is re-ensured right after it -- confirm */
						MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_utf16le_default);
						MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
					}
				} while (n_words);
			}
		} while (len >= 8);

		MB_CONVERT_BUF_STORE(buf, out, limit);
	}

	/* Fewer than 8 wchars are left; let the non-vectorized converter finish them */
	if (len) {
		mb_wchar_to_utf16le_default(in, len, buf, end);
	}
}
1047 
1048 #endif /* defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER) */
1049 
mb_cut_utf16be(unsigned char * str,size_t from,size_t len,unsigned char * end)1050 static zend_string* mb_cut_utf16be(unsigned char *str, size_t from, size_t len, unsigned char *end)
1051 {
1052 	if (len > end - (str + from)) {
1053 		len = end - (str + from);
1054 	}
1055 	from &= ~1;
1056 	len &= ~1;
1057 	unsigned char *start = str + from;
1058 	if (len < 2 || (end - start) < 2) {
1059 		return zend_empty_string;
1060 	}
1061 	/* Check if 1st codepoint is 2nd part of surrogate pair */
1062 	if (from > 0) {
1063 		uint32_t start_cp = (*start << 8) + *(start + 1);
1064 		if (start_cp >= 0xDC00 && start_cp <= 0xDFFF) {
1065 			uint32_t preceding_cp = (*(start - 2) << 8) + *(start - 1);
1066 			if (preceding_cp >= 0xD800 && preceding_cp <= 0xDBFF) {
1067 				from -= 2;
1068 			}
1069 		}
1070 	}
1071 	/* Same for ending cut point */
1072 	unsigned char *_end = start + len;
1073 	if (_end > end) {
1074 		_end = end;
1075 	}
1076 	uint32_t ending_cp = (*(_end - 2) << 8) + *(_end - 1);
1077 	if (ending_cp >= 0xD800 && ending_cp <= 0xDBFF) {
1078 		_end -= 2;
1079 	}
1080 	return zend_string_init_fast((char*)start, _end - start);
1081 }
1082 
mb_cut_utf16le(unsigned char * str,size_t from,size_t len,unsigned char * end)1083 static zend_string* mb_cut_utf16le(unsigned char *str, size_t from, size_t len, unsigned char *end)
1084 {
1085 	if (len > end - (str + from)) {
1086 		len = end - (str + from);
1087 	}
1088 	from &= ~1;
1089 	len &= ~1;
1090 	unsigned char *start = str + from;
1091 	if (len < 2 || (end - start) < 2) {
1092 		return zend_empty_string;
1093 	}
1094 	/* Check if 1st codepoint is 2nd part of surrogate pair */
1095 	if (from > 0) {
1096 		uint32_t start_cp = (*(start + 1) << 8) + *start;
1097 		if (start_cp >= 0xDC00 && start_cp <= 0xDFFF) {
1098 			uint32_t preceding_cp = (*(start - 1) << 8) + *(start - 2);
1099 			if (preceding_cp >= 0xD800 && preceding_cp <= 0xDBFF) {
1100 				from -= 2;
1101 			}
1102 		}
1103 	}
1104 	/* Same for ending cut point */
1105 	unsigned char *_end = start + len;
1106 	if (_end > end) {
1107 		_end = end;
1108 	}
1109 	uint32_t ending_cp = (*(_end - 1) << 8) + *(_end - 2);
1110 	if (ending_cp >= 0xD800 && ending_cp <= 0xDBFF) {
1111 		_end -= 2;
1112 	}
1113 	return zend_string_init_fast((char*)start, _end - start);
1114 }
1115 
mb_cut_utf16(unsigned char * str,size_t from,size_t len,unsigned char * end)1116 static zend_string* mb_cut_utf16(unsigned char *str, size_t from, size_t len, unsigned char *end)
1117 {
1118 	if (len < 2 || (end - str) < 2) {
1119 		return zend_empty_string;
1120 	}
1121 	uint32_t cp = (*str << 8) + *(str + 1);
1122 	if (cp == 0xFFFE) {
1123 		/* Little-endian BOM */
1124 		if (from < 2) {
1125 			from = 2;
1126 		}
1127 		return mb_cut_utf16le(str, from, len, end);
1128 	} else {
1129 		if (cp == 0xFEFF && from < 2) {
1130 			from = 2;
1131 		}
1132 		return mb_cut_utf16be(str, from, len, end);
1133 	}
1134 }
1135