1 /*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file:
22 *
23 */
24 /*
25 * The source code included in this file was separated from mbfilter.c
26 * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
27 *
28 */
29
30 #include "zend_bitset.h"
31 #include "mbfilter.h"
32 #include "mbfilter_utf16.h"
33
34 #ifdef ZEND_INTRIN_AVX2_NATIVE
35
36 /* We are building AVX2-only binary */
37 # include <immintrin.h>
/* Route the generic converter names (referenced by the encoding tables later in
 * this file) directly to the AVX2 implementations; no runtime dispatch is needed */
# define mb_utf16be_to_wchar mb_utf16be_to_wchar_avx2
# define mb_utf16le_to_wchar mb_utf16le_to_wchar_avx2
# define mb_wchar_to_utf16be mb_wchar_to_utf16be_avx2
# define mb_wchar_to_utf16le mb_wchar_to_utf16le_avx2

/* Forward declarations of the AVX2 converters */
static size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
47
48 #elif defined(ZEND_INTRIN_AVX2_RESOLVER)
49
50 /* We are building binary which works with or without AVX2; whether or not to use
51 * AVX2-accelerated functions will be determined at runtime */
52 # include <immintrin.h>
53 # include "Zend/zend_cpuinfo.h"
54
/* Forward declarations of the portable (non-SIMD) converters; the runtime selection
 * code below picks these when `zend_cpu_supports_avx2()` reports no AVX2 support */
static size_t mb_utf16be_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16be_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static size_t mb_utf16le_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16le_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
59
60 # ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
/* Dynamic linker will decide whether or not to use AVX2-based functions and
 * resolve symbols accordingly */

/* AVX2 converters, declared through ZEND_INTRIN_AVX2_FUNC_DECL (macro defined outside this file) */
ZEND_INTRIN_AVX2_FUNC_DECL(size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state));
ZEND_INTRIN_AVX2_FUNC_DECL(void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end));
ZEND_INTRIN_AVX2_FUNC_DECL(size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state));
ZEND_INTRIN_AVX2_FUNC_DECL(void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end));

/* The generic entry points are `ifunc` symbols: each one names the resolver function
 * (defined just below) whose return value becomes the implementation actually called */
size_t mb_utf16be_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) __attribute__((ifunc("resolve_utf16be_wchar")));
void mb_wchar_to_utf16be(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) __attribute__((ifunc("resolve_wchar_utf16be")));
size_t mb_utf16le_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) __attribute__((ifunc("resolve_utf16le_wchar")));
void mb_wchar_to_utf16le(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) __attribute__((ifunc("resolve_wchar_utf16le")));
73
74 ZEND_NO_SANITIZE_ADDRESS
75 ZEND_ATTRIBUTE_UNUSED
resolve_utf16be_wchar(void)76 static mb_to_wchar_fn resolve_utf16be_wchar(void)
77 {
78 return zend_cpu_supports_avx2() ? mb_utf16be_to_wchar_avx2 : mb_utf16be_to_wchar_default;
79 }
80
81 ZEND_NO_SANITIZE_ADDRESS
82 ZEND_ATTRIBUTE_UNUSED
resolve_wchar_utf16be(void)83 static mb_from_wchar_fn resolve_wchar_utf16be(void)
84 {
85 return zend_cpu_supports_avx2() ? mb_wchar_to_utf16be_avx2 : mb_wchar_to_utf16be_default;
86 }
87
88 ZEND_NO_SANITIZE_ADDRESS
89 ZEND_ATTRIBUTE_UNUSED
resolve_utf16le_wchar(void)90 static mb_to_wchar_fn resolve_utf16le_wchar(void)
91 {
92 return zend_cpu_supports_avx2() ? mb_utf16le_to_wchar_avx2 : mb_utf16le_to_wchar_default;
93 }
94
95 ZEND_NO_SANITIZE_ADDRESS
96 ZEND_ATTRIBUTE_UNUSED
resolve_wchar_utf16le(void)97 static mb_from_wchar_fn resolve_wchar_utf16le(void)
98 {
99 return zend_cpu_supports_avx2() ? mb_wchar_to_utf16le_avx2 : mb_wchar_to_utf16le_default;
100 }
101
102 # else /* ZEND_INTRIN_AVX2_FUNC_PTR */
/* We are compiling for a target where the dynamic linker will not be able to
 * resolve symbols according to whether the host supports AVX2 or not; so instead,
 * we can make calls go through a function pointer and set the function pointer
 * on module load */

/* Forward declarations of the AVX2 converters. When the compiler supports the
 * `target` function attribute, they are tagged with target("avx2") */
#ifdef HAVE_FUNC_ATTRIBUTE_TARGET
static size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) __attribute__((target("avx2")));
static void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) __attribute__((target("avx2")));
static size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) __attribute__((target("avx2")));
static void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) __attribute__((target("avx2")));
#else
static size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
#endif

/* Dispatch pointers, filled in by `init_convert_utf16`. They start out NULL and the
 * wrapper functions below call through them without a NULL check, so
 * `init_convert_utf16` has to run before any conversion is attempted */
static mb_to_wchar_fn utf16be_to_wchar_ptr = NULL;
static mb_from_wchar_fn wchar_to_utf16be_ptr = NULL;
static mb_to_wchar_fn utf16le_to_wchar_ptr = NULL;
static mb_from_wchar_fn wchar_to_utf16le_ptr = NULL;
124
mb_utf16be_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)125 static size_t mb_utf16be_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
126 {
127 return utf16be_to_wchar_ptr(in, in_len, buf, bufsize, NULL);
128 }
129
/* Function-pointer dispatch build: forwards a wchar -> UTF-16BE encode request,
 * arguments unchanged, to whichever implementation `init_convert_utf16`
 * installed in `wchar_to_utf16be_ptr` */
static void mb_wchar_to_utf16be(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	(*wchar_to_utf16be_ptr)(in, len, buf, end);
}
134
mb_utf16le_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)135 static size_t mb_utf16le_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
136 {
137 return utf16le_to_wchar_ptr(in, in_len, buf, bufsize, NULL);
138 }
139
/* Function-pointer dispatch build: forwards a wchar -> UTF-16LE encode request,
 * arguments unchanged, to whichever implementation `init_convert_utf16`
 * installed in `wchar_to_utf16le_ptr` */
static void mb_wchar_to_utf16le(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	(*wchar_to_utf16le_ptr)(in, len, buf, end);
}
144
init_convert_utf16(void)145 void init_convert_utf16(void)
146 {
147 if (zend_cpu_supports_avx2()) {
148 utf16be_to_wchar_ptr = mb_utf16be_to_wchar_avx2;
149 wchar_to_utf16be_ptr = mb_wchar_to_utf16be_avx2;
150 utf16le_to_wchar_ptr = mb_utf16le_to_wchar_avx2;
151 wchar_to_utf16le_ptr = mb_wchar_to_utf16le_avx2;
152 } else {
153 utf16be_to_wchar_ptr = mb_utf16be_to_wchar_default;
154 wchar_to_utf16be_ptr = mb_wchar_to_utf16be_default;
155 utf16le_to_wchar_ptr = mb_utf16le_to_wchar_default;
156 wchar_to_utf16le_ptr = mb_wchar_to_utf16le_default;
157 }
158 }
159 # endif
160
161 #else
162
/* No AVX2 support */
/* Map the generic converter names (referenced by the encoding tables later in this
 * file) straight onto the portable implementations */
# define mb_utf16be_to_wchar mb_utf16be_to_wchar_default
# define mb_utf16le_to_wchar mb_utf16le_to_wchar_default
# define mb_wchar_to_utf16be mb_wchar_to_utf16be_default
# define mb_wchar_to_utf16le mb_wchar_to_utf16le_default

/* Forward declarations of the portable converters */
static size_t mb_utf16be_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16be_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static size_t mb_utf16le_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16le_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
173
174 #endif
175
/* Forward declarations: both functions are referenced by the tables below
 * before their definitions appear */
static int mbfl_filt_conv_utf16_wchar_flush(mbfl_convert_filter *filter);
static size_t mb_utf16_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);

/* NULL-terminated list of alternative names for the "UTF-16" encoding */
static const char *mbfl_encoding_utf16_aliases[] = {"utf16", NULL};
180
/* Encoding descriptor for generic "UTF-16".
 * Decoding uses `mb_utf16_to_wchar`, which checks for a byte-order mark and otherwise
 * assumes big-endian; encoding reuses the big-endian encoder `mb_wchar_to_utf16be`.
 * The initializer is positional: which slot each value fills is determined by the
 * `mbfl_encoding` type, which is declared outside this file. */
const mbfl_encoding mbfl_encoding_utf16 = {
	mbfl_no_encoding_utf16,
	"UTF-16",
	"UTF-16",
	mbfl_encoding_utf16_aliases,
	NULL,
	0,
	&vtbl_utf16_wchar,
	&vtbl_wchar_utf16,
	mb_utf16_to_wchar,
	mb_wchar_to_utf16be,
	NULL
};
194
/* Encoding descriptor for "UTF-16BE" (no aliases).
 * Bulk conversion goes through `mb_utf16be_to_wchar` / `mb_wchar_to_utf16be`, which the
 * preprocessor section at the top of this file binds to an AVX2 or portable implementation.
 * The initializer is positional: which slot each value fills is determined by the
 * `mbfl_encoding` type, which is declared outside this file. */
const mbfl_encoding mbfl_encoding_utf16be = {
	mbfl_no_encoding_utf16be,
	"UTF-16BE",
	"UTF-16BE",
	NULL,
	NULL,
	0,
	&vtbl_utf16be_wchar,
	&vtbl_wchar_utf16be,
	mb_utf16be_to_wchar,
	mb_wchar_to_utf16be,
	NULL
};
208
/* Encoding descriptor for "UTF-16LE" (no aliases).
 * Bulk conversion goes through `mb_utf16le_to_wchar` / `mb_wchar_to_utf16le`, which the
 * preprocessor section at the top of this file binds to an AVX2 or portable implementation.
 * The initializer is positional: which slot each value fills is determined by the
 * `mbfl_encoding` type, which is declared outside this file. */
const mbfl_encoding mbfl_encoding_utf16le = {
	mbfl_no_encoding_utf16le,
	"UTF-16LE",
	"UTF-16LE",
	NULL,
	NULL,
	0,
	&vtbl_utf16le_wchar,
	&vtbl_wchar_utf16le,
	mb_utf16le_to_wchar,
	mb_wchar_to_utf16le,
	NULL
};
222
/* Byte-at-a-time filter table pairing `mbfl_no_encoding_utf16` with `mbfl_no_encoding_wchar`.
 * It starts in `mbfl_filt_conv_utf16_wchar`, which replaces itself with the BE or LE filter
 * after the first two bytes, and shares `mbfl_filt_conv_utf16_wchar_flush` with the BE/LE tables.
 * Positional initializer; slot meanings come from `struct mbfl_convert_vtbl` (declared elsewhere). */
const struct mbfl_convert_vtbl vtbl_utf16_wchar = {
	mbfl_no_encoding_utf16,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_utf16_wchar,
	mbfl_filt_conv_utf16_wchar_flush,
	NULL,
};
232
/* Byte-at-a-time filter table pairing `mbfl_no_encoding_wchar` with `mbfl_no_encoding_utf16`.
 * Note that it uses the big-endian encoder `mbfl_filt_conv_wchar_utf16be`, the same one
 * as `vtbl_wchar_utf16be`.
 * Positional initializer; slot meanings come from `struct mbfl_convert_vtbl` (declared elsewhere). */
const struct mbfl_convert_vtbl vtbl_wchar_utf16 = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_utf16,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_utf16be,
	mbfl_filt_conv_common_flush,
	NULL,
};
242
/* Byte-at-a-time filter table pairing `mbfl_no_encoding_utf16be` with `mbfl_no_encoding_wchar`,
 * built on `mbfl_filt_conv_utf16be_wchar` and `mbfl_filt_conv_utf16_wchar_flush`.
 * Positional initializer; slot meanings come from `struct mbfl_convert_vtbl` (declared elsewhere). */
const struct mbfl_convert_vtbl vtbl_utf16be_wchar = {
	mbfl_no_encoding_utf16be,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_utf16be_wchar,
	mbfl_filt_conv_utf16_wchar_flush,
	NULL,
};
252
/* Byte-at-a-time filter table pairing `mbfl_no_encoding_wchar` with `mbfl_no_encoding_utf16be`,
 * built on `mbfl_filt_conv_wchar_utf16be` and the shared `mbfl_filt_conv_common_flush`.
 * Positional initializer; slot meanings come from `struct mbfl_convert_vtbl` (declared elsewhere). */
const struct mbfl_convert_vtbl vtbl_wchar_utf16be = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_utf16be,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_utf16be,
	mbfl_filt_conv_common_flush,
	NULL,
};
262
/* Byte-at-a-time filter table pairing `mbfl_no_encoding_utf16le` with `mbfl_no_encoding_wchar`,
 * built on `mbfl_filt_conv_utf16le_wchar` and `mbfl_filt_conv_utf16_wchar_flush`.
 * Positional initializer; slot meanings come from `struct mbfl_convert_vtbl` (declared elsewhere). */
const struct mbfl_convert_vtbl vtbl_utf16le_wchar = {
	mbfl_no_encoding_utf16le,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_utf16le_wchar,
	mbfl_filt_conv_utf16_wchar_flush,
	NULL,
};
272
/* Byte-at-a-time filter table pairing `mbfl_no_encoding_wchar` with `mbfl_no_encoding_utf16le`,
 * built on `mbfl_filt_conv_wchar_utf16le` and the shared `mbfl_filt_conv_common_flush`.
 * Positional initializer; slot meanings come from `struct mbfl_convert_vtbl` (declared elsewhere). */
const struct mbfl_convert_vtbl vtbl_wchar_utf16le = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_utf16le,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_utf16le,
	mbfl_filt_conv_common_flush,
	NULL,
};
282
/* Evaluates `statement`; if its value is negative, the enclosing function returns -1
 * immediately. Intended for use inside functions with an integer return type. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
284
/* Entry filter for UTF-16 input whose byte order is not yet known.
 *
 * The first two bytes are read as one big-endian 16-bit word:
 *  - 0xFFFE (a little-endian BOM) switches the filter to `mbfl_filt_conv_utf16le_wchar`;
 *  - anything else switches it to `mbfl_filt_conv_utf16be_wchar`; the word is then
 *    treated as ordinary big-endian input, except that a 0xFEFF BOM is dropped.
 *
 * Returns 0, or -1 if the output function reported an error. */
int mbfl_filt_conv_utf16_wchar(int c, mbfl_convert_filter *filter)
{
	if (filter->status == 0) {
		/* Stash the first byte until its partner arrives */
		filter->cache = c & 0xFF;
		filter->status = 1;
		return 0;
	}

	int word = (filter->cache << 8) | (c & 0xFF);
	filter->status = 0;
	filter->cache = 0;

	if (word == 0xFFFE) {
		/* Little-endian BOM: every following byte goes to the LE filter */
		filter->filter_function = mbfl_filt_conv_utf16le_wchar;
		return 0;
	}

	/* No little-endian BOM; treat the stream as big-endian from here on */
	filter->filter_function = mbfl_filt_conv_utf16be_wchar;

	if (word == 0xFEFF) {
		/* Big-endian BOM is consumed silently */
		return 0;
	}

	if (word >= 0xD800 && word <= 0xDBFF) {
		/* High surrogate: keep its 10 data bits and leave the filter in the state
		 * where the BE filter expects the low surrogate's first byte */
		filter->cache = word & 0x3FF;
		filter->status = 2;
		return 0;
	}

	if (word >= 0xDC00 && word <= 0xDFFF) {
		/* A low surrogate cannot start a pair */
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		return 0;
	}

	CK((*filter->output_function)(word, filter->data));
	return 0;
}
315
/* Byte-at-a-time UTF-16BE decoder.
 *
 * `filter->status` tracks the position inside the current character:
 *   0 = expecting the high byte of a code unit
 *   1 = expecting the low byte of a code unit
 *   2 = high surrogate seen, expecting the high byte of the low surrogate
 *   3 = high surrogate seen, expecting the low byte of the low surrogate
 * `filter->cache` carries the bytes/bits collected so far.
 *
 * Returns 0, or -1 if the output function reported an error. */
int mbfl_filt_conv_utf16be_wchar(int c, mbfl_convert_filter *filter)
{
	const int byte = c & 0xFF;
	const int stage = filter->status;

	if (stage == 0) {
		filter->cache = byte;
		filter->status = 1;
	} else if (stage == 1) {
		int unit = (filter->cache << 8) | byte;

		if (unit >= 0xD800 && unit <= 0xDBFF) {
			/* High surrogate: remember its 10 data bits */
			filter->cache = unit & 0x3FF;
			filter->status = 2;
		} else if (unit >= 0xDC00 && unit <= 0xDFFF) {
			/* Low surrogate with no high surrogate before it */
			filter->status = 0;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		} else {
			filter->status = 0;
			CK((*filter->output_function)(unit, filter->data));
		}
	} else if (stage == 2) {
		/* Keep the high surrogate's bits above the incoming byte */
		filter->cache = (filter->cache << 8) | byte;
		filter->status = 3;
	} else if (stage == 3) {
		int unit = ((filter->cache & 0xFF) << 8) | byte;

		if (unit >= 0xDC00 && unit <= 0xDFFF) {
			/* Proper pair: the high surrogate's 10 bits sit at bits 8..17 of the cache */
			int codepoint = ((filter->cache & 0x3FF00) << 2) + (unit & 0x3FF) + 0x10000;
			filter->status = 0;
			CK((*filter->output_function)(codepoint, filter->data));
		} else if (unit >= 0xD800 && unit <= 0xDBFF) {
			/* A second high surrogate: report the first one as bad and start over
			 * with this one as the pending high surrogate */
			filter->cache = unit & 0x3FF;
			filter->status = 2;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		} else {
			/* High surrogate followed by an ordinary code unit: flag the
			 * surrogate, then pass the ordinary unit through */
			filter->status = 0;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			CK((*filter->output_function)(unit, filter->data));
		}
	}

	return 0;
}
366
/* Encodes one wchar `c` as UTF-16BE, sending the bytes one by one to the filter's
 * output function (high byte of each code unit first).
 *
 * Values in [0, MBFL_WCSPLANE_UCS2MAX) become a single code unit, values in
 * [MBFL_WCSPLANE_SUPMIN, MBFL_WCSPLANE_SUPMAX) become a surrogate pair, and anything
 * else is handed to `mbfl_filt_conv_illegal_output`.
 *
 * Returns 0, or -1 if any callee reported an error. */
int mbfl_filt_conv_wchar_utf16be(int c, mbfl_convert_filter *filter)
{
	if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
		CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
		CK((*filter->output_function)(c & 0xff, filter->data));
		return 0;
	}

	if (c < MBFL_WCSPLANE_SUPMIN || c >= MBFL_WCSPLANE_SUPMAX) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	/* Subtracting 0x40 from (c >> 10) is the same as subtracting 0x10000 from c
	 * before taking the upper 10 bits */
	int high = ((c >> 10) - 0x40) | 0xd800;
	CK((*filter->output_function)((high >> 8) & 0xff, filter->data));
	CK((*filter->output_function)(high & 0xff, filter->data));

	int low = (c & 0x3ff) | 0xdc00;
	CK((*filter->output_function)((low >> 8) & 0xff, filter->data));
	CK((*filter->output_function)(low & 0xff, filter->data));

	return 0;
}
387
/* Byte-at-a-time UTF-16LE decoder.
 *
 * `filter->status` tracks the position inside the current character:
 *   0 = expecting the low byte of a code unit
 *   1 = expecting the high byte of a code unit
 *   2 = high surrogate seen, expecting the low byte of the low surrogate
 *   3 = high surrogate seen, expecting the high byte of the low surrogate
 * `filter->cache` carries the bytes/bits collected so far.
 *
 * Returns 0, or -1 if the output function reported an error. */
int mbfl_filt_conv_utf16le_wchar(int c, mbfl_convert_filter *filter)
{
	const int stage = filter->status;

	if (stage == 0) {
		filter->cache = c & 0xff;
		filter->status = 1;
	} else if (stage == 1) {
		/* The top 6 bits of the high byte tell surrogates apart */
		const int top_bits = c & 0xfc;

		if (top_bits == 0xd8) {
			/* High surrogate: the cache now holds its 10 data bits */
			filter->cache += ((c & 0x3) << 8);
			filter->status = 2;
		} else if (top_bits == 0xdc) {
			/* Low surrogate with no high surrogate before it */
			filter->status = 0;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		} else {
			filter->status = 0;
			CK((*filter->output_function)(filter->cache + ((c & 0xff) << 8), filter->data));
		}
	} else if (stage == 2) {
		/* Move the high surrogate's bits into place and park the incoming
		 * low byte in the bottom 8 bits */
		filter->cache = (filter->cache << 10) + (c & 0xff);
		filter->status = 3;
	} else if (stage == 3) {
		int unit = ((c & 0xFF) << 8) | (filter->cache & 0xFF);

		if (unit >= 0xDC00 && unit <= 0xDFFF) {
			/* Proper pair: add the two remaining data bits from the high byte */
			int codepoint = filter->cache + ((c & 0x3) << 8) + 0x10000;
			filter->status = 0;
			CK((*filter->output_function)(codepoint, filter->data));
		} else if (unit >= 0xD800 && unit <= 0xDBFF) {
			/* A second high surrogate: report the first one as bad and start over
			 * with this one as the pending high surrogate */
			filter->cache = unit & 0x3FF;
			filter->status = 2;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		} else {
			/* High surrogate followed by an ordinary code unit: flag the
			 * surrogate, then pass the ordinary unit through */
			filter->status = 0;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			CK((*filter->output_function)(unit, filter->data));
		}
	}

	return 0;
}
442
/* Encodes one wchar `c` as UTF-16LE, sending the bytes one by one to the filter's
 * output function (low byte of each code unit first).
 *
 * Values in [0, MBFL_WCSPLANE_UCS2MAX) become a single code unit, values in
 * [MBFL_WCSPLANE_SUPMIN, MBFL_WCSPLANE_SUPMAX) become a surrogate pair, and anything
 * else is handed to `mbfl_filt_conv_illegal_output`.
 *
 * Returns 0, or -1 if any callee reported an error. */
int mbfl_filt_conv_wchar_utf16le(int c, mbfl_convert_filter *filter)
{
	if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
		CK((*filter->output_function)(c & 0xff, filter->data));
		CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
		return 0;
	}

	if (c < MBFL_WCSPLANE_SUPMIN || c >= MBFL_WCSPLANE_SUPMAX) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	/* Subtracting 0x40 from (c >> 10) is the same as subtracting 0x10000 from c
	 * before taking the upper 10 bits */
	int high = ((c >> 10) - 0x40) | 0xd800;
	CK((*filter->output_function)(high & 0xff, filter->data));
	CK((*filter->output_function)((high >> 8) & 0xff, filter->data));

	int low = (c & 0x3ff) | 0xdc00;
	CK((*filter->output_function)(low & 0xff, filter->data));
	CK((*filter->output_function)((low >> 8) & 0xff, filter->data));

	return 0;
}
463
/* Flush handler shared by the UTF-16, UTF-16BE and UTF-16LE decoding filters.
 *
 * A non-zero status at flush time means the input stopped in the middle of a
 * character, so one MBFL_BAD_INPUT marker is emitted for it. Afterwards the
 * downstream flush function, if any, is invoked.
 *
 * Returns 0, or -1 if emitting the error marker failed (in which case the
 * downstream flush function is not called). */
static int mbfl_filt_conv_utf16_wchar_flush(mbfl_convert_filter *filter)
{
	if (filter->status != 0) {
		filter->status = 0;
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
	}

	if (filter->flush_function != NULL) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
478
/* Values kept in `*state` by `mb_utf16_to_wchar` once the byte order has been decided */
#define DETECTED_BE 1
#define DETECTED_LE 2

/* Bulk decoder for generic "UTF-16".
 *
 * If `*state` already records a byte order, the call goes straight to the matching
 * BE/LE decoder. Otherwise the first two input bytes (when present) are inspected:
 * a little-endian BOM (FF FE) is skipped and selects the LE decoder; a big-endian
 * BOM (FE FF) is skipped; in every case other than the LE BOM the input is treated
 * as big-endian. The decision is stored in `*state` for subsequent calls.
 *
 * Returns whatever the selected decoder returns. */
static size_t mb_utf16_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
	switch (*state) {
	case DETECTED_BE:
		return mb_utf16be_to_wchar(in, in_len, buf, bufsize, NULL);
	case DETECTED_LE:
		return mb_utf16le_to_wchar(in, in_len, buf, bufsize, NULL);
	default:
		break;
	}

	if (*in_len >= 2) {
		unsigned char *start = *in;
		uint16_t first_unit = (start[0] << 8) | start[1];

		if (first_unit == 0xFFFE) {
			/* Little-endian BOM; skip it and remember the byte order */
			*in = start + 2;
			*in_len -= 2;
			*state = DETECTED_LE;
			return mb_utf16le_to_wchar(in, in_len, buf, bufsize, NULL);
		}

		if (first_unit == 0xFEFF) {
			/* Big-endian BOM; skip it so it never reaches the output */
			*in = start + 2;
			*in_len -= 2;
		}
	}

	*state = DETECTED_BE;
	return mb_utf16be_to_wchar(in, in_len, buf, bufsize, NULL);
}
510
mb_utf16be_to_wchar_default(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)511 static size_t mb_utf16be_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
512 {
513 /* We only want to read 16-bit words out of `str`; any trailing byte will be handled at the end */
514 unsigned char *p = *in, *e = p + (*in_len & ~1);
515 /* Set `limit` to one less than the actual amount of space in the buffer; this is because
516 * on some iterations of the below loop, we might produce two output words */
517 uint32_t *out = buf, *limit = buf + bufsize - 1;
518
519 while (p < e && out < limit) {
520 unsigned char c1 = *p++;
521 unsigned char c2 = *p++;
522 uint16_t n = (c1 << 8) | c2;
523
524 if (n >= 0xD800 && n <= 0xDBFF) {
525 /* Handle surrogate */
526 if (p < e) {
527 unsigned char c3 = *p++;
528 unsigned char c4 = *p++;
529 uint16_t n2 = (c3 << 8) | c4;
530
531 if (n2 >= 0xD800 && n2 <= 0xDBFF) {
532 /* Wrong; that's the first half of a surrogate pair, when we were expecting the second */
533 *out++ = MBFL_BAD_INPUT;
534 p -= 2;
535 } else if (n2 >= 0xDC00 && n2 <= 0xDFFF) {
536 *out++ = (((n & 0x3FF) << 10) | (n2 & 0x3FF)) + 0x10000;
537 } else {
538 /* The first half of a surrogate pair was followed by a 'normal' codepoint */
539 *out++ = MBFL_BAD_INPUT;
540 *out++ = n2;
541 }
542 } else {
543 *out++ = MBFL_BAD_INPUT;
544 }
545 } else if (n >= 0xDC00 && n <= 0xDFFF) {
546 /* This is wrong; second part of surrogate pair has come first */
547 *out++ = MBFL_BAD_INPUT;
548 } else {
549 *out++ = n;
550 }
551 }
552
553 if (p == e && (*in_len & 0x1) && out < limit) {
554 /* There is an extra trailing byte (which shouldn't be there) */
555 *out++ = MBFL_BAD_INPUT;
556 p++;
557 }
558
559 *in_len -= (p - *in);
560 *in = p;
561 return out - buf;
562 }
563
/* Portable bulk wchar -> UTF-16BE encoder.
 *
 * Converts `len` wchars from `in` and appends the bytes to `buf`. Values below
 * MBFL_WCSPLANE_UCS2MAX take 2 bytes, other values below MBFL_WCSPLANE_UTF32MAX
 * take a 4-byte surrogate pair, and anything else goes through MB_CONVERT_ERROR.
 * `end` is not consulted by this function. */
static void mb_wchar_to_utf16be_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Initial request: 2 bytes for each input wchar */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

	while (len > 0) {
		uint32_t w = *in++;
		len--; /* From here on `len` counts the wchars still to come */

		if (w < MBFL_WCSPLANE_UCS2MAX) {
			out = mb_convert_buf_add2(out, (w >> 8) & 0xFF, w & 0xFF);
		} else if (w < MBFL_WCSPLANE_UTF32MAX) {
			uint16_t hi = ((w >> 10) - 0x40) | 0xD800;
			uint16_t lo = (w & 0x3FF) | 0xDC00;
			/* A surrogate pair needs 4 bytes, more than the 2 budgeted per wchar */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
			out = mb_convert_buf_add4(out, (hi >> 8) & 0xFF, hi & 0xFF, (lo >> 8) & 0xFF, lo & 0xFF);
		} else {
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_utf16be_default);
			/* Request room for the remaining wchars again after error handling */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
588
mb_utf16le_to_wchar_default(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)589 static size_t mb_utf16le_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
590 {
591 /* We only want to read 16-bit words out of `str`; any trailing byte will be handled at the end */
592 unsigned char *p = *in, *e = p + (*in_len & ~1);
593 /* Set `limit` to one less than the actual amount of space in the buffer; this is because
594 * on some iterations of the below loop, we might produce two output words */
595 uint32_t *out = buf, *limit = buf + bufsize - 1;
596
597 while (p < e && out < limit) {
598 unsigned char c1 = *p++;
599 unsigned char c2 = *p++;
600 uint16_t n = (c2 << 8) | c1;
601
602 if (n >= 0xD800 && n <= 0xDBFF) {
603 /* Handle surrogate */
604 if (p < e) {
605 unsigned char c3 = *p++;
606 unsigned char c4 = *p++;
607 uint16_t n2 = (c4 << 8) | c3;
608
609 if (n2 >= 0xD800 && n2 <= 0xDBFF) {
610 /* Wrong; that's the first half of a surrogate pair, when we were expecting the second */
611 *out++ = MBFL_BAD_INPUT;
612 p -= 2;
613 } else if (n2 >= 0xDC00 && n2 <= 0xDFFF) {
614 *out++ = (((n & 0x3FF) << 10) | (n2 & 0x3FF)) + 0x10000;
615 } else {
616 /* The first half of a surrogate pair was followed by a 'normal' codepoint */
617 *out++ = MBFL_BAD_INPUT;
618 *out++ = n2;
619 }
620 } else {
621 *out++ = MBFL_BAD_INPUT;
622 }
623 } else if (n >= 0xDC00 && n <= 0xDFFF) {
624 /* This is wrong; second part of surrogate pair has come first */
625 *out++ = MBFL_BAD_INPUT;
626 } else {
627 *out++ = n;
628 }
629 }
630
631 if (p == e && (*in_len & 0x1) && out < limit) {
632 /* There is an extra trailing byte (which shouldn't be there) */
633 *out++ = MBFL_BAD_INPUT;
634 p++;
635 }
636
637 *in_len -= (p - *in);
638 *in = p;
639 return out - buf;
640 }
641
/* Portable bulk wchar -> UTF-16LE encoder.
 *
 * Converts `len` wchars from `in` and appends the bytes to `buf`. Values below
 * MBFL_WCSPLANE_UCS2MAX take 2 bytes, other values below MBFL_WCSPLANE_UTF32MAX
 * take a 4-byte surrogate pair, and anything else goes through MB_CONVERT_ERROR.
 * `end` is not consulted by this function. */
static void mb_wchar_to_utf16le_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Initial request: 2 bytes for each input wchar */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

	while (len > 0) {
		uint32_t w = *in++;
		len--; /* From here on `len` counts the wchars still to come */

		if (w < MBFL_WCSPLANE_UCS2MAX) {
			out = mb_convert_buf_add2(out, w & 0xFF, (w >> 8) & 0xFF);
		} else if (w < MBFL_WCSPLANE_UTF32MAX) {
			uint16_t hi = ((w >> 10) - 0x40) | 0xD800;
			uint16_t lo = (w & 0x3FF) | 0xDC00;
			/* A surrogate pair needs 4 bytes, more than the 2 budgeted per wchar */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
			out = mb_convert_buf_add4(out, hi & 0xFF, (hi >> 8) & 0xFF, lo & 0xFF, (lo >> 8) & 0xFF);
		} else {
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_utf16le_default);
			/* Request room for the remaining wchars again after error handling */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
666
667 #if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER)
668
/* AVX2 bulk UTF-16BE decoder.
 *
 * Consumes bytes from `*in` (`*in_len` bytes available) and writes wchars to `buf`,
 * which has room for `bufsize` entries. On return `*in` / `*in_len` have been advanced
 * past the consumed input, and the return value is the number of wchars written.
 * `state` is not used.
 *
 * The vectorized loop handles 32 input bytes (16 code units) per iteration and only
 * runs while at least 32 input bytes and 16 output slots remain; everything else is
 * delegated to `mb_utf16be_to_wchar_default`. The function has external linkage when
 * ZEND_INTRIN_AVX2_FUNC_PROTO is defined and is `static` otherwise. */
#ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
#else
static size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
#endif
{
	size_t len = *in_len;

	if (len >= 32 && bufsize >= 16) {
		unsigned char *p = *in;
		uint32_t *out = buf;

		/* Used to determine if a block of input bytes contains any surrogates.
		 * The input is big-endian, so the first byte of each code unit (the one which
		 * identifies a surrogate) lands in the low byte of each 16-bit lane after the
		 * load; hence the masks are 0xF8 / 0xD8 rather than 0xF800 / 0xD800 */
		const __m256i _f8 = _mm256_set1_epi16(0xF8);
		const __m256i _d8 = _mm256_set1_epi16(0xD8);
		/* wchars must be in host byte order, which is little-endian on x86;
		 * Since we are reading in (big-endian) UTF-16BE, use this vector to swap byte order for output */
		const __m256i swap_bytes = _mm256_set_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);

		do {
			__m256i operand = _mm256_loadu_si256((__m256i*)p); /* Load 32 bytes */

			/* Two bits per code unit (one per byte); both are set if the unit is a surrogate */
			uint32_t surrogate_bitvec = _mm256_movemask_epi8(_mm256_cmpeq_epi16(_mm256_and_si256(operand, _f8), _d8));
			if (surrogate_bitvec == 0) {
				/* There are no surrogates among these 16 characters
				 * So converting the UTF-16 input to wchars is very simple; just extend each 16-bit value
				 * to a 32-bit value, filling in zero bits in the high end */
				operand = _mm256_shuffle_epi8(operand, swap_bytes);
				_mm256_storeu_si256((__m256i*)out, _mm256_cvtepu16_epi32(_mm256_castsi256_si128(operand)));
				_mm256_storeu_si256((__m256i*)(out + 8), _mm256_cvtepu16_epi32(_mm256_extracti128_si256(operand, 1)));
				out += 16;
				bufsize -= 16;
				p += sizeof(__m256i);
				len -= sizeof(__m256i);
			} else if ((surrogate_bitvec & 1) == 0) {
				/* Some prefix of the current block is non-surrogates; output those */
				/* Halving the position of the lowest set bit gives the number of leading
				 * non-surrogate code units (assuming `zend_ulong_ntz` counts trailing zero bits) */
				uint8_t n_chars = zend_ulong_ntz(surrogate_bitvec) >> 1;
				operand = _mm256_shuffle_epi8(operand, swap_bytes);
				/* We know that the output buffer has at least 64 bytes of space available
				 * So don't bother trimming the output down to only include the non-surrogate prefix;
				 * rather, write out an entire block of 64 (or 32) bytes, then bump our output pointer
				 * forward just past the 'good part', so the 'bad part' will be overwritten on the next
				 * iteration of this loop */
				_mm256_storeu_si256((__m256i*)out, _mm256_cvtepu16_epi32(_mm256_castsi256_si128(operand)));
				if (n_chars > 8) {
					_mm256_storeu_si256((__m256i*)(out + 8), _mm256_cvtepu16_epi32(_mm256_extracti128_si256(operand, 1)));
				}
				out += n_chars;
				bufsize -= n_chars;
				p += n_chars * 2;
				len -= n_chars * 2;
			} else {
				/* Some prefix of the current block is (valid or invalid) surrogates
				 * Handle those using non-vectorized code */
				surrogate_bitvec = ~surrogate_bitvec;
				/* Number of leading surrogate code units; all 16 if the inverted vector is zero */
				unsigned int n_chars = surrogate_bitvec ? zend_ulong_ntz(surrogate_bitvec) >> 1 : 16;
				do {
					unsigned char c1 = *p++;
					unsigned char c2 = *p++;

					/* Every unit handled in this loop is a surrogate, so bit 0x4 of its first
					 * byte is what separates DC..DF (second half) from D8..DB (first half) */
					if (c1 & 0x4 || len < 4) {
						/* 2nd part of surrogate pair has come first OR string ended abruptly
						 * after 1st part of surrogate pair */
						*out++ = MBFL_BAD_INPUT;
						bufsize--;
						n_chars--;
						len -= 2;
						continue;
					}

					uint16_t n = (c1 << 8) | c2;
					unsigned char c3 = *p++;
					unsigned char c4 = *p++;

					if ((c3 & 0xFC) == 0xDC) {
						/* Valid surrogate pair */
						uint16_t n2 = (c3 << 8) | c4;
						*out++ = (((n & 0x3FF) << 10) | (n2 & 0x3FF)) + 0x10000;
						bufsize--;
						len -= 4;
#if defined(PHP_HAVE_BUILTIN_USUB_OVERFLOW) && PHP_HAVE_BUILTIN_USUB_OVERFLOW
						/* Subtracting 2 from `n_chars` will automatically set the CPU's flags;
						 * branch directly off the appropriate flag (CF on x86) rather than using
						 * another instruction (CMP on x86) to check for underflow */
						if (__builtin_usub_overflow(n_chars, 2, &n_chars)) {
							/* The last 2 bytes of this block and the first 2 bytes of the following
							 * block form a valid surrogate pair; now just make sure we don't get
							 * stuck in this loop due to underflow of the loop index */
							break;
						}
#else
						/* Same underflow check without the builtin: 1 - 2 wraps around to UINT_MAX */
						n_chars -= 2;
						if (n_chars == UINT_MAX) {
							break;
						}
#endif
					} else {
						/* First half of surrogate pair was followed by another first half
						 * OR by a non-surrogate character */
						*out++ = MBFL_BAD_INPUT;
						bufsize--;
						n_chars--;
						len -= 2;
						p -= 2; /* Back up so the last 2 bytes will be processed again */
					}
				} while (n_chars);
			}
		} while (len >= 32 && bufsize >= 16);

		if (len && bufsize >= 4) {
			/* Finish up trailing bytes which don't fill a 32-byte block */
			out += mb_utf16be_to_wchar_default(&p, &len, out, bufsize, NULL);
		}

		*in = p;
		*in_len = len;
		return out - buf;
	} else if (len) {
		/* Not enough input or output space for even one vector iteration */
		return mb_utf16be_to_wchar_default(in, in_len, buf, bufsize, NULL);
	} else {
		return 0;
	}
}
792
/* AVX2 bulk wchar -> UTF-16BE encoder.
 *
 * Converts `len` wchars from `in` and appends the resulting bytes to `buf`.
 * The vectorized loop handles 8 wchars (32 input bytes) per iteration while at least
 * 8 wchars remain; whatever is left afterwards is passed, together with `end`, to
 * `mb_wchar_to_utf16be_default`. The function has external linkage when
 * ZEND_INTRIN_AVX2_FUNC_PROTO is defined and is `static` otherwise. */
#ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
#else
static void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
#endif
{
	if (len >= 8) {
		unsigned char *out, *limit;
		MB_CONVERT_BUF_LOAD(buf, out, limit);
		MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

		/* Used to find wchars which are outside the Unicode BMP (Basic Multilingual Plane) */
		const __m256i bmp_mask = _mm256_set1_epi32(0xFFFF);
		/* Used to extract 16 bits which we want from each of eight 32-bit values */
		const __m256i pack_8x16 = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 12, 13, 8, 9, 4, 5, 0, 1, 12, 13, 8, 9, 4, 5, 0, 1, -1, -1, -1, -1, -1, -1, -1, -1);

		do {
			__m256i operand = _mm256_loadu_si256((__m256i*)in); /* Load 32 bytes */

			/* Four bits per wchar (one per byte); all four are set if the wchar fits in 16 bits */
			uint32_t bmp_bitvec = _mm256_movemask_epi8(_mm256_cmpeq_epi32(_mm256_and_si256(operand, bmp_mask), operand));
			if (bmp_bitvec == 0xFFFFFFFF) {
				/* All eight wchars are in the BMP
				 * Shuffle bytes around to get the 16 bytes we want into the low 16 bytes of YMM register
				 * (which is equivalent to an XMM register) */
				operand = _mm256_shuffle_epi8(operand, pack_8x16);
				__m256i operand2 = _mm256_permute2x128_si256(operand, operand, 1);
				operand = _mm256_alignr_epi8(operand2, operand, 8);
				_mm_storeu_si128((__m128i*)out, _mm256_castsi256_si128(operand)); /* Store 16 bytes */
				out += 16;
				len -= 8;
				in += 8;
			} else if (bmp_bitvec & 1) {
				/* Some prefix of this block are codepoints in the BMP */
				/* `n_bytes` is the size in input bytes (4 per wchar) of that prefix,
				 * assuming `zend_ulong_ntz` counts trailing zero bits */
				unsigned int n_bytes = zend_ulong_ntz(~bmp_bitvec);
				operand = _mm256_shuffle_epi8(operand, pack_8x16);
				__m256i operand2 = _mm256_permute2x128_si256(operand, operand, 1);
				operand = _mm256_alignr_epi8(operand2, operand, 8);
				/* Store 16 bytes, but bump output pointer forward just past the 'good part',
				 * so the 'bad part' will be overwritten on the next iteration of this loop */
				_mm_storeu_si128((__m128i*)out, _mm256_castsi256_si128(operand));
				out += n_bytes >> 1; /* 2 output bytes per wchar */
				len -= n_bytes >> 2; /* 4 input bytes per wchar */
				in += n_bytes >> 2;
			} else {
				/* Some prefix of this block is codepoints outside the BMP OR error markers
				 * Handle them using non-vectorized code */
				/* Number of leading wchars which do not fit in 16 bits; all 8 if the vector is zero */
				unsigned int n_words = bmp_bitvec ? zend_ulong_ntz(bmp_bitvec) >> 2 : 8;
				do {
					uint32_t w = *in++;
					n_words--;
					len--;

					/* `w` is known to be above 0xFFFF here, so only the upper bound is tested */
					if (w < MBFL_WCSPLANE_UTF32MAX) {
						uint16_t n1 = ((w >> 10) - 0x40) | 0xD800;
						uint16_t n2 = (w & 0x3FF) | 0xDC00;
						MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
						out = mb_convert_buf_add4(out, (n1 >> 8) & 0xFF, n1 & 0xFF, (n2 >> 8) & 0xFF, n2 & 0xFF);
					} else {
						MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_utf16be_default);
						MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
					}
				} while (n_words);
			}
		} while (len >= 8);

		MB_CONVERT_BUF_STORE(buf, out, limit);
	}

	if (len) {
		/* Fewer than 8 wchars remain; finish them with the portable encoder */
		mb_wchar_to_utf16be_default(in, len, buf, end);
	}
}
865
866 #ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
mb_utf16le_to_wchar_avx2(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)867 size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
868 #else
869 static size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
870 #endif
871 {
872 /* Most of this function is the same as `mb_utf16be_to_wchar_avx2`, above;
873 * See it for more detailed code comments */
874
875 size_t len = *in_len;
876
877 if (len >= 32 && bufsize >= 16) {
878 unsigned char *p = *in;
879 uint32_t *out = buf;
880
881 const __m256i _f8 = _mm256_set1_epi16(0xF800);
882 const __m256i _d8 = _mm256_set1_epi16(0xD800);
883
884 do {
885 __m256i operand = _mm256_loadu_si256((__m256i*)p);
886
887 uint32_t surrogate_bitvec = _mm256_movemask_epi8(_mm256_cmpeq_epi16(_mm256_and_si256(operand, _f8), _d8));
888 if (surrogate_bitvec == 0) {
889 /* There are no surrogates among these 16 characters */
890 _mm256_storeu_si256((__m256i*)out, _mm256_cvtepu16_epi32(_mm256_castsi256_si128(operand)));
891 _mm256_storeu_si256((__m256i*)(out + 8), _mm256_cvtepu16_epi32(_mm256_extracti128_si256(operand, 1)));
892 out += 16;
893 bufsize -= 16;
894 p += sizeof(__m256i);
895 len -= sizeof(__m256i);
896 } else if ((surrogate_bitvec & 1) == 0) {
897 /* Some prefix of the current block is non-surrogates */
898 uint8_t n_chars = zend_ulong_ntz(surrogate_bitvec) >> 1;
899 _mm256_storeu_si256((__m256i*)out, _mm256_cvtepu16_epi32(_mm256_castsi256_si128(operand)));
900 if (n_chars > 8) {
901 _mm256_storeu_si256((__m256i*)(out + 8), _mm256_cvtepu16_epi32(_mm256_extracti128_si256(operand, 1)));
902 }
903 out += n_chars;
904 bufsize -= n_chars;
905 p += n_chars * 2;
906 len -= n_chars * 2;
907 } else {
908 /* Some prefix of the current block is (valid or invalid) surrogates */
909 surrogate_bitvec = ~surrogate_bitvec;
910 unsigned int n_chars = surrogate_bitvec ? zend_ulong_ntz(surrogate_bitvec) >> 1 : 16;
911 do {
912 unsigned char c1 = *p++;
913 unsigned char c2 = *p++;
914
915 if (c2 & 0x4 || len < 4) {
916 /* 2nd part of surrogate pair has come first OR string ended abruptly
917 * after 1st part of surrogate pair */
918 *out++ = MBFL_BAD_INPUT;
919 bufsize--;
920 n_chars--;
921 len -= 2;
922 continue;
923 }
924
925 uint16_t n = (c2 << 8) | c1;
926 unsigned char c3 = *p++;
927 unsigned char c4 = *p++;
928
929 if ((c4 & 0xFC) == 0xDC) {
930 /* Valid surrogate pair */
931 uint16_t n2 = (c4 << 8) | c3;
932 *out++ = (((n & 0x3FF) << 10) | (n2 & 0x3FF)) + 0x10000;
933 bufsize--;
934 len -= 4;
935 #if defined(PHP_HAVE_BUILTIN_USUB_OVERFLOW) && PHP_HAVE_BUILTIN_USUB_OVERFLOW
936 if (__builtin_usub_overflow(n_chars, 2, &n_chars)) {
937 break;
938 }
939 #else
940 n_chars -= 2;
941 if (n_chars == UINT_MAX) {
942 break;
943 }
944 #endif
945 } else {
946 /* First half of surrogate pair was followed by another first half
947 * OR by a non-surrogate character */
948 *out++ = MBFL_BAD_INPUT;
949 bufsize--;
950 n_chars--;
951 len -= 2;
952 p -= 2; /* Back up so the last 2 bytes will be processed again */
953 }
954 } while (n_chars);
955 }
956 } while (len >= 32 && bufsize >= 16);
957
958 if (len && bufsize >= 4) {
959 out += mb_utf16le_to_wchar_default(&p, &len, out, bufsize, NULL);
960 }
961
962 *in = p;
963 *in_len = len;
964 return out - buf;
965 } else if (len) {
966 return mb_utf16le_to_wchar_default(in, in_len, buf, bufsize, NULL);
967 } else {
968 return 0;
969 }
970 }
971
972 #ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
mb_wchar_to_utf16le_avx2(uint32_t * in,size_t len,mb_convert_buf * buf,bool end)973 void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
974 #else
975 static void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
976 #endif
977 {
978 if (len >= 8) {
979 unsigned char *out, *limit;
980 MB_CONVERT_BUF_LOAD(buf, out, limit);
981 MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
982
983 /* Used to find wchars which are outside the Unicode BMP (Basic Multilingual Plane) */
984 const __m256i bmp_mask = _mm256_set1_epi32(0xFFFF);
985 /* Used to extract 16 bits which we want from each of eight 32-bit values */
986 const __m256i pack_8x16 = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 13, 12, 9, 8, 5, 4, 1, 0, 13, 12, 9, 8, 5, 4, 1, 0, -1, -1, -1, -1, -1, -1, -1, -1);
987
988 do {
989 __m256i operand = _mm256_loadu_si256((__m256i*)in);
990
991 uint32_t bmp_bitvec = _mm256_movemask_epi8(_mm256_cmpeq_epi32(_mm256_and_si256(operand, bmp_mask), operand));
992 if (bmp_bitvec == 0xFFFFFFFF) {
993 /* All eight wchars are in the BMP
994 * Shuffle bytes around to get the 16 bytes we want into the low 16 bytes of YMM register
995 * (which is equivalent to an XMM register) */
996 operand = _mm256_shuffle_epi8(operand, pack_8x16);
997 __m256i operand2 = _mm256_permute2x128_si256(operand, operand, 1);
998 operand = _mm256_alignr_epi8(operand2, operand, 8);
999 _mm_storeu_si128((__m128i*)out, _mm256_castsi256_si128(operand));
1000 out += 16;
1001 len -= 8;
1002 in += 8;
1003 } else if (bmp_bitvec & 1) {
1004 /* Some prefix of this block are codepoints in the BMP */
1005 unsigned int n_bytes = zend_ulong_ntz(~bmp_bitvec);
1006 operand = _mm256_shuffle_epi8(operand, pack_8x16);
1007 __m256i operand2 = _mm256_permute2x128_si256(operand, operand, 1);
1008 operand = _mm256_alignr_epi8(operand2, operand, 8);
1009 _mm_storeu_si128((__m128i*)out, _mm256_castsi256_si128(operand));
1010 out += n_bytes >> 1;
1011 len -= n_bytes >> 2;
1012 in += n_bytes >> 2;
1013 } else {
1014 /* Some prefix of this block is codepoints outside the BMP OR error markers */
1015 unsigned int n_words = bmp_bitvec ? zend_ulong_ntz(bmp_bitvec) >> 2 : 8;
1016 do {
1017 uint32_t w = *in++;
1018 n_words--;
1019 len--;
1020
1021 if (w < MBFL_WCSPLANE_UTF32MAX) {
1022 uint16_t n1 = ((w >> 10) - 0x40) | 0xD800;
1023 uint16_t n2 = (w & 0x3FF) | 0xDC00;
1024 MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
1025 out = mb_convert_buf_add4(out, n1 & 0xFF, (n1 >> 8) & 0xFF, n2 & 0xFF, (n2 >> 8) & 0xFF);
1026 } else {
1027 MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_utf16le_default);
1028 MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
1029 }
1030 } while (n_words);
1031 }
1032 } while (len >= 8);
1033
1034 MB_CONVERT_BUF_STORE(buf, out, limit);
1035 }
1036
1037 if (len) {
1038 mb_wchar_to_utf16le_default(in, len, buf, end);
1039 }
1040 }
1041
1042 #endif /* defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER) */
1043