1 /*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file:
22 *
23 */
24 /*
25 * The source code included in this file was separated from mbfilter.c
26 * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
27 *
28 */
29
30 #include "zend_bitset.h"
31 #include "mbfilter.h"
32 #include "mbfilter_utf16.h"
33
34 #ifdef ZEND_INTRIN_AVX2_NATIVE
35
36 /* We are building AVX2-only binary */
37 # include <immintrin.h>
/* Map the generic converter names directly onto the AVX2 implementations,
 * so every later use of the generic name in this file refers to the AVX2 code */
# define mb_utf16be_to_wchar mb_utf16be_to_wchar_avx2
# define mb_utf16le_to_wchar mb_utf16le_to_wchar_avx2
# define mb_wchar_to_utf16be mb_wchar_to_utf16be_avx2
# define mb_wchar_to_utf16le mb_wchar_to_utf16le_avx2

/* Forward declarations of the file-local AVX2 converters */
static size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
47
48 #elif defined(ZEND_INTRIN_AVX2_RESOLVER)
49
50 /* We are building binary which works with or without AVX2; whether or not to use
51 * AVX2-accelerated functions will be determined at runtime */
52 # include <immintrin.h>
53 # include "Zend/zend_cpuinfo.h"
54
/* Forward declarations of the scalar (non-AVX2) converters; these are the ones
 * chosen at runtime when zend_cpu_supports_avx2() reports no AVX2 support */
static size_t mb_utf16be_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16be_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static size_t mb_utf16le_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16le_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
59
60 # ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
61 /* Dynamic linker will decide whether or not to use AVX2-based functions and
62 * resolve symbols accordingly */
63
/* AVX2 converters, declared through ZEND_INTRIN_AVX2_FUNC_DECL */
ZEND_INTRIN_AVX2_FUNC_DECL(size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state));
ZEND_INTRIN_AVX2_FUNC_DECL(void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end));
ZEND_INTRIN_AVX2_FUNC_DECL(size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state));
ZEND_INTRIN_AVX2_FUNC_DECL(void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end));

/* Generic entry points: each one is an ifunc whose actual implementation is
 * chosen by the resolver function named in the attribute */
size_t mb_utf16be_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) __attribute__((ifunc("resolve_utf16be_wchar")));
void mb_wchar_to_utf16be(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) __attribute__((ifunc("resolve_wchar_utf16be")));
size_t mb_utf16le_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) __attribute__((ifunc("resolve_utf16le_wchar")));
void mb_wchar_to_utf16le(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) __attribute__((ifunc("resolve_wchar_utf16le")));
73
74 ZEND_NO_SANITIZE_ADDRESS
75 ZEND_ATTRIBUTE_UNUSED
resolve_utf16be_wchar(void)76 static mb_to_wchar_fn resolve_utf16be_wchar(void)
77 {
78 return zend_cpu_supports_avx2() ? mb_utf16be_to_wchar_avx2 : mb_utf16be_to_wchar_default;
79 }
80
81 ZEND_NO_SANITIZE_ADDRESS
82 ZEND_ATTRIBUTE_UNUSED
resolve_wchar_utf16be(void)83 static mb_from_wchar_fn resolve_wchar_utf16be(void)
84 {
85 return zend_cpu_supports_avx2() ? mb_wchar_to_utf16be_avx2 : mb_wchar_to_utf16be_default;
86 }
87
88 ZEND_NO_SANITIZE_ADDRESS
89 ZEND_ATTRIBUTE_UNUSED
resolve_utf16le_wchar(void)90 static mb_to_wchar_fn resolve_utf16le_wchar(void)
91 {
92 return zend_cpu_supports_avx2() ? mb_utf16le_to_wchar_avx2 : mb_utf16le_to_wchar_default;
93 }
94
95 ZEND_NO_SANITIZE_ADDRESS
96 ZEND_ATTRIBUTE_UNUSED
resolve_wchar_utf16le(void)97 static mb_from_wchar_fn resolve_wchar_utf16le(void)
98 {
99 return zend_cpu_supports_avx2() ? mb_wchar_to_utf16le_avx2 : mb_wchar_to_utf16le_default;
100 }
101
102 # else /* ZEND_INTRIN_AVX2_FUNC_PTR */
103 /* We are compiling for a target where the dynamic linker will not be able to
104 * resolve symbols according to whether the host supports AVX2 or not; so instead,
105 * we can make calls go through a function pointer and set the function pointer
106 * on module load */
107
/* Forward declarations of the AVX2 converters. When the `target` function
 * attribute is available, it is applied so that AVX2 is enabled for these
 * functions specifically; otherwise they are declared without it. */
#ifdef HAVE_FUNC_ATTRIBUTE_TARGET
static size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) __attribute__((target("avx2")));
static void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) __attribute__((target("avx2")));
static size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) __attribute__((target("avx2")));
static void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) __attribute__((target("avx2")));
#else
static size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
#endif
119
/* Implementations selected at runtime; these stay NULL until
 * init_convert_utf16() has assigned them */
static mb_to_wchar_fn utf16be_to_wchar_ptr = NULL;
static mb_from_wchar_fn wchar_to_utf16be_ptr = NULL;
static mb_to_wchar_fn utf16le_to_wchar_ptr = NULL;
static mb_from_wchar_fn wchar_to_utf16le_ptr = NULL;
124
mb_utf16be_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)125 static size_t mb_utf16be_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
126 {
127 return utf16be_to_wchar_ptr(in, in_len, buf, bufsize, NULL);
128 }
129
/* Encode wchars as UTF-16BE through whichever implementation
 * init_convert_utf16() stored in wchar_to_utf16be_ptr */
static void mb_wchar_to_utf16be(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	mb_from_wchar_fn impl = wchar_to_utf16be_ptr;

	impl(in, len, buf, end);
}
134
mb_utf16le_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)135 static size_t mb_utf16le_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
136 {
137 return utf16le_to_wchar_ptr(in, in_len, buf, bufsize, NULL);
138 }
139
/* Encode wchars as UTF-16LE through whichever implementation
 * init_convert_utf16() stored in wchar_to_utf16le_ptr */
static void mb_wchar_to_utf16le(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	mb_from_wchar_fn impl = wchar_to_utf16le_ptr;

	impl(in, len, buf, end);
}
144
init_convert_utf16(void)145 void init_convert_utf16(void)
146 {
147 if (zend_cpu_supports_avx2()) {
148 utf16be_to_wchar_ptr = mb_utf16be_to_wchar_avx2;
149 wchar_to_utf16be_ptr = mb_wchar_to_utf16be_avx2;
150 utf16le_to_wchar_ptr = mb_utf16le_to_wchar_avx2;
151 wchar_to_utf16le_ptr = mb_wchar_to_utf16le_avx2;
152 } else {
153 utf16be_to_wchar_ptr = mb_utf16be_to_wchar_default;
154 wchar_to_utf16be_ptr = mb_wchar_to_utf16be_default;
155 utf16le_to_wchar_ptr = mb_utf16le_to_wchar_default;
156 wchar_to_utf16le_ptr = mb_wchar_to_utf16le_default;
157 }
158 }
159 # endif
160
161 #else
162
163 /* No AVX2 support */
/* Map the generic converter names directly onto the scalar implementations */
# define mb_utf16be_to_wchar mb_utf16be_to_wchar_default
# define mb_utf16le_to_wchar mb_utf16le_to_wchar_default
# define mb_wchar_to_utf16be mb_wchar_to_utf16be_default
# define mb_wchar_to_utf16le mb_wchar_to_utf16le_default

/* Forward declarations of the file-local scalar converters */
static size_t mb_utf16be_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16be_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static size_t mb_utf16le_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf16le_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
173
174 #endif
175
/* Forward declarations of file-local functions which are referenced by the
 * encoding descriptors and conversion vtables below */
static int mbfl_filt_conv_utf16_wchar_flush(mbfl_convert_filter *filter);
static size_t mb_utf16_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static zend_string* mb_cut_utf16(unsigned char *str, size_t from, size_t len, unsigned char *end);
static zend_string* mb_cut_utf16be(unsigned char *str, size_t from, size_t len, unsigned char *end);
static zend_string* mb_cut_utf16le(unsigned char *str, size_t from, size_t len, unsigned char *end);
181
/* NULL-terminated list of alternative names for "UTF-16" */
static const char *mbfl_encoding_utf16_aliases[] = {"utf16", NULL};

/* Descriptor for the generic "UTF-16" encoding.
 * Decoding goes through mb_utf16_to_wchar (which looks for a BOM and otherwise
 * assumes big-endian); encoding uses the big-endian writer mb_wchar_to_utf16be.
 * This is a positional initializer, so the order of entries must match the
 * declaration of mbfl_encoding (which is not in this file). */
const mbfl_encoding mbfl_encoding_utf16 = {
	mbfl_no_encoding_utf16,
	"UTF-16",
	"UTF-16",
	mbfl_encoding_utf16_aliases,
	NULL,
	0,
	&vtbl_utf16_wchar,
	&vtbl_wchar_utf16,
	mb_utf16_to_wchar,
	mb_wchar_to_utf16be,
	NULL,
	mb_cut_utf16
};
198
/* Descriptor for "UTF-16BE": no aliases, big-endian decoder and encoder.
 * This is a positional initializer, so the order of entries must match the
 * declaration of mbfl_encoding (which is not in this file). */
const mbfl_encoding mbfl_encoding_utf16be = {
	mbfl_no_encoding_utf16be,
	"UTF-16BE",
	"UTF-16BE",
	NULL,
	NULL,
	0,
	&vtbl_utf16be_wchar,
	&vtbl_wchar_utf16be,
	mb_utf16be_to_wchar,
	mb_wchar_to_utf16be,
	NULL,
	mb_cut_utf16be
};
213
/* Descriptor for "UTF-16LE": no aliases, little-endian decoder and encoder.
 * This is a positional initializer, so the order of entries must match the
 * declaration of mbfl_encoding (which is not in this file). */
const mbfl_encoding mbfl_encoding_utf16le = {
	mbfl_no_encoding_utf16le,
	"UTF-16LE",
	"UTF-16LE",
	NULL,
	NULL,
	0,
	&vtbl_utf16le_wchar,
	&vtbl_wchar_utf16le,
	mb_utf16le_to_wchar,
	mb_wchar_to_utf16le,
	NULL,
	mb_cut_utf16le
};
228
/* Conversion vtable: UTF-16 -> wchar.
 * The filter function starts out as mbfl_filt_conv_utf16_wchar, which decides
 * the byte order from the first two bytes and then replaces itself with the
 * BE or LE filter. */
const struct mbfl_convert_vtbl vtbl_utf16_wchar = {
	mbfl_no_encoding_utf16,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_utf16_wchar,
	mbfl_filt_conv_utf16_wchar_flush,
	NULL,
};
238
/* Conversion vtable: wchar -> UTF-16.
 * Uses the big-endian filter function, so generic "UTF-16" output is
 * written in big-endian byte order. */
const struct mbfl_convert_vtbl vtbl_wchar_utf16 = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_utf16,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_utf16be,
	mbfl_filt_conv_common_flush,
	NULL,
};
248
/* Conversion vtable: UTF-16BE -> wchar.
 * Shares the UTF-16 flush function, which reports truncated input. */
const struct mbfl_convert_vtbl vtbl_utf16be_wchar = {
	mbfl_no_encoding_utf16be,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_utf16be_wchar,
	mbfl_filt_conv_utf16_wchar_flush,
	NULL,
};
258
/* Conversion vtable: wchar -> UTF-16BE */
const struct mbfl_convert_vtbl vtbl_wchar_utf16be = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_utf16be,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_utf16be,
	mbfl_filt_conv_common_flush,
	NULL,
};
268
/* Conversion vtable: UTF-16LE -> wchar.
 * Shares the UTF-16 flush function, which reports truncated input. */
const struct mbfl_convert_vtbl vtbl_utf16le_wchar = {
	mbfl_no_encoding_utf16le,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_utf16le_wchar,
	mbfl_filt_conv_utf16_wchar_flush,
	NULL,
};
278
/* Conversion vtable: wchar -> UTF-16LE */
const struct mbfl_convert_vtbl vtbl_wchar_utf16le = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_utf16le,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_utf16le,
	mbfl_filt_conv_common_flush,
	NULL,
};
288
/* Evaluate `statement`; if its value is negative, return -1 from the
 * enclosing function. Only usable inside functions which return int. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
290
/* Byte-at-a-time filter for generic UTF-16 input.
 *
 * Only handles the first two bytes of the stream: once they have been seen,
 * `filter->filter_function` is replaced with the little-endian filter (if they
 * were FF FE) or the big-endian filter (anything else), and all later bytes
 * go straight to that filter.
 *
 * Returns 0 on success, or -1 if the output function reports failure. */
int mbfl_filt_conv_utf16_wchar(int c, mbfl_convert_filter *filter)
{
	int unit;

	if (filter->status == 0) {
		/* First byte; just remember it */
		filter->cache = c & 0xFF;
		filter->status = 1;
		return 0;
	}

	/* Second byte; interpret the pair as big-endian for now */
	unit = (filter->cache << 8) | (c & 0xFF);
	filter->status = 0;
	filter->cache = 0;

	if (unit == 0xFFFE) {
		/* Byte-swapped BOM; the rest of the stream is little-endian */
		filter->filter_function = mbfl_filt_conv_utf16le_wchar;
		return 0;
	}

	filter->filter_function = mbfl_filt_conv_utf16be_wchar;

	if (unit >= 0xD800 && unit <= 0xDBFF) {
		/* High surrogate; keep its 10 data bits and leave the BE filter
		 * in the state where it expects the low surrogate next */
		filter->cache = unit & 0x3FF;
		filter->status = 2;
	} else if (unit >= 0xDC00 && unit <= 0xDFFF) {
		/* Low surrogate with no preceding high surrogate */
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
	} else if (unit != 0xFEFF) {
		/* Ordinary code unit (a big-endian BOM is dropped silently) */
		CK((*filter->output_function)(unit, filter->data));
	}

	return 0;
}
321
/* Byte-at-a-time filter for UTF-16BE input.
 *
 * `filter->status` tracks the position within a code unit / surrogate pair:
 *   0: expecting first byte of a code unit
 *   1: expecting second byte of a code unit
 *   2: have a high surrogate; expecting first byte of the low surrogate
 *   3: have a high surrogate; expecting second byte of the low surrogate
 *
 * Returns 0 on success, or -1 if the output function reports failure. */
int mbfl_filt_conv_utf16be_wchar(int c, mbfl_convert_filter *filter)
{
	const int byte = c & 0xFF;
	int unit;

	switch (filter->status) {
	case 0:
		filter->cache = byte;
		filter->status = 1;
		break;

	case 1:
		unit = (filter->cache << 8) | byte;
		if (unit >= 0xD800 && unit <= 0xDBFF) {
			/* High surrogate; keep only its 10 data bits */
			filter->cache = unit & 0x3FF;
			filter->status = 2;
			break;
		}
		filter->status = 0;
		if (unit >= 0xDC00 && unit <= 0xDFFF) {
			/* Low surrogate with no preceding high surrogate */
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		} else {
			CK((*filter->output_function)(unit, filter->data));
		}
		break;

	case 2:
		/* Shift the saved high-surrogate bits up and park the new byte
		 * in the low 8 bits of the cache */
		filter->cache = (filter->cache << 8) | byte;
		filter->status = 3;
		break;

	case 3:
		unit = ((filter->cache & 0xFF) << 8) | byte;
		if (unit >= 0xD800 && unit <= 0xDBFF) {
			/* Another high surrogate where a low one was expected; report
			 * the first one as bad and start over with the new one */
			filter->cache = unit & 0x3FF;
			filter->status = 2;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		} else if (unit >= 0xDC00 && unit <= 0xDFFF) {
			/* Valid pair; the high 10 bits sit in cache bits 8-17 */
			filter->status = 0;
			unit = ((filter->cache & 0x3FF00) << 2) + (unit & 0x3FF) + 0x10000;
			CK((*filter->output_function)(unit, filter->data));
		} else {
			/* High surrogate followed by an ordinary code unit */
			filter->status = 0;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			CK((*filter->output_function)(unit, filter->data));
		}
		break;

	default:
		break;
	}

	return 0;
}
372
/* Filter which writes one wchar `c` as UTF-16BE bytes.
 *
 * Values in [0, MBFL_WCSPLANE_UCS2MAX) are written as a single code unit;
 * values in [MBFL_WCSPLANE_SUPMIN, MBFL_WCSPLANE_SUPMAX) are written as a
 * surrogate pair; anything else goes to mbfl_filt_conv_illegal_output.
 *
 * Returns 0 on success, or -1 if any output call reports failure. */
int mbfl_filt_conv_wchar_utf16be(int c, mbfl_convert_filter *filter)
{
	if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
		CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
		CK((*filter->output_function)(c & 0xff, filter->data));
		return 0;
	}

	if (c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX) {
		int high = ((c >> 10) - 0x40) | 0xd800;
		int low = (c & 0x3ff) | 0xdc00;

		/* Most-significant byte of each code unit goes first */
		CK((*filter->output_function)((high >> 8) & 0xff, filter->data));
		CK((*filter->output_function)(high & 0xff, filter->data));
		CK((*filter->output_function)((low >> 8) & 0xff, filter->data));
		CK((*filter->output_function)(low & 0xff, filter->data));
		return 0;
	}

	CK(mbfl_filt_conv_illegal_output(c, filter));
	return 0;
}
393
/* Byte-at-a-time filter for UTF-16LE input.
 *
 * `filter->status` tracks the position within a code unit / surrogate pair:
 *   0: expecting first (low) byte of a code unit
 *   1: expecting second (high) byte of a code unit
 *   2: have a high surrogate; expecting low byte of the low surrogate
 *   3: have a high surrogate; expecting high byte of the low surrogate
 *
 * Returns 0 on success, or -1 if the output function reports failure. */
int mbfl_filt_conv_utf16le_wchar(int c, mbfl_convert_filter *filter)
{
	int unit;

	if (filter->status == 0) {
		filter->cache = c & 0xff;
		filter->status = 1;
	} else if (filter->status == 1) {
		int top_bits = c & 0xfc;

		if (top_bits == 0xd8) {
			/* High surrogate; add its upper 2 data bits to the 8 already cached */
			filter->cache += ((c & 0x3) << 8);
			filter->status = 2;
		} else {
			filter->status = 0;
			if (top_bits == 0xdc) {
				/* Low surrogate with no preceding high surrogate */
				CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			} else {
				CK((*filter->output_function)(filter->cache + ((c & 0xff) << 8), filter->data));
			}
		}
	} else if (filter->status == 2) {
		/* Move the 10 high-surrogate bits up into their final position and
		 * park the new byte in the low 8 bits of the cache */
		filter->cache = (filter->cache << 10) + (c & 0xff);
		filter->status = 3;
	} else if (filter->status == 3) {
		unit = (filter->cache & 0xFF) | ((c & 0xFF) << 8);

		if (unit >= 0xD800 && unit <= 0xDBFF) {
			/* Another high surrogate where a low one was expected; report
			 * the first one as bad and start over with the new one */
			filter->cache = unit & 0x3FF;
			filter->status = 2;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		} else if (unit >= 0xDC00 && unit <= 0xDFFF) {
			/* Valid pair; the cache already holds everything except the
			 * top 2 data bits of the low surrogate */
			unit = filter->cache + ((c & 0x3) << 8) + 0x10000;
			filter->status = 0;
			CK((*filter->output_function)(unit, filter->data));
		} else {
			/* High surrogate followed by an ordinary code unit */
			filter->status = 0;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			CK((*filter->output_function)(unit, filter->data));
		}
	}

	return 0;
}
448
/* Filter which writes one wchar `c` as UTF-16LE bytes.
 *
 * Values in [0, MBFL_WCSPLANE_UCS2MAX) are written as a single code unit;
 * values in [MBFL_WCSPLANE_SUPMIN, MBFL_WCSPLANE_SUPMAX) are written as a
 * surrogate pair; anything else goes to mbfl_filt_conv_illegal_output.
 *
 * Returns 0 on success, or -1 if any output call reports failure. */
int mbfl_filt_conv_wchar_utf16le(int c, mbfl_convert_filter *filter)
{
	if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
		CK((*filter->output_function)(c & 0xff, filter->data));
		CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
		return 0;
	}

	if (c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX) {
		int high = ((c >> 10) - 0x40) | 0xd800;
		int low = (c & 0x3ff) | 0xdc00;

		/* Least-significant byte of each code unit goes first */
		CK((*filter->output_function)(high & 0xff, filter->data));
		CK((*filter->output_function)((high >> 8) & 0xff, filter->data));
		CK((*filter->output_function)(low & 0xff, filter->data));
		CK((*filter->output_function)((low >> 8) & 0xff, filter->data));
		return 0;
	}

	CK(mbfl_filt_conv_illegal_output(c, filter));
	return 0;
}
469
/* Flush function shared by the UTF-16, UTF-16BE and UTF-16LE input filters.
 *
 * A non-zero status means the input ended in the middle of a code unit or
 * surrogate pair; in that case one MBFL_BAD_INPUT marker is emitted and the
 * status is reset. Afterwards the downstream flush function (if any) is called.
 *
 * Returns 0 on success, or -1 if emitting the error marker fails. */
static int mbfl_filt_conv_utf16_wchar_flush(mbfl_convert_filter *filter)
{
	if (filter->status != 0) {
		filter->status = 0;
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
	}

	if (filter->flush_function != NULL) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
484
/* Values stored in `*state` by mb_utf16_to_wchar once the byte order is known */
#define DETECTED_BE 1
#define DETECTED_LE 2

/* Decode generic UTF-16 input to wchars.
 *
 * On the first call (when `*state` is neither DETECTED_BE nor DETECTED_LE),
 * the first two input bytes are checked for a BOM. FF FE selects little-endian;
 * anything else (including input shorter than two bytes) selects big-endian.
 * A BOM of either kind is consumed and not passed to the output. The decision
 * is recorded in `*state` so that later calls go straight to the right decoder.
 *
 * Returns whatever the selected decoder returns. */
static size_t mb_utf16_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
	switch (*state) {
	case DETECTED_BE:
		return mb_utf16be_to_wchar(in, in_len, buf, bufsize, NULL);
	case DETECTED_LE:
		return mb_utf16le_to_wchar(in, in_len, buf, bufsize, NULL);
	default:
		break;
	}

	if (*in_len >= 2) {
		unsigned char *start = *in;
		uint16_t first_unit = (start[0] << 8) | start[1];

		if (first_unit == 0xFFFE) {
			/* Little-endian BOM */
			*in = start + 2;
			*in_len -= 2;
			*state = DETECTED_LE;
			return mb_utf16le_to_wchar(in, in_len, buf, bufsize, NULL);
		}

		if (first_unit == 0xFEFF) {
			/* Big-endian BOM; skip over it */
			*in = start + 2;
			*in_len -= 2;
		}
	}

	*state = DETECTED_BE;
	return mb_utf16be_to_wchar(in, in_len, buf, bufsize, NULL);
}
516
mb_utf16be_to_wchar_default(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)517 static size_t mb_utf16be_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
518 {
519 /* We only want to read 16-bit words out of `str`; any trailing byte will be handled at the end */
520 unsigned char *p = *in, *e = p + (*in_len & ~1);
521 /* Set `limit` to one less than the actual amount of space in the buffer; this is because
522 * on some iterations of the below loop, we might produce two output words */
523 uint32_t *out = buf, *limit = buf + bufsize - 1;
524
525 while (p < e && out < limit) {
526 unsigned char c1 = *p++;
527 unsigned char c2 = *p++;
528 uint16_t n = (c1 << 8) | c2;
529
530 if (n >= 0xD800 && n <= 0xDBFF) {
531 /* Handle surrogate */
532 if (p < e) {
533 unsigned char c3 = *p++;
534 unsigned char c4 = *p++;
535 uint16_t n2 = (c3 << 8) | c4;
536
537 if (n2 >= 0xD800 && n2 <= 0xDBFF) {
538 /* Wrong; that's the first half of a surrogate pair, when we were expecting the second */
539 *out++ = MBFL_BAD_INPUT;
540 p -= 2;
541 } else if (n2 >= 0xDC00 && n2 <= 0xDFFF) {
542 *out++ = (((n & 0x3FF) << 10) | (n2 & 0x3FF)) + 0x10000;
543 } else {
544 /* The first half of a surrogate pair was followed by a 'normal' codepoint */
545 *out++ = MBFL_BAD_INPUT;
546 *out++ = n2;
547 }
548 } else {
549 *out++ = MBFL_BAD_INPUT;
550 }
551 } else if (n >= 0xDC00 && n <= 0xDFFF) {
552 /* This is wrong; second part of surrogate pair has come first */
553 *out++ = MBFL_BAD_INPUT;
554 } else {
555 *out++ = n;
556 }
557 }
558
559 if (p == e && (*in_len & 0x1) && out < limit) {
560 /* There is an extra trailing byte (which shouldn't be there) */
561 *out++ = MBFL_BAD_INPUT;
562 p++;
563 }
564
565 *in_len -= (p - *in);
566 *in = p;
567 return out - buf;
568 }
569
/* Scalar UTF-16BE encoder.
 *
 * Writes `len` wchars from `in` to the conversion buffer. Values below
 * MBFL_WCSPLANE_UCS2MAX take one code unit; values below
 * MBFL_WCSPLANE_UTF32MAX take a surrogate pair; anything else is reported
 * through MB_CONVERT_ERROR.
 *
 * `end` is not used. */
static void mb_wchar_to_utf16be_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Reserve 2 bytes per wchar up front; more is requested below when needed */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

	while (len--) {
		const uint32_t cp = *in++;

		if (cp < MBFL_WCSPLANE_UCS2MAX) {
			out = mb_convert_buf_add2(out, (cp >> 8) & 0xFF, cp & 0xFF);
			continue;
		}

		if (cp >= MBFL_WCSPLANE_UTF32MAX) {
			MB_CONVERT_ERROR(buf, out, limit, cp, mb_wchar_to_utf16be_default);
			/* Make sure there is still room for the remaining wchars */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
			continue;
		}

		/* Needs a surrogate pair: 4 bytes now, plus 2 for each remaining wchar */
		uint16_t high = ((cp >> 10) - 0x40) | 0xD800;
		uint16_t low = (cp & 0x3FF) | 0xDC00;
		MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
		out = mb_convert_buf_add4(out, (high >> 8) & 0xFF, high & 0xFF, (low >> 8) & 0xFF, low & 0xFF);
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
594
mb_utf16le_to_wchar_default(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)595 static size_t mb_utf16le_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
596 {
597 /* We only want to read 16-bit words out of `str`; any trailing byte will be handled at the end */
598 unsigned char *p = *in, *e = p + (*in_len & ~1);
599 /* Set `limit` to one less than the actual amount of space in the buffer; this is because
600 * on some iterations of the below loop, we might produce two output words */
601 uint32_t *out = buf, *limit = buf + bufsize - 1;
602
603 while (p < e && out < limit) {
604 unsigned char c1 = *p++;
605 unsigned char c2 = *p++;
606 uint16_t n = (c2 << 8) | c1;
607
608 if (n >= 0xD800 && n <= 0xDBFF) {
609 /* Handle surrogate */
610 if (p < e) {
611 unsigned char c3 = *p++;
612 unsigned char c4 = *p++;
613 uint16_t n2 = (c4 << 8) | c3;
614
615 if (n2 >= 0xD800 && n2 <= 0xDBFF) {
616 /* Wrong; that's the first half of a surrogate pair, when we were expecting the second */
617 *out++ = MBFL_BAD_INPUT;
618 p -= 2;
619 } else if (n2 >= 0xDC00 && n2 <= 0xDFFF) {
620 *out++ = (((n & 0x3FF) << 10) | (n2 & 0x3FF)) + 0x10000;
621 } else {
622 /* The first half of a surrogate pair was followed by a 'normal' codepoint */
623 *out++ = MBFL_BAD_INPUT;
624 *out++ = n2;
625 }
626 } else {
627 *out++ = MBFL_BAD_INPUT;
628 }
629 } else if (n >= 0xDC00 && n <= 0xDFFF) {
630 /* This is wrong; second part of surrogate pair has come first */
631 *out++ = MBFL_BAD_INPUT;
632 } else {
633 *out++ = n;
634 }
635 }
636
637 if (p == e && (*in_len & 0x1) && out < limit) {
638 /* There is an extra trailing byte (which shouldn't be there) */
639 *out++ = MBFL_BAD_INPUT;
640 p++;
641 }
642
643 *in_len -= (p - *in);
644 *in = p;
645 return out - buf;
646 }
647
/* Scalar UTF-16LE encoder.
 *
 * Writes `len` wchars from `in` to the conversion buffer. Values below
 * MBFL_WCSPLANE_UCS2MAX take one code unit; values below
 * MBFL_WCSPLANE_UTF32MAX take a surrogate pair; anything else is reported
 * through MB_CONVERT_ERROR.
 *
 * `end` is not used. */
static void mb_wchar_to_utf16le_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Reserve 2 bytes per wchar up front; more is requested below when needed */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

	while (len--) {
		const uint32_t cp = *in++;

		if (cp < MBFL_WCSPLANE_UCS2MAX) {
			out = mb_convert_buf_add2(out, cp & 0xFF, (cp >> 8) & 0xFF);
			continue;
		}

		if (cp >= MBFL_WCSPLANE_UTF32MAX) {
			MB_CONVERT_ERROR(buf, out, limit, cp, mb_wchar_to_utf16le_default);
			/* Make sure there is still room for the remaining wchars */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
			continue;
		}

		/* Needs a surrogate pair: 4 bytes now, plus 2 for each remaining wchar */
		uint16_t high = ((cp >> 10) - 0x40) | 0xD800;
		uint16_t low = (cp & 0x3FF) | 0xDC00;
		MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
		out = mb_convert_buf_add4(out, high & 0xFF, (high >> 8) & 0xFF, low & 0xFF, (low >> 8) & 0xFF);
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
672
673 #if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER)
674
/* AVX2-accelerated UTF-16BE decoder.
 *
 * Processes the input in 32-byte blocks (16 code units) for as long as at
 * least 32 input bytes and 16 output slots remain. Each block is handled in
 * one of three ways:
 *   - no surrogates at all: the whole block is converted with vector code
 *   - block starts with non-surrogates: that prefix is converted with vector code
 *   - block starts with surrogates: that prefix is converted one unit at a time
 * Whatever is left after the block loop is handed to the scalar decoder.
 * If the vector path cannot be entered at all, the scalar decoder handles
 * everything (or 0 is returned for empty input).
 *
 * On return, `*in` and `*in_len` have been advanced past the consumed bytes.
 * `state` is not used.
 *
 * Returns the number of wchars written to `buf`. */
#ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
#else
static size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
#endif
{
	size_t len = *in_len;

	if (len >= 32 && bufsize >= 16) {
		unsigned char *p = *in;
		uint32_t *out = buf;

		/* Used to determine if a block of input bytes contains any surrogates */
		const __m256i _f8 = _mm256_set1_epi16(0xF8);
		const __m256i _d8 = _mm256_set1_epi16(0xD8);
		/* wchars must be in host byte order, which is little-endian on x86;
		 * Since we are reading in (big-endian) UTF-16BE, use this vector to swap byte order for output */
		const __m256i swap_bytes = _mm256_set_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);

		do {
			__m256i operand = _mm256_loadu_si256((__m256i*)p); /* Load 32 bytes */

			/* Two bits of `surrogate_bitvec` correspond to each 16-bit code unit;
			 * both are set if the unit is a surrogate (high or low) */
			uint32_t surrogate_bitvec = _mm256_movemask_epi8(_mm256_cmpeq_epi16(_mm256_and_si256(operand, _f8), _d8));
			if (surrogate_bitvec == 0) {
				/* There are no surrogates among these 16 characters
				 * So converting the UTF-16 input to wchars is very simple; just extend each 16-bit value
				 * to a 32-bit value, filling in zero bits in the high end */
				operand = _mm256_shuffle_epi8(operand, swap_bytes);
				_mm256_storeu_si256((__m256i*)out, _mm256_cvtepu16_epi32(_mm256_castsi256_si128(operand)));
				_mm256_storeu_si256((__m256i*)(out + 8), _mm256_cvtepu16_epi32(_mm256_extracti128_si256(operand, 1)));
				out += 16;
				bufsize -= 16;
				p += sizeof(__m256i);
				len -= sizeof(__m256i);
			} else if ((surrogate_bitvec & 1) == 0) {
				/* Some prefix of the current block is non-surrogates; output those */
				/* Number of trailing zero bits, halved = number of leading non-surrogate units */
				uint8_t n_chars = zend_ulong_ntz(surrogate_bitvec) >> 1;
				operand = _mm256_shuffle_epi8(operand, swap_bytes);
				/* We know that the output buffer has at least 64 bytes of space available
				 * So don't bother trimming the output down to only include the non-surrogate prefix;
				 * rather, write out an entire block of 64 (or 32) bytes, then bump our output pointer
				 * forward just past the 'good part', so the 'bad part' will be overwritten on the next
				 * iteration of this loop */
				_mm256_storeu_si256((__m256i*)out, _mm256_cvtepu16_epi32(_mm256_castsi256_si128(operand)));
				if (n_chars > 8) {
					_mm256_storeu_si256((__m256i*)(out + 8), _mm256_cvtepu16_epi32(_mm256_extracti128_si256(operand, 1)));
				}
				out += n_chars;
				bufsize -= n_chars;
				p += n_chars * 2;
				len -= n_chars * 2;
			} else {
				/* Some prefix of the current block is (valid or invalid) surrogates
				 * Handle those using non-vectorized code */
				surrogate_bitvec = ~surrogate_bitvec;
				/* After inversion, the lowest set bit marks the first non-surrogate unit;
				 * if no bit is set, all 16 units in the block are surrogates */
				unsigned int n_chars = surrogate_bitvec ? zend_ulong_ntz(surrogate_bitvec) >> 1 : 16;
				do {
					unsigned char c1 = *p++;
					unsigned char c2 = *p++;

					/* Every unit handled here is a surrogate, so bit 0x4 of the first
					 * byte is enough to tell a low surrogate (DC-DF) from a high one (D8-DB) */
					if (c1 & 0x4 || len < 4) {
						/* 2nd part of surrogate pair has come first OR string ended abruptly
						 * after 1st part of surrogate pair */
						*out++ = MBFL_BAD_INPUT;
						bufsize--;
						n_chars--;
						len -= 2;
						continue;
					}

					uint16_t n = (c1 << 8) | c2;
					unsigned char c3 = *p++;
					unsigned char c4 = *p++;

					if ((c3 & 0xFC) == 0xDC) {
						/* Valid surrogate pair */
						uint16_t n2 = (c3 << 8) | c4;
						*out++ = (((n & 0x3FF) << 10) | (n2 & 0x3FF)) + 0x10000;
						bufsize--;
						len -= 4;
#if defined(PHP_HAVE_BUILTIN_USUB_OVERFLOW) && PHP_HAVE_BUILTIN_USUB_OVERFLOW
						/* Subtracting 2 from `n_chars` will automatically set the CPU's flags;
						 * branch directly off the appropriate flag (CF on x86) rather than using
						 * another instruction (CMP on x86) to check for underflow */
						if (__builtin_usub_overflow(n_chars, 2, &n_chars)) {
							/* The last 2 bytes of this block and the first 2 bytes of the following
							 * block form a valid surrogate pair; now just make sure we don't get
							 * stuck in this loop due to underflow of the loop index */
							break;
						}
#else
						n_chars -= 2;
						/* Same underflow check as above: 1 - 2 wraps around to UINT_MAX */
						if (n_chars == UINT_MAX) {
							break;
						}
#endif
					} else {
						/* First half of surrogate pair was followed by another first half
						 * OR by a non-surrogate character */
						*out++ = MBFL_BAD_INPUT;
						bufsize--;
						n_chars--;
						len -= 2;
						p -= 2; /* Back up so the last 2 bytes will be processed again */
					}
				} while (n_chars);
			}
		} while (len >= 32 && bufsize >= 16);

		if (len && bufsize >= 4) {
			/* Finish up trailing bytes which don't fill a 32-byte block */
			out += mb_utf16be_to_wchar_default(&p, &len, out, bufsize, NULL);
		}

		*in = p;
		*in_len = len;
		return out - buf;
	} else if (len) {
		/* Not enough input or output space for even one vector iteration */
		return mb_utf16be_to_wchar_default(in, in_len, buf, bufsize, NULL);
	} else {
		return 0;
	}
}
798
/* AVX2-accelerated UTF-16BE encoder.
 *
 * Processes the input 8 wchars (32 bytes) at a time for as long as at least
 * 8 wchars remain. Each block is handled in one of three ways:
 *   - all 8 wchars fit in 16 bits: the whole block is packed with vector code
 *   - block starts with wchars which fit in 16 bits: that prefix is packed with vector code
 *   - block starts with wchars which do not fit in 16 bits: that prefix is
 *     converted one wchar at a time (surrogate pair or conversion error)
 * Any remaining wchars (fewer than 8) are handed to the scalar encoder, which
 * is also the only place where `end` is passed on. */
#ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
#else
static void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
#endif
{
	if (len >= 8) {
		unsigned char *out, *limit;
		MB_CONVERT_BUF_LOAD(buf, out, limit);
		MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

		/* Used to find wchars which are outside the Unicode BMP (Basic Multilingual Plane) */
		const __m256i bmp_mask = _mm256_set1_epi32(0xFFFF);
		/* Used to extract 16 bits which we want from each of eight 32-bit values */
		/* The index pairs are listed high byte first, so the two bytes of each
		 * 16-bit value are also swapped into big-endian order */
		const __m256i pack_8x16 = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 12, 13, 8, 9, 4, 5, 0, 1, 12, 13, 8, 9, 4, 5, 0, 1, -1, -1, -1, -1, -1, -1, -1, -1);

		do {
			__m256i operand = _mm256_loadu_si256((__m256i*)in); /* Load 32 bytes */

			/* Four bits of `bmp_bitvec` correspond to each wchar; all four are set
			 * if masking with 0xFFFF leaves the wchar unchanged */
			uint32_t bmp_bitvec = _mm256_movemask_epi8(_mm256_cmpeq_epi32(_mm256_and_si256(operand, bmp_mask), operand));
			if (bmp_bitvec == 0xFFFFFFFF) {
				/* All eight wchars are in the BMP
				 * Shuffle bytes around to get the 16 bytes we want into the low 16 bytes of YMM register
				 * (which is equivalent to an XMM register) */
				operand = _mm256_shuffle_epi8(operand, pack_8x16);
				__m256i operand2 = _mm256_permute2x128_si256(operand, operand, 1);
				operand = _mm256_alignr_epi8(operand2, operand, 8);
				_mm_storeu_si128((__m128i*)out, _mm256_castsi256_si128(operand)); /* Store 16 bytes */
				out += 16;
				len -= 8;
				in += 8;
			} else if (bmp_bitvec & 1) {
				/* Some prefix of this block are codepoints in the BMP */
				/* Count of leading set bits = number of input bytes covered by that prefix */
				unsigned int n_bytes = zend_ulong_ntz(~bmp_bitvec);
				operand = _mm256_shuffle_epi8(operand, pack_8x16);
				__m256i operand2 = _mm256_permute2x128_si256(operand, operand, 1);
				operand = _mm256_alignr_epi8(operand2, operand, 8);
				/* Store 16 bytes, but bump output pointer forward just past the 'good part',
				 * so the 'bad part' will be overwritten on the next iteration of this loop */
				_mm_storeu_si128((__m128i*)out, _mm256_castsi256_si128(operand));
				out += n_bytes >> 1; /* 2 output bytes for every 4 input bytes */
				len -= n_bytes >> 2; /* 1 wchar for every 4 input bytes */
				in += n_bytes >> 2;
			} else {
				/* Some prefix of this block is codepoints outside the BMP OR error markers
				 * Handle them using non-vectorized code */
				/* Lowest set bit marks the first BMP wchar; if no bit is set,
				 * none of the 8 wchars in this block are in the BMP */
				unsigned int n_words = bmp_bitvec ? zend_ulong_ntz(bmp_bitvec) >> 2 : 8;
				do {
					uint32_t w = *in++;
					n_words--;
					len--;

					if (w < MBFL_WCSPLANE_UTF32MAX) {
						/* Encode as a surrogate pair: 4 bytes now, plus 2 for each remaining wchar */
						uint16_t n1 = ((w >> 10) - 0x40) | 0xD800;
						uint16_t n2 = (w & 0x3FF) | 0xDC00;
						MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
						out = mb_convert_buf_add4(out, (n1 >> 8) & 0xFF, n1 & 0xFF, (n2 >> 8) & 0xFF, n2 & 0xFF);
					} else {
						/* Note that the scalar encoder is named as the conversion function here */
						MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_utf16be_default);
						MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
					}
				} while (n_words);
			}
		} while (len >= 8);

		MB_CONVERT_BUF_STORE(buf, out, limit);
	}

	if (len) {
		/* Fewer than 8 wchars remain; let the scalar encoder finish up */
		mb_wchar_to_utf16be_default(in, len, buf, end);
	}
}
871
872 #ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
mb_utf16le_to_wchar_avx2(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)873 size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
874 #else
875 static size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
876 #endif
877 {
878 /* Most of this function is the same as `mb_utf16be_to_wchar_avx2`, above;
879 * See it for more detailed code comments */
880
881 size_t len = *in_len;
882
883 if (len >= 32 && bufsize >= 16) {
884 unsigned char *p = *in;
885 uint32_t *out = buf;
886
887 const __m256i _f8 = _mm256_set1_epi16(0xF800);
888 const __m256i _d8 = _mm256_set1_epi16(0xD800);
889
890 do {
891 __m256i operand = _mm256_loadu_si256((__m256i*)p);
892
893 uint32_t surrogate_bitvec = _mm256_movemask_epi8(_mm256_cmpeq_epi16(_mm256_and_si256(operand, _f8), _d8));
894 if (surrogate_bitvec == 0) {
895 /* There are no surrogates among these 16 characters */
896 _mm256_storeu_si256((__m256i*)out, _mm256_cvtepu16_epi32(_mm256_castsi256_si128(operand)));
897 _mm256_storeu_si256((__m256i*)(out + 8), _mm256_cvtepu16_epi32(_mm256_extracti128_si256(operand, 1)));
898 out += 16;
899 bufsize -= 16;
900 p += sizeof(__m256i);
901 len -= sizeof(__m256i);
902 } else if ((surrogate_bitvec & 1) == 0) {
903 /* Some prefix of the current block is non-surrogates */
904 uint8_t n_chars = zend_ulong_ntz(surrogate_bitvec) >> 1;
905 _mm256_storeu_si256((__m256i*)out, _mm256_cvtepu16_epi32(_mm256_castsi256_si128(operand)));
906 if (n_chars > 8) {
907 _mm256_storeu_si256((__m256i*)(out + 8), _mm256_cvtepu16_epi32(_mm256_extracti128_si256(operand, 1)));
908 }
909 out += n_chars;
910 bufsize -= n_chars;
911 p += n_chars * 2;
912 len -= n_chars * 2;
913 } else {
914 /* Some prefix of the current block is (valid or invalid) surrogates */
915 surrogate_bitvec = ~surrogate_bitvec;
916 unsigned int n_chars = surrogate_bitvec ? zend_ulong_ntz(surrogate_bitvec) >> 1 : 16;
917 do {
918 unsigned char c1 = *p++;
919 unsigned char c2 = *p++;
920
921 if (c2 & 0x4 || len < 4) {
922 /* 2nd part of surrogate pair has come first OR string ended abruptly
923 * after 1st part of surrogate pair */
924 *out++ = MBFL_BAD_INPUT;
925 bufsize--;
926 n_chars--;
927 len -= 2;
928 continue;
929 }
930
931 uint16_t n = (c2 << 8) | c1;
932 unsigned char c3 = *p++;
933 unsigned char c4 = *p++;
934
935 if ((c4 & 0xFC) == 0xDC) {
936 /* Valid surrogate pair */
937 uint16_t n2 = (c4 << 8) | c3;
938 *out++ = (((n & 0x3FF) << 10) | (n2 & 0x3FF)) + 0x10000;
939 bufsize--;
940 len -= 4;
941 #if defined(PHP_HAVE_BUILTIN_USUB_OVERFLOW) && PHP_HAVE_BUILTIN_USUB_OVERFLOW
942 if (__builtin_usub_overflow(n_chars, 2, &n_chars)) {
943 break;
944 }
945 #else
946 n_chars -= 2;
947 if (n_chars == UINT_MAX) {
948 break;
949 }
950 #endif
951 } else {
952 /* First half of surrogate pair was followed by another first half
953 * OR by a non-surrogate character */
954 *out++ = MBFL_BAD_INPUT;
955 bufsize--;
956 n_chars--;
957 len -= 2;
958 p -= 2; /* Back up so the last 2 bytes will be processed again */
959 }
960 } while (n_chars);
961 }
962 } while (len >= 32 && bufsize >= 16);
963
964 if (len && bufsize >= 4) {
965 out += mb_utf16le_to_wchar_default(&p, &len, out, bufsize, NULL);
966 }
967
968 *in = p;
969 *in_len = len;
970 return out - buf;
971 } else if (len) {
972 return mb_utf16le_to_wchar_default(in, in_len, buf, bufsize, NULL);
973 } else {
974 return 0;
975 }
976 }
977
978 #ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
mb_wchar_to_utf16le_avx2(uint32_t * in,size_t len,mb_convert_buf * buf,bool end)979 void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
980 #else
981 static void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
982 #endif
983 {
984 if (len >= 8) {
985 unsigned char *out, *limit;
986 MB_CONVERT_BUF_LOAD(buf, out, limit);
987 MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
988
989 /* Used to find wchars which are outside the Unicode BMP (Basic Multilingual Plane) */
990 const __m256i bmp_mask = _mm256_set1_epi32(0xFFFF);
991 /* Used to extract 16 bits which we want from each of eight 32-bit values */
992 const __m256i pack_8x16 = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 13, 12, 9, 8, 5, 4, 1, 0, 13, 12, 9, 8, 5, 4, 1, 0, -1, -1, -1, -1, -1, -1, -1, -1);
993
994 do {
995 __m256i operand = _mm256_loadu_si256((__m256i*)in);
996
997 uint32_t bmp_bitvec = _mm256_movemask_epi8(_mm256_cmpeq_epi32(_mm256_and_si256(operand, bmp_mask), operand));
998 if (bmp_bitvec == 0xFFFFFFFF) {
999 /* All eight wchars are in the BMP
1000 * Shuffle bytes around to get the 16 bytes we want into the low 16 bytes of YMM register
1001 * (which is equivalent to an XMM register) */
1002 operand = _mm256_shuffle_epi8(operand, pack_8x16);
1003 __m256i operand2 = _mm256_permute2x128_si256(operand, operand, 1);
1004 operand = _mm256_alignr_epi8(operand2, operand, 8);
1005 _mm_storeu_si128((__m128i*)out, _mm256_castsi256_si128(operand));
1006 out += 16;
1007 len -= 8;
1008 in += 8;
1009 } else if (bmp_bitvec & 1) {
1010 /* Some prefix of this block are codepoints in the BMP */
1011 unsigned int n_bytes = zend_ulong_ntz(~bmp_bitvec);
1012 operand = _mm256_shuffle_epi8(operand, pack_8x16);
1013 __m256i operand2 = _mm256_permute2x128_si256(operand, operand, 1);
1014 operand = _mm256_alignr_epi8(operand2, operand, 8);
1015 _mm_storeu_si128((__m128i*)out, _mm256_castsi256_si128(operand));
1016 out += n_bytes >> 1;
1017 len -= n_bytes >> 2;
1018 in += n_bytes >> 2;
1019 } else {
1020 /* Some prefix of this block is codepoints outside the BMP OR error markers */
1021 unsigned int n_words = bmp_bitvec ? zend_ulong_ntz(bmp_bitvec) >> 2 : 8;
1022 do {
1023 uint32_t w = *in++;
1024 n_words--;
1025 len--;
1026
1027 if (w < MBFL_WCSPLANE_UTF32MAX) {
1028 uint16_t n1 = ((w >> 10) - 0x40) | 0xD800;
1029 uint16_t n2 = (w & 0x3FF) | 0xDC00;
1030 MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
1031 out = mb_convert_buf_add4(out, n1 & 0xFF, (n1 >> 8) & 0xFF, n2 & 0xFF, (n2 >> 8) & 0xFF);
1032 } else {
1033 MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_utf16le_default);
1034 MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
1035 }
1036 } while (n_words);
1037 }
1038 } while (len >= 8);
1039
1040 MB_CONVERT_BUF_STORE(buf, out, limit);
1041 }
1042
1043 if (len) {
1044 mb_wchar_to_utf16le_default(in, len, buf, end);
1045 }
1046 }
1047
1048 #endif /* defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER) */
1049
mb_cut_utf16be(unsigned char * str,size_t from,size_t len,unsigned char * end)1050 static zend_string* mb_cut_utf16be(unsigned char *str, size_t from, size_t len, unsigned char *end)
1051 {
1052 if (len > end - (str + from)) {
1053 len = end - (str + from);
1054 }
1055 from &= ~1;
1056 len &= ~1;
1057 unsigned char *start = str + from;
1058 if (len < 2 || (end - start) < 2) {
1059 return zend_empty_string;
1060 }
1061 /* Check if 1st codepoint is 2nd part of surrogate pair */
1062 if (from > 0) {
1063 uint32_t start_cp = (*start << 8) + *(start + 1);
1064 if (start_cp >= 0xDC00 && start_cp <= 0xDFFF) {
1065 uint32_t preceding_cp = (*(start - 2) << 8) + *(start - 1);
1066 if (preceding_cp >= 0xD800 && preceding_cp <= 0xDBFF) {
1067 from -= 2;
1068 }
1069 }
1070 }
1071 /* Same for ending cut point */
1072 unsigned char *_end = start + len;
1073 if (_end > end) {
1074 _end = end;
1075 }
1076 uint32_t ending_cp = (*(_end - 2) << 8) + *(_end - 1);
1077 if (ending_cp >= 0xD800 && ending_cp <= 0xDBFF) {
1078 _end -= 2;
1079 }
1080 return zend_string_init_fast((char*)start, _end - start);
1081 }
1082
mb_cut_utf16le(unsigned char * str,size_t from,size_t len,unsigned char * end)1083 static zend_string* mb_cut_utf16le(unsigned char *str, size_t from, size_t len, unsigned char *end)
1084 {
1085 if (len > end - (str + from)) {
1086 len = end - (str + from);
1087 }
1088 from &= ~1;
1089 len &= ~1;
1090 unsigned char *start = str + from;
1091 if (len < 2 || (end - start) < 2) {
1092 return zend_empty_string;
1093 }
1094 /* Check if 1st codepoint is 2nd part of surrogate pair */
1095 if (from > 0) {
1096 uint32_t start_cp = (*(start + 1) << 8) + *start;
1097 if (start_cp >= 0xDC00 && start_cp <= 0xDFFF) {
1098 uint32_t preceding_cp = (*(start - 1) << 8) + *(start - 2);
1099 if (preceding_cp >= 0xD800 && preceding_cp <= 0xDBFF) {
1100 from -= 2;
1101 }
1102 }
1103 }
1104 /* Same for ending cut point */
1105 unsigned char *_end = start + len;
1106 if (_end > end) {
1107 _end = end;
1108 }
1109 uint32_t ending_cp = (*(_end - 1) << 8) + *(_end - 2);
1110 if (ending_cp >= 0xD800 && ending_cp <= 0xDBFF) {
1111 _end -= 2;
1112 }
1113 return zend_string_init_fast((char*)start, _end - start);
1114 }
1115
mb_cut_utf16(unsigned char * str,size_t from,size_t len,unsigned char * end)1116 static zend_string* mb_cut_utf16(unsigned char *str, size_t from, size_t len, unsigned char *end)
1117 {
1118 if (len < 2 || (end - str) < 2) {
1119 return zend_empty_string;
1120 }
1121 uint32_t cp = (*str << 8) + *(str + 1);
1122 if (cp == 0xFFFE) {
1123 /* Little-endian BOM */
1124 if (from < 2) {
1125 from = 2;
1126 }
1127 return mb_cut_utf16le(str, from, len, end);
1128 } else {
1129 if (cp == 0xFEFF && from < 2) {
1130 from = 2;
1131 }
1132 return mb_cut_utf16be(str, from, len, end);
1133 }
1134 }
1135