1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9                     This module by Zoltan Herczeg
10      Original API code Copyright (c) 1997-2012 University of Cambridge
11           New API code Copyright (c) 2016-2019 University of Cambridge
12 
13 -----------------------------------------------------------------------------
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
16 
17     * Redistributions of source code must retain the above copyright notice,
18       this list of conditions and the following disclaimer.
19 
20     * Redistributions in binary form must reproduce the above copyright
21       notice, this list of conditions and the following disclaimer in the
22       documentation and/or other materials provided with the distribution.
23 
24     * Neither the name of the University of Cambridge nor the names of its
25       contributors may be used to endorse or promote products derived from
26       this software without specific prior written permission.
27 
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 POSSIBILITY OF SUCH DAMAGE.
39 -----------------------------------------------------------------------------
40 */
41 
42 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
43 
44 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
jump_if_utf_char_start(struct sljit_compiler * compiler,sljit_s32 reg)45 static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg)
46 {
47 #if PCRE2_CODE_UNIT_WIDTH == 8
48 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
49 return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0x80);
50 #elif PCRE2_CODE_UNIT_WIDTH == 16
51 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
52 return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00);
53 #else
54 #error "Unknown code width"
55 #endif
56 }
57 #endif
58 
character_to_int32(PCRE2_UCHAR chr)59 static sljit_s32 character_to_int32(PCRE2_UCHAR chr)
60 {
61 sljit_u32 value = chr;
62 #if PCRE2_CODE_UNIT_WIDTH == 8
63 #define SSE2_COMPARE_TYPE_INDEX 0
64 return (sljit_s32)((value << 24) | (value << 16) | (value << 8) | value);
65 #elif PCRE2_CODE_UNIT_WIDTH == 16
66 #define SSE2_COMPARE_TYPE_INDEX 1
67 return (sljit_s32)((value << 16) | value);
68 #elif PCRE2_CODE_UNIT_WIDTH == 32
69 #define SSE2_COMPARE_TYPE_INDEX 2
70 return (sljit_s32)(value);
71 #else
72 #error "Unsupported unit width"
73 #endif
74 }
75 
/* Emits a 128 bit load from [src_general_reg + offset] into an XMM register.

  compiler         the sljit compiler
  dst_xmm_reg      index of the destination XMM register (0..7)
  src_general_reg  machine index of the base register (0..7)
  offset           signed 8 bit displacement

The aligned form (MOVDQA, 66 0F 6F) is chosen when the low four bits of the
displacement are zero, otherwise the unaligned form (MOVDQU, F3 0F 6F) is
used. The callers visible in this file round the base register down to a
multiple of 16 before calling.

NOTE(review): only "< 8" is asserted for the base register; confirm that
indexes 4 and 5, which have special ModRM meanings on x86, cannot occur. */

static void load_from_mem_sse2(struct sljit_compiler *compiler, sljit_s32 dst_xmm_reg, sljit_s32 src_general_reg, sljit_s8 offset)
{
sljit_u8 instruction[5];
sljit_u8 displacement = (sljit_u8)offset;
int length = 4;

SLJIT_ASSERT(dst_xmm_reg < 8);
SLJIT_ASSERT(src_general_reg < 8);

instruction[0] = ((displacement & 0xf) != 0) ? 0xf3 : 0x66;
instruction[1] = 0x0f;
instruction[2] = 0x6f;
/* ModRM with mod = 00: register indirect, no displacement. */
instruction[3] = (dst_xmm_reg << 3) | src_general_reg;

if (offset != 0)
  {
  /* Switch to mod = 01 and append the 8 bit displacement. */
  instruction[3] |= 0x40;
  instruction[4] = displacement;
  length = 5;
  }

sljit_emit_op_custom(compiler, instruction, length);
}
99 
/* Selects the instruction sequence produced by
fast_forward_char_pair_sse2_compare(). */

typedef enum {
    /* Compare the data with one character vector. */
    sse2_compare_match1 = 0,
    /* OR a bit vector into the data first, then compare with one
    character vector. */
    sse2_compare_match1i = 1,
    /* Compare the data with two character vectors and OR the results. */
    sse2_compare_match2 = 2
} sse2_compare_type;
105 
/* Emits one step (0..3) of the vector compare sequence. Callers run the
steps in order; each step emits at most one register-to-register SSE2
instruction, so two independent sequences can be interleaved.

  compare_type  which sequence to produce
  step          0, 1, 2 or 3
  dst_ind       XMM index of the data; receives the comparison result
  cmp1_ind      XMM index of the first character vector
  cmp2_ind      XMM index of the second character (or bit) vector
  tmp_ind       XMM index of a scratch register (sse2_compare_match2 only)

Instructions per step:

  match1   step 2: PCMPEQ dst, cmp1
  match1i  step 0: POR dst, cmp2      step 2: PCMPEQ dst, cmp1
  match2   step 0: MOVDQA tmp, dst    step 1: PCMPEQ dst, cmp1
           step 2: PCMPEQ tmp, cmp2   step 3: POR dst, tmp

Any other combination emits nothing. */

static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, sse2_compare_type compare_type,
  int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
{
sljit_u8 instruction[4];
sljit_u8 opcode;
sljit_s32 reg_field;
sljit_s32 rm_field;

SLJIT_ASSERT(step >= 0 && step <= 3);

if (compare_type == sse2_compare_match2)
  {
  switch (step)
    {
    case 0:
    /* MOVDQA xmm1, xmm2/m128 */
    opcode = 0x6f;
    reg_field = tmp_ind;
    rm_field = dst_ind;
    break;

    case 1:
    /* PCMPEQB/W/D xmm1, xmm2/m128 */
    opcode = 0x74 + SSE2_COMPARE_TYPE_INDEX;
    reg_field = dst_ind;
    rm_field = cmp1_ind;
    break;

    case 2:
    /* PCMPEQB/W/D xmm1, xmm2/m128 */
    opcode = 0x74 + SSE2_COMPARE_TYPE_INDEX;
    reg_field = tmp_ind;
    rm_field = cmp2_ind;
    break;

    case 3:
    /* POR xmm1, xmm2/m128 */
    opcode = 0xeb;
    reg_field = dst_ind;
    rm_field = tmp_ind;
    break;

    default:
    return;
    }
  }
else if (step == 0)
  {
  if (compare_type != sse2_compare_match1i)
    return;

  /* POR xmm1, xmm2/m128 */
  opcode = 0xeb;
  reg_field = dst_ind;
  rm_field = cmp2_ind;
  }
else if (step == 2)
  {
  /* PCMPEQB/W/D xmm1, xmm2/m128 */
  opcode = 0x74 + SSE2_COMPARE_TYPE_INDEX;
  reg_field = dst_ind;
  rm_field = cmp1_ind;
  }
else
  return;

/* 66 0F <opcode> <ModRM, mod = 11: both operands are registers> */
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[2] = opcode;
instruction[3] = 0xc0 | (reg_field << 3) | rm_field;
sljit_emit_op_custom(compiler, instruction, 4);
}
182 
183 #define JIT_HAS_FAST_FORWARD_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
184 
/* Emits an SSE2 scan that advances STR_PTR to the next code unit equal to
char1 or char2.

  common  JIT compiler state
  char1   first accepted code unit
  char2   second accepted code unit (equal to char1 for a single character)
  offset  only read in UTF builds: when it is > 0 and common->utf is set, the
          unit at STR_PTR - offset must start a character, otherwise the
          scan resumes one unit later

When the two characters differ in exactly one bit, that bit is ORed into the
data before a single compare; otherwise two compares are ORed together.

The generated code first inspects the aligned 16 byte block containing
STR_PTR, shifting away the result bits which belong to bytes before STR_PTR,
and then loops over the following aligned blocks. If the scan passes
STR_END, complete mode jumps to common->failed_match; the other modes clamp
STR_PTR to STR_END. TMP1, TMP2 and XMM registers 0..3 are overwritten. */

static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *aligned_loop;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *utf_restart;
#endif
struct sljit_jump *found;
struct sljit_jump *end_reached[2];
sse2_compare_type compare_type = sse2_compare_match1;
sljit_u8 inst[8];
sljit_s32 tmp1_gpr = sljit_get_register_index(TMP1);
sljit_s32 str_ptr_gpr = sljit_get_register_index(STR_PTR);
sljit_s32 xmm_data = 0;
sljit_s32 xmm_tmp = 1;
sljit_s32 xmm_cmp1 = 2;
sljit_s32 xmm_cmp2 = 3;
sljit_u32 diff_bit = 0;
int two_chars = (char1 != char2);
int step;

SLJIT_UNUSED_ARG(offset);

if (two_chars)
  {
  diff_bit = char1 ^ char2;

  if (is_powerof2(diff_bit))
    compare_type = sse2_compare_match1i;
  else
    {
    diff_bit = 0;
    compare_type = sse2_compare_match2;
    }
  }

end_reached[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, end_reached[0]);

/* Load the character vector(s). */

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | diff_bit));

SLJIT_ASSERT(tmp1_gpr < 8);

/* MOVD xmm, r/m32 */
inst[0] = 0x66;
inst[1] = 0x0f;
inst[2] = 0x6e;
inst[3] = 0xc0 | (xmm_cmp1 << 3) | tmp1_gpr;
sljit_emit_op_custom(compiler, inst, 4);

if (two_chars)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(diff_bit != 0 ? diff_bit : char2));

  /* MOVD xmm, r/m32 */
  inst[3] = 0xc0 | (xmm_cmp2 << 3) | tmp1_gpr;
  sljit_emit_op_custom(compiler, inst, 4);
  }

OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);

/* PSHUFD xmm1, xmm2/m128, imm8 (imm8 = 0 copies the low dword to all four) */
inst[2] = 0x70;
inst[3] = 0xc0 | (xmm_cmp1 << 3) | xmm_cmp1;
inst[4] = 0;
sljit_emit_op_custom(compiler, inst, 5);

if (two_chars)
  {
  /* PSHUFD xmm1, xmm2/m128, imm8 */
  inst[3] = 0xc0 | (xmm_cmp2 << 3) | xmm_cmp2;
  sljit_emit_op_custom(compiler, inst, 5);
  }

/* First block: it may begin before STR_PTR. */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
utf_restart = LABEL();
#endif
/* STR_PTR = start of the aligned block, TMP2 = byte offset inside it. */
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);

load_from_mem_sse2(compiler, xmm_data, str_ptr_gpr, 0);
for (step = 0; step < 4; step++)
  fast_forward_char_pair_sse2_compare(compiler, compare_type, step, xmm_data, xmm_cmp1, xmm_cmp2, xmm_tmp);

/* PMOVMSKB reg, xmm */
inst[0] = 0x66;
inst[1] = 0x0f;
inst[2] = 0xd7;
inst[3] = 0xc0 | (tmp1_gpr << 3) | xmm_data;
sljit_emit_op_custom(compiler, inst, 4);

/* Drop the result bits of the bytes which precede the original STR_PTR. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);

found = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Following blocks: all aligned. */
aligned_loop = LABEL();

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);

end_reached[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, end_reached[1]);

load_from_mem_sse2(compiler, xmm_data, str_ptr_gpr, 0);
for (step = 0; step < 4; step++)
  fast_forward_char_pair_sse2_compare(compiler, compare_type, step, xmm_data, xmm_cmp1, xmm_cmp2, xmm_tmp);

/* PMOVMSKB reg, xmm */
inst[0] = 0x66;
inst[1] = 0x0f;
inst[2] = 0xd7;
inst[3] = 0xc0 | (tmp1_gpr << 3) | xmm_data;
sljit_emit_op_custom(compiler, inst, 4);

CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, aligned_loop);

JUMPHERE(found);

/* BSF r32, r/m32: index of the lowest set result bit. */
inst[0] = 0x0f;
inst[1] = 0xbc;
inst[2] = 0xc0 | (tmp1_gpr << 3) | tmp1_gpr;
sljit_emit_op_custom(compiler, inst, 3);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

/* A hit inside the last block may still lie at or after STR_END. */
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
else
  {
  JUMPHERE(end_reached[0]);
  JUMPHERE(end_reached[1]);
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
  CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);

  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));

  found = jump_if_utf_char_start(compiler, TMP1);

  /* Not at a character start: step one unit forward and scan again. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, utf_restart);

  JUMPHERE(found);
  }
#endif
}
346 
347 #define JIT_HAS_FAST_REQUESTED_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
348 
fast_requested_char_simd(compiler_common * common,PCRE2_UCHAR char1,PCRE2_UCHAR char2)349 static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
350 {
351 DEFINE_COMPILER;
352 struct sljit_label *start;
353 struct sljit_jump *quit;
354 jump_list *not_found = NULL;
355 sse2_compare_type compare_type = sse2_compare_match1;
356 sljit_u8 instruction[8];
357 sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
358 sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
359 sljit_s32 data_ind = 0;
360 sljit_s32 tmp_ind = 1;
361 sljit_s32 cmp1_ind = 2;
362 sljit_s32 cmp2_ind = 3;
363 sljit_u32 bit = 0;
364 int i;
365 
366 if (char1 != char2)
367   {
368   bit = char1 ^ char2;
369   compare_type = sse2_compare_match1i;
370 
371   if (!is_powerof2(bit))
372     {
373     bit = 0;
374     compare_type = sse2_compare_match2;
375     }
376   }
377 
378 add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
379 OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
380 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
381 
382 /* First part (unaligned start) */
383 
384 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
385 
386 SLJIT_ASSERT(tmp1_reg_ind < 8);
387 
388 /* MOVD xmm, r/m32 */
389 instruction[0] = 0x66;
390 instruction[1] = 0x0f;
391 instruction[2] = 0x6e;
392 instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_reg_ind;
393 sljit_emit_op_custom(compiler, instruction, 4);
394 
395 if (char1 != char2)
396   {
397   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
398 
399   /* MOVD xmm, r/m32 */
400   instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_reg_ind;
401   sljit_emit_op_custom(compiler, instruction, 4);
402   }
403 
404 OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
405 
406 /* PSHUFD xmm1, xmm2/m128, imm8 */
407 /* instruction[0] = 0x66; */
408 /* instruction[1] = 0x0f; */
409 instruction[2] = 0x70;
410 instruction[3] = 0xc0 | (cmp1_ind << 3) | cmp1_ind;
411 instruction[4] = 0;
412 sljit_emit_op_custom(compiler, instruction, 5);
413 
414 if (char1 != char2)
415   {
416   /* PSHUFD xmm1, xmm2/m128, imm8 */
417   instruction[3] = 0xc0 | (cmp2_ind << 3) | cmp2_ind;
418   sljit_emit_op_custom(compiler, instruction, 5);
419   }
420 
421 OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
422 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
423 
424 load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0);
425 for (i = 0; i < 4; i++)
426   fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
427 
428 /* PMOVMSKB reg, xmm */
429 /* instruction[0] = 0x66; */
430 /* instruction[1] = 0x0f; */
431 instruction[2] = 0xd7;
432 instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind;
433 sljit_emit_op_custom(compiler, instruction, 4);
434 
435 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
436 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
437 
438 quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
439 
440 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
441 
442 /* Second part (aligned) */
443 start = LABEL();
444 
445 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
446 
447 add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
448 
449 load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0);
450 for (i = 0; i < 4; i++)
451   fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
452 
453 /* PMOVMSKB reg, xmm */
454 /* instruction[0] = 0x66; */
455 /* instruction[1] = 0x0f; */
456 instruction[2] = 0xd7;
457 instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind;
458 sljit_emit_op_custom(compiler, instruction, 4);
459 
460 CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
461 
462 JUMPHERE(quit);
463 
464 /* BSF r32, r/m32 */
465 instruction[0] = 0x0f;
466 instruction[1] = 0xbc;
467 instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
468 sljit_emit_op_custom(compiler, instruction, 3);
469 
470 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, STR_PTR, 0);
471 add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
472 
473 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
474 return not_found;
475 }
476 
477 #ifndef _WIN64
478 
max_fast_forward_char_pair_offset(void)479 static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)
480 {
481 #if PCRE2_CODE_UNIT_WIDTH == 8
482 return 15;
483 #elif PCRE2_CODE_UNIT_WIDTH == 16
484 return 7;
485 #elif PCRE2_CODE_UNIT_WIDTH == 32
486 return 3;
487 #else
488 #error "Unsupported unit width"
489 #endif
490 }
491 
492 #define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
493 
/* Emits an SSE2 scan for a position where the unit at offs1 is char1a or
char1b and, at the same time, the unit at offs2 is char2a or char2b.

  common          JIT compiler state (complete mode only, asserted)
  offs1           offset of the first character; must be > offs2
  char1a, char1b  units accepted at offs1 (equal for a single character)
  offs2           offset of the second character
  char2a, char2b  units accepted at offs2 (equal for a single character)

STR_PTR is advanced by offs1 while scanning. Each iteration loads the
aligned 16 byte block at STR_PTR and a second block starting "diff" bytes
earlier, compares them with the respective character vectors and ANDs the
results, so a set bit marks a position where both characters match. In the
first iteration the second block is instead produced by shifting the first
one (PSLLDQ) when loading it would start before TMP1 (STR_PTR - diff).

If common->match_end_ptr is set, STR_END is temporarily lowered to the
stored value plus offs1 + 1 units when that is smaller (TMP3 keeps the
original). Every exit at or past STR_END jumps to common->failed_match. On
success offs1 is subtracted from STR_PTR again. TMP1, TMP2, TMP3 and XMM
registers 0..7 are overwritten. */

static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
  PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
{
DEFINE_COMPILER;
sse2_compare_type compare1_type = sse2_compare_match1;
sse2_compare_type compare2_type = sse2_compare_match1;
sljit_u32 bit1 = 0;
sljit_u32 bit2 = 0;
sljit_u32 diff = IN_UCHARS(offs1 - offs2);
sljit_s32 tmp1_gpr = sljit_get_register_index(TMP1);
sljit_s32 tmp2_gpr = sljit_get_register_index(TMP2);
sljit_s32 str_ptr_gpr = sljit_get_register_index(STR_PTR);
sljit_s32 xmm_data1 = 0;
sljit_s32 xmm_data2 = 1;
sljit_s32 xmm_tmp1 = 2;
sljit_s32 xmm_tmp2 = 3;
sljit_s32 xmm_cmp1a = 4;
sljit_s32 xmm_cmp1b = 5;
sljit_s32 xmm_cmp2a = 6;
sljit_s32 xmm_cmp2b = 7;
struct sljit_label *main_loop;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *utf_restart;
#endif
struct sljit_jump *jump[2];
sljit_u8 inst[8];
int first_varies = (char1a != char1b);
int second_varies = (char2a != char2b);
int limited_end = (common->match_end_ptr != 0);
int step;

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset()));
SLJIT_ASSERT(tmp1_gpr < 8 && tmp2_gpr == 1);

/* Initialize. */
if (limited_end)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));

  /* STR_END = min(STR_END, TMP1) */
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
  CMOV(SLJIT_LESS, STR_END, TMP1, 0);
  }

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

/* MOVD xmm, r/m32 */
inst[0] = 0x66;
inst[1] = 0x0f;
inst[2] = 0x6e;

/* Constants of the first character: TMP1 (and TMP2 when it has a variant). */
if (!first_varies)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
else
  {
  bit1 = char1a ^ char1b;
  if (!is_powerof2(bit1))
    {
    compare1_type = sse2_compare_match2;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b));
    }
  else
    {
    compare1_type = sse2_compare_match1i;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1));
    }
  }

inst[3] = 0xc0 | (xmm_cmp1a << 3) | tmp1_gpr;
sljit_emit_op_custom(compiler, inst, 4);

if (first_varies)
  {
  inst[3] = 0xc0 | (xmm_cmp1b << 3) | tmp2_gpr;
  sljit_emit_op_custom(compiler, inst, 4);
  }

/* Constants of the second character. */
if (!second_varies)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
else
  {
  bit2 = char2a ^ char2b;
  if (!is_powerof2(bit2))
    {
    compare2_type = sse2_compare_match2;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b));
    }
  else
    {
    compare2_type = sse2_compare_match1i;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2));
    }
  }

inst[3] = 0xc0 | (xmm_cmp2a << 3) | tmp1_gpr;
sljit_emit_op_custom(compiler, inst, 4);

if (second_varies)
  {
  inst[3] = 0xc0 | (xmm_cmp2b << 3) | tmp2_gpr;
  sljit_emit_op_custom(compiler, inst, 4);
  }

/* PSHUFD xmm1, xmm2/m128, imm8 (imm8 = 0 copies the low dword to all four) */
inst[2] = 0x70;
inst[4] = 0;

inst[3] = 0xc0 | (xmm_cmp1a << 3) | xmm_cmp1a;
sljit_emit_op_custom(compiler, inst, 5);

if (first_varies)
  {
  inst[3] = 0xc0 | (xmm_cmp1b << 3) | xmm_cmp1b;
  sljit_emit_op_custom(compiler, inst, 5);
  }

inst[3] = 0xc0 | (xmm_cmp2a << 3) | xmm_cmp2a;
sljit_emit_op_custom(compiler, inst, 5);

if (second_varies)
  {
  inst[3] = 0xc0 | (xmm_cmp2b << 3) | xmm_cmp2b;
  sljit_emit_op_custom(compiler, inst, 5);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
utf_restart = LABEL();
#endif

/* TMP1 = address of the second character, TMP2 = unaligned STR_PTR. */
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, diff);
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);

load_from_mem_sse2(compiler, xmm_data1, str_ptr_gpr, 0);

jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_PTR, 0);

/* TMP1 < aligned STR_PTR: load the second block from memory. */
load_from_mem_sse2(compiler, xmm_data2, str_ptr_gpr, -(sljit_s8)diff);
jump[1] = JUMP(SLJIT_JUMP);

JUMPHERE(jump[0]);

/* Otherwise build it from the first block: copy it and shift by diff bytes. */

/* MOVDQA xmm1, xmm2/m128 */
inst[0] = 0x66;
inst[1] = 0x0f;
inst[2] = 0x6f;
inst[3] = 0xc0 | (xmm_data2 << 3) | xmm_data1;
sljit_emit_op_custom(compiler, inst, 4);

/* PSLLDQ xmm1, imm8 */
inst[2] = 0x73;
inst[3] = 0xc0 | (7 << 3) | xmm_data2;
inst[4] = diff;
sljit_emit_op_custom(compiler, inst, 5);

JUMPHERE(jump[1]);

OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);

for (step = 0; step < 4; step++)
  {
  fast_forward_char_pair_sse2_compare(compiler, compare2_type, step, xmm_data2, xmm_cmp2a, xmm_cmp2b, xmm_tmp2);
  fast_forward_char_pair_sse2_compare(compiler, compare1_type, step, xmm_data1, xmm_cmp1a, xmm_cmp1b, xmm_tmp1);
  }

/* PAND xmm1, xmm2/m128 */
inst[0] = 0x66;
inst[1] = 0x0f;
inst[2] = 0xdb;
inst[3] = 0xc0 | (xmm_data1 << 3) | xmm_data2;
sljit_emit_op_custom(compiler, inst, 4);

/* PMOVMSKB reg, xmm */
inst[2] = 0xd7;
inst[3] = 0xc0 | (tmp1_gpr << 3) | xmm_data1;
sljit_emit_op_custom(compiler, inst, 4);

/* Ignore matches before the first STR_PTR. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);

jump[0] = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Main loop. */
main_loop = LABEL();

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

load_from_mem_sse2(compiler, xmm_data1, str_ptr_gpr, 0);
load_from_mem_sse2(compiler, xmm_data2, str_ptr_gpr, -(sljit_s8)diff);

for (step = 0; step < 4; step++)
  {
  fast_forward_char_pair_sse2_compare(compiler, compare1_type, step, xmm_data1, xmm_cmp1a, xmm_cmp1b, xmm_tmp2);
  fast_forward_char_pair_sse2_compare(compiler, compare2_type, step, xmm_data2, xmm_cmp2a, xmm_cmp2b, xmm_tmp1);
  }

/* PAND xmm1, xmm2/m128 */
inst[0] = 0x66;
inst[1] = 0x0f;
inst[2] = 0xdb;
inst[3] = 0xc0 | (xmm_data1 << 3) | xmm_data2;
sljit_emit_op_custom(compiler, inst, 4);

/* PMOVMSKB reg, xmm */
inst[2] = 0xd7;
inst[3] = 0xc0 | (tmp1_gpr << 3) | xmm_data1;
sljit_emit_op_custom(compiler, inst, 4);

CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, main_loop);

JUMPHERE(jump[0]);

/* BSF r32, r/m32: index of the lowest set result bit. */
inst[0] = 0x0f;
inst[1] = 0xbc;
inst[2] = 0xc0 | (tmp1_gpr << 3) | tmp1_gpr;
sljit_emit_op_custom(compiler, inst, 3);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

if (limited_end)
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));

  jump[0] = jump_if_utf_char_start(compiler, TMP1);

  /* Not at a character start: step one unit forward and scan again. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, utf_restart);

  add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP));

  JUMPHERE(jump[0]);
  }
#endif

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));

if (limited_end)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
758 
759 #endif /* !_WIN64 */
760 
761 #undef SSE2_COMPARE_TYPE_INDEX
762 
763 #endif /* SLJIT_CONFIG_X86 && !SUPPORT_VALGRIND */
764 
765 #if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 && (defined __ARM_NEON || defined __ARM_NEON__))
766 
767 #include <arm_neon.h>
768 
/* Overlays four unsigned chars on one unsigned int, so that several small
values can be handed over as a single integer (member x). */

typedef union {
  unsigned int x;
  struct {
    unsigned char c1;
    unsigned char c2;
    unsigned char c3;
    unsigned char c4;
  } c;
} int_char;
773 
774 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
utf_continue(sljit_u8 * s)775 static SLJIT_INLINE int utf_continue(sljit_u8 *s)
776 {
777 #if PCRE2_CODE_UNIT_WIDTH == 8
778 return (*s & 0xc0) == 0x80;
779 #elif PCRE2_CODE_UNIT_WIDTH == 16
780 return (*s & 0xfc00) == 0xdc00;
781 #else
782 #error "Unknown code width"
783 #endif
784 }
785 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
786 
/* Code unit width dependent aliases for the NEON types and intrinsics used
by this section:

  VECTOR_FACTOR  number of code units in one 128 bit vector
  vect_t         vector type holding VECTOR_FACTOR code units
  VLD1Q .. VANDQ the intrinsics of matching lane width
  quad_word      a vector sized buffer viewed as code units (mem) or as two
                 64 bit words (dw) */

#if PCRE2_CODE_UNIT_WIDTH == 8

#define VECTOR_FACTOR 16
#define vect_t uint8x16_t
#define VLD1Q(X) vld1q_u8((sljit_u8 *)(X))
#define VCEQQ vceqq_u8
#define VORRQ vorrq_u8
#define VST1Q vst1q_u8
#define VDUPQ vdupq_n_u8
#define VEXTQ vextq_u8
#define VANDQ vandq_u8

typedef union {
  uint8_t mem[VECTOR_FACTOR];
  uint64_t dw[2];
} quad_word;

#elif PCRE2_CODE_UNIT_WIDTH == 16

#define VECTOR_FACTOR 8
#define vect_t uint16x8_t
#define VLD1Q(X) vld1q_u16((sljit_u16 *)(X))
#define VCEQQ vceqq_u16
#define VORRQ vorrq_u16
#define VST1Q vst1q_u16
#define VDUPQ vdupq_n_u16
#define VEXTQ vextq_u16
#define VANDQ vandq_u16

typedef union {
  uint16_t mem[VECTOR_FACTOR];
  uint64_t dw[2];
} quad_word;

#else

#define VECTOR_FACTOR 4
#define vect_t uint32x4_t
#define VLD1Q(X) vld1q_u32((sljit_u32 *)(X))
#define VCEQQ vceqq_u32
#define VORRQ vorrq_u32
#define VST1Q vst1q_u32
#define VDUPQ vdupq_n_u32
#define VEXTQ vextq_u32
#define VANDQ vandq_u32

typedef union {
  uint32_t mem[VECTOR_FACTOR];
  uint64_t dw[2];
} quad_word;

#endif
830 
831 #define FFCS
832 #include "pcre2_jit_neon_inc.h"
833 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
834 # define FF_UTF
835 # include "pcre2_jit_neon_inc.h"
836 # undef FF_UTF
837 #endif
838 #undef FFCS
839 
840 #define FFCS_2
841 #include "pcre2_jit_neon_inc.h"
842 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
843 # define FF_UTF
844 # include "pcre2_jit_neon_inc.h"
845 # undef FF_UTF
846 #endif
847 #undef FFCS_2
848 
849 #define FFCS_MASK
850 #include "pcre2_jit_neon_inc.h"
851 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
852 # define FF_UTF
853 # include "pcre2_jit_neon_inc.h"
854 # undef FF_UTF
855 #endif
856 #undef FFCS_MASK
857 
858 #define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1
859 
/* Emits a call to one of the NEON search helpers, which look for the next
code unit equal to char1 or char2.

  common  JIT compiler state
  char1   first accepted code unit
  char2   second accepted code unit (equal to char1 for a single character)
  offset  passed to the helper; in UTF builds, offset > 0 together with
          common->utf selects the "_utf" variant of the helper

Helper selection:
  char1 == char2                  ffcs       (c1 = char1, c2 = char2)
  characters differ in one bit    ffcs_mask  (c1 = char1 | bit, c2 = bit)
  otherwise                       ffcs_2     (c1 = char1, c2 = char2)

The helper gets STR_END in R0, STR_PTR in R1, offset in R2 and the packed
characters in R4. STR_PTR and TMP3 are saved in LOCALS0 / LOCALS1 around the
call. A zero return value means "not found": complete mode then jumps to
common->failed_match, the other modes leave STR_PTR unchanged. A non-zero
return value becomes the new STR_PTR.

NOTE(review): c1 and c2 are unsigned char, so wider code units are narrowed
when they are packed - confirm that this is what the helpers expect. */

static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
int_char ic;
struct sljit_jump *partial_quit;
sljit_sw search_func;
PCRE2_UCHAR mask = char1 ^ char2;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
int utf_restart = common->utf && offset > 0;
#endif

/* Only c1 and c2 are assigned below. Clear the whole word first, so that
the emitted immediate never contains indeterminate bytes. */
ic.x = 0;

/* Save temporary registers. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP3, 0);

/* Prepare function arguments */
OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0);
OP1(SLJIT_MOV, SLJIT_R1, 0, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, offset);

if (char1 == char2)
  {
  ic.c.c1 = char1;
  ic.c.c2 = char2;
  search_func = SLJIT_FUNC_OFFSET(ffcs);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (utf_restart)
    search_func = SLJIT_FUNC_OFFSET(ffcs_utf);
#endif
  }
else if (is_powerof2(mask))
  {
  ic.c.c1 = char1 | mask;
  ic.c.c2 = mask;
  search_func = SLJIT_FUNC_OFFSET(ffcs_mask);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (utf_restart)
    search_func = SLJIT_FUNC_OFFSET(ffcs_mask_utf);
#endif
  }
else
  {
  ic.c.c1 = char1;
  ic.c.c2 = char2;
  search_func = SLJIT_FUNC_OFFSET(ffcs_2);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (utf_restart)
    search_func = SLJIT_FUNC_OFFSET(ffcs_2_utf);
#endif
  }

OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
                 SLJIT_IMM, search_func);

/* Restore registers. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);

/* Check return value. */
partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit);

/* Fast forward STR_PTR to the position returned by the helper. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(partial_quit);
}
947 
/* Comparison modes understood by fast_forward_char_pair_compare(). */
typedef enum {
  /* Data must be equal to cmp1. */
  compare_match1 = 0,
  /* Data is OR-ed with cmp2 first, then must be equal to cmp1. */
  compare_match1i = 1,
  /* Data must be equal to cmp1 or to cmp2. */
  compare_match2 = 2
} compare_type;
953 
/* Compares the vector dst against the reference vector(s) according to ctype
   and returns the result of the comparison:
     compare_match2  - (dst == cmp1) OR (dst == cmp2)
     compare_match1i - (dst OR cmp2) == cmp1
     anything else   - dst == cmp1 (cmp2 is ignored)
   VCEQQ and VORRQ are presumably the lane-wise "compare equal" and bitwise OR
   wrappers defined earlier in this file - confirm there. */
static inline vect_t fast_forward_char_pair_compare(compare_type ctype, vect_t dst, vect_t cmp1, vect_t cmp2)
{
switch (ctype)
  {
  case compare_match2:
    /* Either reference vector may match. */
    return VORRQ(VCEQQ(dst, cmp1), VCEQQ(dst, cmp2));

  case compare_match1i:
    /* Merge the bits of cmp2 into the data before the single comparison. */
    return VCEQQ(VORRQ(dst, cmp2), cmp1);

  default:
    return VCEQQ(dst, cmp1);
  }
}
970 
max_fast_forward_char_pair_offset(void)971 static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)
972 {
973 #if PCRE2_CODE_UNIT_WIDTH == 8
974 return 15;
975 #elif PCRE2_CODE_UNIT_WIDTH == 16
976 return 7;
977 #elif PCRE2_CODE_UNIT_WIDTH == 32
978 return 3;
979 #else
980 #error "Unsupported unit width"
981 #endif
982 }
983 
984 /* ARM doesn't have a shift left across lanes. */
shift_left_n_lanes(vect_t a,sljit_u8 n)985 static SLJIT_INLINE vect_t shift_left_n_lanes(vect_t a, sljit_u8 n)
986 {
987 vect_t zero = VDUPQ(0);
988 SLJIT_ASSERT(0 < n && n < VECTOR_FACTOR);
989 /* VEXTQ takes an immediate as last argument. */
990 #define C(X) case X: return VEXTQ(zero, a, VECTOR_FACTOR - X);
991 switch (n)
992   {
993   C(1); C(2); C(3);
994 #if PCRE2_CODE_UNIT_WIDTH != 32
995   C(4); C(5); C(6); C(7);
996 # if PCRE2_CODE_UNIT_WIDTH != 16
997   C(8); C(9); C(10); C(11); C(12); C(13); C(14); C(15);
998 # endif
999 #endif
1000   default:
1001     /* Based on the ASSERT(0 < n && n < VECTOR_FACTOR) above, this won't
1002        happen. The return is still here for compilers to not warn. */
1003     return a;
1004   }
1005 }
1006 
1007 #define FFCPS
1008 #define FFCPS_DIFF1
1009 #define FFCPS_CHAR1A2A
1010 
1011 #define FFCPS_0
1012 #include "pcre2_jit_neon_inc.h"
1013 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1014 # define FF_UTF
1015 # include "pcre2_jit_neon_inc.h"
1016 # undef FF_UTF
1017 #endif
1018 #undef FFCPS_0
1019 
1020 #undef FFCPS_CHAR1A2A
1021 
1022 #define FFCPS_1
1023 #include "pcre2_jit_neon_inc.h"
1024 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1025 # define FF_UTF
1026 # include "pcre2_jit_neon_inc.h"
1027 # undef FF_UTF
1028 #endif
1029 #undef FFCPS_1
1030 
1031 #undef FFCPS_DIFF1
1032 
1033 #define FFCPS_DEFAULT
1034 #include "pcre2_jit_neon_inc.h"
1035 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1036 # define FF_UTF
1037 # include "pcre2_jit_neon_inc.h"
1038 # undef FF_UTF
1039 #endif
1040 #undef FFCPS
1041 
1042 #define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1
1043 
/* Emits the JIT code that fast-forwards STR_PTR by calling one of the NEON
   character pair helpers (ffcps_*). Only PCRE2_JIT_COMPLETE mode is supported
   and offs1 must be greater than offs2; both are asserted below.

   The emitted code passes the search limit in R0, STR_PTR in R1, offs1 and
   offs2 in R2 and R3, and the four characters packed into one value in R4.
   If the helper returns zero the code jumps to common->failed_match,
   otherwise STR_PTR is set to the returned value. */
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
  PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
{
DEFINE_COMPILER;
sljit_u32 diff = IN_UCHARS(offs1 - offs2);
sljit_s32 call_types = SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW);
int diff_is_one = (diff == 1);
int same_chars = (char1a == char1b && char2a == char2b);
struct sljit_jump *not_found;
int_char packed;

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset()));
SLJIT_ASSERT(compiler->scratches == 5);

/* Pack the four characters into a single value for the helper. */
packed.c.c1 = char1a;
packed.c.c2 = char1b;
packed.c.c3 = char2a;
packed.c.c4 = char2b;

/* STR_PTR is reloaded from this slot after the call. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);

/* R0: the search limit. */
if (common->match_end_ptr != 0)
  {
  /* R0 = stored match end + IN_UCHARS(offs1 + 1), replaced by STR_END
     when STR_END is below that value. */
  OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP2(SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));

  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, STR_END, 0, SLJIT_R0, 0);
  CMOV(SLJIT_LESS, SLJIT_R0, STR_END, 0);
  }
else
  OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0);

/* R1..R4: current position, the two offsets and the packed characters. */
OP1(SLJIT_MOV, SLJIT_R1, 0, STR_PTR, 0);
OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_IMM, offs1);
OP1(SLJIT_MOV_S32, SLJIT_R3, 0, SLJIT_IMM, offs2);
/* NOTE(review): only the low 32 bits of the packed value are moved here;
   confirm that this covers all four characters for every code unit width. */
OP1(SLJIT_MOV_U32, SLJIT_R4, 0, SLJIT_IMM, packed.x);

/* Helper selection:
     diff != 1                             -> ffcps_default
     diff == 1, both pairs are one char    -> ffcps_0
     diff == 1, otherwise                  -> ffcps_1
   The _utf variants are used when UTF mode is active. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
  sljit_emit_icall(compiler, SLJIT_CALL, call_types, SLJIT_IMM,
    !diff_is_one ? SLJIT_FUNC_OFFSET(ffcps_default_utf) :
    same_chars ? SLJIT_FUNC_OFFSET(ffcps_0_utf) : SLJIT_FUNC_OFFSET(ffcps_1_utf));
else
#endif
  sljit_emit_icall(compiler, SLJIT_CALL, call_types, SLJIT_IMM,
    !diff_is_one ? SLJIT_FUNC_OFFSET(ffcps_default) :
    same_chars ? SLJIT_FUNC_OFFSET(ffcps_0) : SLJIT_FUNC_OFFSET(ffcps_1));

/* Reload STR_PTR from the slot written before the call. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

/* A zero return value means the helper found nothing: fail the match. */
not_found = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
add_jump(compiler, &common->failed_match, not_found);

/* Fast forward STR_PTR to the position returned by the helper. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

/* NOTE(review): this jump has already been queued on common->failed_match
   above; confirm whether binding it to a label here is still needed. */
JUMPHERE(not_found);
}
1122 
1123 #endif /* SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 */
1124