1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 This module by Zoltan Herczeg
10 Original API code Copyright (c) 1997-2012 University of Cambridge
11 New API code Copyright (c) 2016-2021 University of Cambridge
12
13 -----------------------------------------------------------------------------
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
16
17 * Redistributions of source code must retain the above copyright notice,
18 this list of conditions and the following disclaimer.
19
20 * Redistributions in binary form must reproduce the above copyright
21 notice, this list of conditions and the following disclaimer in the
22 documentation and/or other materials provided with the distribution.
23
24 * Neither the name of the University of Cambridge nor the names of its
25 contributors may be used to endorse or promote products derived from
26 this software without specific prior written permission.
27
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 POSSIBILITY OF SUCH DAMAGE.
39 -----------------------------------------------------------------------------
40 */
41
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45
46 #include "pcre2_internal.h"
47
48 #ifdef SUPPORT_JIT
49
50 /* All-in-one: Since we use the JIT compiler only from here,
51 we just include it. This way we don't need to touch the build
52 system files. */
53
/* Configuration macros that must be defined before sljit/sljitLir.c is
included further down in this file. */
#define SLJIT_CONFIG_AUTO 1
#define SLJIT_CONFIG_STATIC 1
#define SLJIT_VERBOSE 0

/* SLJIT_DEBUG follows PCRE2_DEBUG: 1 when it is defined, 0 otherwise. */
#ifdef PCRE2_DEBUG
#define SLJIT_DEBUG 1
#else
#define SLJIT_DEBUG 0
#endif

/* Map the sljit allocation macros onto the two static wrappers defined
right after these lines. */
#define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
#define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
66
pcre2_jit_malloc(size_t size,void * allocator_data)67 static void * pcre2_jit_malloc(size_t size, void *allocator_data)
68 {
69 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
70 return allocator->malloc(size, allocator->memory_data);
71 }
72
pcre2_jit_free(void * ptr,void * allocator_data)73 static void pcre2_jit_free(void *ptr, void *allocator_data)
74 {
75 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
76 allocator->free(ptr, allocator->memory_data);
77 }
78
79 #include "sljit/sljitLir.c"
80
81 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
82 #error Unsupported architecture
83 #endif
84
85 /* Defines for debugging purposes. */
86
87 /* 1 - Use unoptimized capturing brackets.
88 2 - Enable capture_last_ptr (includes option 1). */
89 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
90
91 /* 1 - Always have a control head. */
92 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
93
94 /* Allocate memory for the regex stack on the real machine stack.
95 Fast, but limited size. */
96 #define MACHINE_STACK_SIZE 32768
97
/* Growth rate for stack allocated by the OS. Should be a multiple
of the page size. */
100 #define STACK_GROWTH_RATE 8192
101
102 /* Enable to check that the allocation could destroy temporaries. */
103 #if defined SLJIT_DEBUG && SLJIT_DEBUG
104 #define DESTROY_REGISTERS 1
105 #endif
106
107 /*
Short summary about the backtracking mechanism employed by the jit code generator:
109
The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).
115
116 'ab' - 'a' and 'b' regexps are concatenated
117 'a+' - 'a' is the sub-expression of the '+' operator
118
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
124
125 Greedy star operator (*) :
126 Matching path: match happens.
127 Backtrack path: match failed.
128 Non-greedy star operator (*?) :
129 Matching path: no need to perform a match.
130 Backtrack path: match is required.
131
The following example shows the code generated for a capturing bracket
with two alternatives. Let A, B, C and D be arbitrary regular expressions, and
we have the following regular expression:
135
136 A(B|C)D
137
138 The generated code will be the following:
139
140 A matching path
141 '(' matching path (pushing arguments to the stack)
142 B matching path
143 ')' matching path (pushing arguments to the stack)
144 D matching path
145 return with successful match
146
147 D backtrack path
148 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
149 B backtrack path
150 C expected path
151 jump to D matching path
152 C backtrack path
153 A backtrack path
154
Notice that the order of the backtrack code paths is the opposite of the fast
code paths. In this way the topmost value on the stack always belongs
to the current backtrack code path. The backtrack path must check
whether there is a next alternative. If so, it needs to jump back to
the matching path eventually. Otherwise it needs to clear out its own stack
frame and continue the execution on the backtrack code paths.
161 */
162
163 /*
164 Saved stack frames:
165
Atomic blocks and asserts require reloading the values of private data
when backtracking is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
170
171 The stack frames are stored in a chain list, and have the following format:
172 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
173
174 Thus we can restore the private data to a particular point in the stack.
175 */
176
/* Argument block: a pointer to this structure is the only parameter of the
jit_function type declared later in this file. Pointer-sized members are
grouped first, the remaining scalar members follow. */
typedef struct jit_arguments {
  /* Pointers first. */
  struct sljit_stack *stack;
  PCRE2_SPTR str;
  PCRE2_SPTR begin;
  PCRE2_SPTR end;
  pcre2_match_data *match_data;
  PCRE2_SPTR startchar_ptr;
  PCRE2_UCHAR *mark_ptr;
  /* Callout function pointer and the data pointer stored next to it
  (presumably passed back as the second argument - confirm at the call site). */
  int (*callout)(pcre2_callout_block *, void *);
  void *callout_data;
  /* Everything else after. */
  sljit_uw offset_limit;
  sljit_u32 limit_match;
  sljit_u32 oveccount;
  sljit_u32 options;
} jit_arguments;
194
/* Number of slots in each per-mode array of executable_functions. */
#define JIT_NUMBER_OF_COMPILE_MODES 3

/* Bookkeeping record with one slot per compile mode in each of its three
arrays, plus two scalar values that are not per-mode. */
typedef struct executable_functions {
  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
  void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
  sljit_u32 top_bracket;
  sljit_u32 limit_match;
} executable_functions;
204
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  /* Next node of the list. */
  struct jump_list *next;
} jump_list;
209
/* Node of a singly linked list of stubs; each node pairs an sljit jump
(start) with an sljit label (quit). */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  /* Next node of the list. */
  struct stub_list *next;
} stub_list;
215
/* Negative sentinel values. NOTE(review): presumably stored in the
"framesize" members of the backtrack structures, whose comments say that a
value below zero means no frame is needed - confirm at the use sites. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
220
/* Type tags for control entries. NOTE(review): presumably used by the
control verb chain referenced by control_head_ptr - confirm at the use sites. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
225
/* Type tags for early fail handling. NOTE(review): presumably related to the
early_fail_start_ptr / early_fail_end_ptr locals of compiler_common - confirm
at the use sites. */
enum early_fail_types {
  type_skip = 0,
  type_fail = 1,
  type_fail_range = 2
};
231
/* Function pointer type: takes a pointer to a jit_arguments block and returns
an int. SLJIT_FUNC is supplied by sljit (presumably a calling convention
decoration - confirm in the sljit headers). */
typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
233
/* The following structure is the key data type for the recursive
code generator. It is allocated by compile_matchingpath, and contains
the arguments for compile_backtrackingpath. Must be the first member
of its descendants (each *_backtrack structure in this file starts with a
"backtrack_common common" member). */
typedef struct backtrack_common {
  /* Concatenation stack. */
  struct backtrack_common *prev;
  jump_list *nextbacktracks;
  /* Internal stack (for component operators). */
  struct backtrack_common *top;
  jump_list *topbacktracks;
  /* Opcode pointer. */
  PCRE2_SPTR cc;
} backtrack_common;
248
/* Backtrack record for assertions (per its name); extends backtrack_common. */
typedef struct assert_backtrack {
  backtrack_common common;
  jump_list *condfailed;
  /* Less than 0 if a frame is not needed. */
  int framesize;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* For iterators. */
  struct sljit_label *matchingpath;
} assert_backtrack;
259
/* Backtrack record for brackets (per its name); extends backtrack_common. */
typedef struct bracket_backtrack {
  backtrack_common common;
  /* Where to continue if an alternative is successfully matched. */
  struct sljit_label *alternative_matchingpath;
  /* For rmin and rmax iterators. */
  struct sljit_label *recursive_matchingpath;
  /* For greedy ? operator. */
  struct sljit_label *zero_matchingpath;
  /* Contains the branches of a failed condition. Only one member of the
  union is meaningful at a time; the comments below name the bracket kind
  each member belongs to. */
  union {
    /* Both for OP_COND, OP_SCOND. */
    jump_list *condfailed;
    assert_backtrack *assert;
    /* For OP_ONCE. Less than 0 if not needed. */
    int framesize;
    /* For brackets with >3 alternatives. */
    struct sljit_put_label *matching_put_label;
  } u;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
} bracket_backtrack;
281
/* Backtrack record for the *POS bracket variants (inferred from the name -
confirm at the use sites); extends backtrack_common. */
typedef struct bracketpos_backtrack {
  backtrack_common common;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* Reverting stack is needed. */
  int framesize;
  /* Allocated stack size. */
  int stacksize;
} bracketpos_backtrack;
291
/* Backtrack record that adds a single matching path label to
backtrack_common (presumably for OP_BRAMINZERO, judging by the name). */
typedef struct braminzero_backtrack {
  backtrack_common common;
  struct sljit_label *matchingpath;
} braminzero_backtrack;
296
/* Backtrack record for character iterators (per its name); extends
backtrack_common. */
typedef struct char_iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
  /* Either a jump list or the "charpos" data; the two share storage. */
  union {
    jump_list *backtracks;
    struct {
      unsigned int othercasebit;
      PCRE2_UCHAR chr;
      BOOL enabled;
    } charpos;
  } u;
} char_iterator_backtrack;
310
/* Backtrack record for reference iterators (per its name); extends
backtrack_common. */
typedef struct ref_iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
} ref_iterator_backtrack;
316
/* Node of a singly linked list describing one recursion target. */
typedef struct recurse_entry {
  struct recurse_entry *next;
  /* Contains the function entry label. */
  struct sljit_label *entry_label;
  /* Contains the function backtrack label. */
  struct sljit_label *backtrack_label;
  /* Collects the entry calls until the function is created. */
  jump_list *entry_calls;
  /* Collects the backtrack calls until the function is created. */
  jump_list *backtrack_calls;
  /* Points to the starting opcode. */
  sljit_sw start;
} recurse_entry;
330
/* Backtrack record for recursion (per its name); extends backtrack_common. */
typedef struct recurse_backtrack {
  backtrack_common common;
  /* Return to the matching path. */
  struct sljit_label *matchingpath;
  /* Recursive pattern. */
  recurse_entry *entry;
  /* Pattern is inlined. */
  BOOL inlined_pattern;
} recurse_backtrack;
340
/* Extra opcode value defined as OP_TABLE_LENGTH (presumably the first value
past the real opcode table, so it cannot collide with a pattern opcode -
confirm in pcre2_internal.h). */
#define OP_THEN_TRAP OP_TABLE_LENGTH

/* Backtrack record for a then trap; extends backtrack_common. */
typedef struct then_trap_backtrack {
  backtrack_common common;
  /* If then_trap is not NULL, this structure contains the real
  then_trap for the backtracking path. */
  struct then_trap_backtrack *then_trap;
  /* Points to the starting opcode. */
  sljit_sw start;
  /* Exit point for the then opcodes of this alternative. */
  jump_list *quit;
  /* Frame size of the current alternative. */
  int framesize;
} then_trap_backtrack;
355
/* MAX_DIFF_CHARS is the capacity of the chars array below. MAX_N_CHARS is
not used by this structure (presumably the number of inspected positions -
confirm at the use sites). */
#define MAX_N_CHARS 12
#define MAX_DIFF_CHARS 5

/* Set of characters collected for one position. */
typedef struct fast_forward_char_data {
  /* Number of characters in the chars array, 255 for any character. */
  sljit_u8 count;
  /* Number of last UTF-8 characters in the chars array. */
  sljit_u8 last_count;
  /* Available characters in the current position. */
  PCRE2_UCHAR chars[MAX_DIFF_CHARS];
} fast_forward_char_data;
367
/* Size limits for character class handling (presumably the largest number of
ranges / single characters that get a specialised code path - confirm at the
use sites). */
#define MAX_CLASS_RANGE_SIZE 4
#define MAX_CLASS_CHARS_SIZE 3
370
/* Shared state of one compilation. A pointer to this structure (named
"common") is what the OVECTOR*, PRIVATE_DATA and DEFINE_COMPILER macros in
this file dereference. */
typedef struct compiler_common {
  /* The sljit generic compiler. */
  struct sljit_compiler *compiler;
  /* Compiled regular expression. */
  pcre2_real_code *re;
  /* First byte code. */
  PCRE2_SPTR start;
  /* Maps private data offset to each opcode. */
  sljit_s32 *private_data_ptrs;
  /* Chain list of read-only data ptrs. */
  void *read_only_data_head;
  /* Tells whether the capturing bracket is optimized. */
  sljit_u8 *optimized_cbracket;
  /* Tells whether the starting offset is a target of then. */
  sljit_u8 *then_offsets;
  /* Current position where a THEN must jump. */
  then_trap_backtrack *then_trap;
  /* Starting offset of private data for capturing brackets. */
  sljit_s32 cbra_ptr;
  /* Output vector starting point. Must be divisible by 2. */
  sljit_s32 ovector_start;
  /* Points to the starting character of the current match. */
  sljit_s32 start_ptr;
  /* Last known position of the requested byte. */
  sljit_s32 req_char_ptr;
  /* Head of the last recursion. */
  sljit_s32 recursive_head_ptr;
  /* First inspected character for partial matching.
  (Needed for avoiding zero length partial matches.) */
  sljit_s32 start_used_ptr;
  /* Starting pointer for partial soft matches. */
  sljit_s32 hit_start;
  /* Pointer of the match end position. */
  sljit_s32 match_end_ptr;
  /* Points to the marked string. */
  sljit_s32 mark_ptr;
  /* Recursive control verb management chain. */
  sljit_s32 control_head_ptr;
  /* Points to the last matched capture block index. */
  sljit_s32 capture_last_ptr;
  /* Fast forward skipping byte code pointer. */
  PCRE2_SPTR fast_forward_bc_ptr;
  /* Locals used by fast fail optimization. */
  sljit_s32 early_fail_start_ptr;
  sljit_s32 early_fail_end_ptr;
  /* Variables used by recursive call generator. */
  sljit_s32 recurse_bitset_size;
  uint8_t *recurse_bitset;

  /* Flipped and lower case tables. */
  const sljit_u8 *fcc;
  sljit_sw lcc;
  /* Mode can be PCRE2_JIT_COMPLETE and others. */
  int mode;
  /* TRUE, when empty match is accepted for partial matching. */
  BOOL allow_empty_partial;
  /* TRUE, when minlength is greater than 0.
  NOTE(review): the member name suggests the opposite condition (an empty
  match is possible), and check_opcode_types sets it to TRUE when it meets
  OP_SET_SOM - confirm against the place where it is initialised. */
  BOOL might_be_empty;
  /* \K is found in the pattern. */
  BOOL has_set_som;
  /* (*SKIP:arg) is found in the pattern. */
  BOOL has_skip_arg;
  /* (*THEN) is found in the pattern. */
  BOOL has_then;
  /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
  BOOL has_skip_in_assert_back;
  /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
  BOOL local_quit_available;
  /* Currently in a positive assertion. */
  BOOL in_positive_assertion;
  /* Newline control. */
  int nltype;
  sljit_u32 nlmax;
  sljit_u32 nlmin;
  int newline;
  int bsr_nltype;
  sljit_u32 bsr_nlmax;
  sljit_u32 bsr_nlmin;
  /* Dollar endonly. */
  int endonly;
  /* Tables. */
  sljit_sw ctypes;
  /* Named capturing brackets. */
  PCRE2_SPTR name_table;
  sljit_sw name_count;
  sljit_sw name_entry_size;

  /* Labels and jump lists. */
  struct sljit_label *partialmatchlabel;
  struct sljit_label *quit_label;
  struct sljit_label *abort_label;
  struct sljit_label *accept_label;
  struct sljit_label *ff_newline_shortcut;
  stub_list *stubs;
  recurse_entry *entries;
  recurse_entry *currententry;
  jump_list *partialmatch;
  jump_list *quit;
  jump_list *positive_assertion_quit;
  jump_list *abort;
  jump_list *failed_match;
  jump_list *accept;
  jump_list *calllimit;
  jump_list *stackalloc;
  jump_list *revertframes;
  jump_list *wordboundary;
  jump_list *anynewline;
  jump_list *hspace;
  jump_list *vspace;
  jump_list *casefulcmp;
  jump_list *caselesscmp;
  jump_list *reset_match;
  BOOL unset_backref;
  BOOL alt_circumflex;
#ifdef SUPPORT_UNICODE
  BOOL utf;
  BOOL invalid_utf;
  BOOL ucp;
  /* Points to saving area for iref. */
  sljit_s32 iref_ptr;
  jump_list *getucd;
  jump_list *getucdtype;
#if PCRE2_CODE_UNIT_WIDTH == 8
  /* Jump lists that exist only in the 8 bit library. */
  jump_list *utfreadchar;
  jump_list *utfreadtype8;
  jump_list *utfpeakcharback;
#endif
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
  /* Jump lists that exist only in the 8 and 16 bit libraries. */
  jump_list *utfreadchar_invalid;
  jump_list *utfreadnewline_invalid;
  jump_list *utfmoveback_invalid;
  jump_list *utfpeakcharback_invalid;
#endif
#endif /* SUPPORT_UNICODE */
} compiler_common;
506
/* For byte_sequence_compare. The members after sourcereg exist only when
SLJIT_UNALIGNED is defined and non-zero. The unions "c" and "oc" have the same
layout: one 32 bit value viewed as an int, a 16 bit value, or an array of
code units whose element count depends on PCRE2_CODE_UNIT_WIDTH. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } c;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
540
541 /* Undefine sljit macros. */
542 #undef CMP
543
544 /* Used for accessing the elements of the stack. */
545 #define STACK(i) ((i) * (int)sizeof(sljit_sw))
546
/* When sljit names a preferred shift register, only SLJIT_R2 and SLJIT_R3
are accepted; SLJIT_R3 is recorded through SHIFT_REG_IS_R3. */
#ifdef SLJIT_PREF_SHIFT_REG
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
/* Nothing. */
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
#define SHIFT_REG_IS_R3
#else
#error "Unsupported shift register"
#endif
#endif

/* Symbolic names for the sljit registers used in this file. With
SHIFT_REG_IS_R3 the roles of SLJIT_R2 and SLJIT_R3 are swapped, so TMP2 is
the preferred shift register in both configurations where one is named. */
#define TMP1 SLJIT_R0
#ifdef SHIFT_REG_IS_R3
#define TMP2 SLJIT_R3
#define TMP3 SLJIT_R2
#else
#define TMP2 SLJIT_R2
#define TMP3 SLJIT_R3
#endif
#define STR_PTR SLJIT_R1
#define STR_END SLJIT_S0
#define STACK_TOP SLJIT_S1
#define STACK_LIMIT SLJIT_S2
#define COUNT_MATCH SLJIT_S3
#define ARGUMENTS SLJIT_S4
#define RETURN_ADDR SLJIT_R4
572
/* HAS_VIRTUAL_REGISTERS is 1 only when sljit is configured for 32 bit x86,
and 0 for every other target. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#define HAS_VIRTUAL_REGISTERS 1
#else
#define HAS_VIRTUAL_REGISTERS 0
#endif
578
/* Local space layout. All offsets below are byte offsets expressed in units
of sizeof(sljit_sw). */
/* These two locals can be used by the current opcode. */
#define LOCALS0 (0 * sizeof(sljit_sw))
#define LOCALS1 (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0 (2 * sizeof(sljit_sw))
#define POSSESSIVE1 (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet been
reached. */
/* The macros below require a compiler_common pointer named "common" in
scope. PRIVATE_DATA indexes private_data_ptrs by the distance of cc from the
first byte code. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
596
/* Code unit width dependent helpers:
  MOV_UCHAR   - sljit move opcode that matches one code unit
  UCHAR_SHIFT - log2 of the code unit size in bytes; NOT defined in the
                8 bit library, so every use must be guarded
  IN_UCHARS   - converts a count of code units to a count of bytes */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define MOV_UCHAR SLJIT_MOV_U8
#define IN_UCHARS(x) (x)
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define MOV_UCHAR SLJIT_MOV_U16
#define UCHAR_SHIFT (1)
#define IN_UCHARS(x) ((x) * 2)
#elif PCRE2_CODE_UNIT_WIDTH == 32
#define MOV_UCHAR SLJIT_MOV_U32
#define UCHAR_SHIFT (2)
#define IN_UCHARS(x) ((x) * 4)
#else
#error Unsupported compiling mode
#endif
611
/* Shortcuts. DEFINE_COMPILER declares a local "compiler" variable taken from
a compiler_common pointer named "common"; every other macro below expands to
an sljit call that uses that "compiler" variable, so DEFINE_COMPILER (or an
equivalent declaration) must precede their use. CMP replaces the sljit macro
of the same name, which is #undef'd earlier in this file. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define OP2U(op, src1, src1w, src2, src2w) \
  sljit_emit_op2u(compiler, (op), (src1), (src1w), (src2), (src2w))
#define OP_SRC(op, src, srcw) \
  sljit_emit_op_src(compiler, (op), (src), (srcw))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an already existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Emits a new label at the current position and binds the jump to it. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare-and-jump and binds it to an already existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define CMOV(type, dst_reg, src, srcw) \
  sljit_emit_cmov(compiler, (type), (dst_reg), (src), (srcw))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
643
/* Largest value of a signed 32 bit integer (presumably passed as the "no
upper limit" argument of the character readers - confirm at the use sites). */
#define READ_CHAR_MAX 0x7fffffff

/* Special character values. NOTE(review): INVALID_UTF_CHAR expands to an
unparenthesised -1; keep that in mind when using it inside expressions. */
#define INVALID_UTF_CHAR -1
#define UNASSIGNED_UTF_CHAR 888
648
649 #if defined SUPPORT_UNICODE
650 #if PCRE2_CODE_UNIT_WIDTH == 8
651
/* Decodes one UTF-8 character forwards (8 bit library).

  c              - receives the decoded code point
  ptr            - read position; advanced past the character on success
  end            - one past the last readable code unit; checked before any
                   continuation byte is read
  invalid_action - statement executed when the sequence is rejected

Rejected: lead bytes outside 0x00-0x7f / 0xc2-0xdf / 0xe0-0xef / 0xf0-0xf4,
missing or non-continuation trail bytes, sequences cut off by end, 3 byte
values below 0x800 or in the surrogate range 0xd800-0xdfff, and 4 byte values
below 0x10000 or from 0x110000 upwards.

ptr[0] is read without comparing ptr to end, so the caller is expected to
guarantee ptr < end. When a 3 or 4 byte range check fails ptr has already been
advanced; in the other failure cases ptr is unchanged but c may have been
overwritten. Every argument is evaluated more than once, so none of them may
have side effects. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] <= 0x7f) \
    c = *ptr++; \
  else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \
    { \
    c = ptr[1] - 0x80; \
    \
    if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \
      { \
      c |= (ptr[0] - 0xc0) << 6; \
      ptr += 2; \
      } \
    else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] < 0xc0) \
      { \
      c = c << 6 | (ptr[2] - 0x80); \
      \
      if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \
        { \
        c |= (ptr[0] - 0xe0) << 12; \
        ptr += 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \
        { \
        c = c << 6 | (ptr[3] - 0x80); \
        \
        if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \
          { \
          c |= (ptr[0] - 0xf0) << 18; \
          ptr += 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
713
/* Decodes the UTF-8 character that ends right before ptr (8 bit library).

  c              - receives the decoded code point
  ptr            - read position; moved back to the first byte of the
                   character on success
  start          - first readable code unit; checked before any byte in
                   front of ptr[-1] is read
  invalid_action - statement executed when the sequence is rejected

The bytes are met in reverse order: ptr[-1] carries the lowest six bits,
ptr[-2] the next six (shift 6), ptr[-3] the next six (shift 12), and the lead
byte supplies the top bits. Each trail byte is therefore OR-ed in at its own
shift; the accumulated value must not be shifted as in the forward reader.

Rejected: the same sequences as in GETCHARINC_INVALID (bad lead byte, missing
continuation bytes, sequences cut off by start, overlong 3 / 4 byte forms,
surrogates, values from 0x110000 upwards).

ptr[-1] is read without comparing ptr to start, so the caller is expected to
guarantee ptr > start. When a 3 or 4 byte range check fails ptr has already
been moved; in the other failure cases ptr is unchanged but c has been
overwritten. Every argument is evaluated more than once, so none of them may
have side effects. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c <= 0x7f) \
    ptr--; \
  else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \
    { \
    c -= 0x80; \
    \
    if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \
      { \
      c |= (ptr[-2] - 0xc0) << 6; \
      ptr -= 2; \
      } \
    else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \
      { \
      c |= (ptr[-2] - 0x80) << 6; \
      \
      if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \
        { \
        c |= (ptr[-3] - 0xe0) << 12; \
        ptr -= 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \
        { \
        c |= (ptr[-3] - 0x80) << 12; \
        \
        if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \
          { \
          c |= (ptr[-4] - 0xf0) << 18; \
          ptr -= 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
776
777 #elif PCRE2_CODE_UNIT_WIDTH == 16
778
/* Decodes one UTF-16 character forwards (16 bit library).

A code unit outside 0xd800-0xdfff is taken as is. A high surrogate
(0xd800-0xdbff) followed, before end, by a low surrogate (0xdc00-0xdfff) is
combined into a code point from 0x10000 upwards and ptr advances by two.
Anything else (lone or reversed surrogate, pair cut off by end) runs
invalid_action with ptr and c unchanged.

ptr[0] is read without comparing ptr to end. Every argument is evaluated more
than once, so none of them may have side effects. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] < 0xd800 || ptr[0] >= 0xe000) \
    c = *ptr++; \
  else if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \
    { \
    c = (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)) + 0x10000; \
    ptr += 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
793
/* Decodes the UTF-16 character that ends right before ptr (16 bit library).

c is first loaded with ptr[-1]. A code unit outside 0xd800-0xdfff is accepted
and ptr moves back by one. A low surrogate preceded, after start, by a high
surrogate is combined into a code point from 0x10000 upwards and ptr moves
back by two. Anything else runs invalid_action with ptr unchanged and c
holding the raw ptr[-1] value.

ptr[-1] is read without comparing ptr to start. Every argument is evaluated
more than once, so none of them may have side effects. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c < 0xd800 || c >= 0xe000) \
    ptr--; \
  else if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \
    { \
    c = (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)) + 0x10000; \
    ptr -= 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
809
810
811 #elif PCRE2_CODE_UNIT_WIDTH == 32
812
/* Reads one UTF-32 character forwards (32 bit library).

A code unit below 0xd800, or from 0xe000 up to but excluding 0x110000, is
stored in c and ptr advances by one. Any other value runs invalid_action with
ptr and c unchanged. The end argument is not used by this variant. ptr is
evaluated more than once, so it may not have side effects. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] < 0xd800 || (ptr[0] >= 0xe000 && ptr[0] < 0x110000)) \
    c = *ptr++; \
  else \
    { \
    invalid_action; \
    } \
  }
822
/* Reads the UTF-32 character that ends right before ptr (32 bit library).

c is always loaded with ptr[-1]. When that value is below 0xd800, or from
0xe000 up to but excluding 0x110000, ptr moves back by one; otherwise
invalid_action runs with ptr unchanged. The start argument is not used by
this variant. ptr is evaluated more than once, so it may not have side
effects. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (ptr[-1] < 0xd800 || (ptr[-1] >= 0xe000 && ptr[-1] < 0x110000)) \
    ptr--; \
  else \
    { \
    invalid_action; \
    } \
  }
833
834 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
835 #endif /* SUPPORT_UNICODE */
836
/* Returns the address of the first opcode after a bracket.

cc must point to the opening opcode of an assertion (OP_ASSERT ..
OP_ASSERTBACK_NA) or of a group in the OP_ONCE .. OP_SCOND range. The link
stored at offset 1 is followed once for the opening opcode and once more for
each OP_ALT, which ends on the closing OP_KET* opcode; that opcode and its
link field are then stepped over. */
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* First alternative. */
cc += GET(cc, 1);
/* Remaining alternatives, if any. */
while (*cc == OP_ALT)
  cc += GET(cc, 1);

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return cc + 1 + LINK_SIZE;
}
845
/* Returns the number of alternatives of a bracket (always at least 1).

cc must point to the opening opcode of an assertion (OP_ASSERT ..
OP_ASSERTBACK_NA) or of a group in the OP_ONCE .. OP_SCOND range. Each link
that is followed (from the opening opcode and from every OP_ALT) accounts
for one alternative. */
static int no_alternatives(PCRE2_SPTR cc)
{
int alternatives = 0;

SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

for (;;)
  {
  cc += GET(cc, 1);
  alternatives++;
  if (*cc != OP_ALT)
    break;
  }

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return alternatives;
}
859
/* Functions which might need modification for all new supported opcodes:
861 next_opcode
862 check_opcode_types
863 set_private_data_ptrs
864 get_framesize
865 init_frame
866 get_recurse_data_length
867 copy_recurse_data
868 compile_matchingpath
869 compile_backtrackingpath
870 */
871
/* Returns the address of the opcode that follows the one at cc.

NULL is returned for OP_ANYBYTE when SUPPORT_UNICODE is defined and
common->utf is set, and for every opcode that has no case label below (after
SLJIT_UNREACHABLE()). common is only read when SUPPORT_UNICODE is defined. */
static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
PCRE2_SPTR next;

SLJIT_UNUSED_ARG(common);

switch(*cc)
  {
  /* The length table entry is the complete length. */
  case OP_SOD:
  case OP_SOM:
  case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  case OP_ANY:
  case OP_ALLANY:
  case OP_NOTPROP:
  case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE:
  case OP_HSPACE:
  case OP_NOT_VSPACE:
  case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN:
  case OP_EOD:
  case OP_CIRC:
  case OP_CIRCM:
  case OP_DOLL:
  case OP_DOLLM:
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  case OP_CRPOSSTAR:
  case OP_CRPOSPLUS:
  case OP_CRPOSQUERY:
  case OP_CRPOSRANGE:
  case OP_CLASS:
  case OP_NCLASS:
  case OP_REF:
  case OP_REFI:
  case OP_DNREF:
  case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET:
  case OP_KETRMAX:
  case OP_KETRMIN:
  case OP_KETRPOS:
  case OP_REVERSE:
  case OP_ASSERT:
  case OP_ASSERT_NOT:
  case OP_ASSERTBACK:
  case OP_ASSERTBACK_NOT:
  case OP_ASSERT_NA:
  case OP_ASSERTBACK_NA:
  case OP_ONCE:
  case OP_SCRIPT_RUN:
  case OP_BRA:
  case OP_BRAPOS:
  case OP_CBRA:
  case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA:
  case OP_SBRAPOS:
  case OP_SCBRA:
  case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF:
  case OP_DNCREF:
  case OP_RREF:
  case OP_DNRREF:
  case OP_FALSE:
  case OP_TRUE:
  case OP_BRAZERO:
  case OP_BRAMINZERO:
  case OP_BRAPOSZERO:
  case OP_PRUNE:
  case OP_SKIP:
  case OP_THEN:
  case OP_COMMIT:
  case OP_FAIL:
  case OP_ACCEPT:
  case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  return cc + PRIV(OP_lengths)[*cc];

  /* The table length covers a single code unit for the character; in UTF
  mode the extra code units of a multi-unit character are added, based on
  the last code unit covered by the table length. */
  case OP_CHAR:
  case OP_CHARI:
  case OP_NOT:
  case OP_NOTI:
  case OP_STAR:
  case OP_MINSTAR:
  case OP_PLUS:
  case OP_MINPLUS:
  case OP_QUERY:
  case OP_MINQUERY:
  case OP_UPTO:
  case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSPLUS:
  case OP_POSQUERY:
  case OP_POSUPTO:
  case OP_STARI:
  case OP_MINSTARI:
  case OP_PLUSI:
  case OP_MINPLUSI:
  case OP_QUERYI:
  case OP_MINQUERYI:
  case OP_UPTOI:
  case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI:
  case OP_POSPLUSI:
  case OP_POSQUERYI:
  case OP_POSUPTOI:
  case OP_NOTSTAR:
  case OP_NOTMINSTAR:
  case OP_NOTPLUS:
  case OP_NOTMINPLUS:
  case OP_NOTQUERY:
  case OP_NOTMINQUERY:
  case OP_NOTUPTO:
  case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR:
  case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY:
  case OP_NOTPOSUPTO:
  case OP_NOTSTARI:
  case OP_NOTMINSTARI:
  case OP_NOTPLUSI:
  case OP_NOTMINPLUSI:
  case OP_NOTQUERYI:
  case OP_NOTMINQUERYI:
  case OP_NOTUPTOI:
  case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI:
  case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI:
  case OP_NOTPOSUPTOI:
  next = cc + PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(next[-1]))
    next += GET_EXTRALEN(next[-1]);
#endif
  return next;

  /* Special cases. The result is one code unit short of the table length,
  i.e. it points at the last code unit of the table length. */
  case OP_TYPESTAR:
  case OP_TYPEMINSTAR:
  case OP_TYPEPLUS:
  case OP_TYPEMINPLUS:
  case OP_TYPEQUERY:
  case OP_TYPEMINQUERY:
  case OP_TYPEUPTO:
  case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR:
  case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY:
  case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[*cc] - 1;

  /* Not handled in UTF mode. */
  case OP_ANYBYTE:
#ifdef SUPPORT_UNICODE
  if (common->utf)
    return NULL;
#endif
  return cc + 1;

  /* The total length is stored inside the opcode, after two link-sized
  fields. */
  case OP_CALLOUT_STR:
  return cc + GET(cc, 1 + 2*LINK_SIZE);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  /* The total length is stored right after the opcode. */
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  /* cc[1] is added to three fixed code units. */
  case OP_MARK:
  case OP_COMMIT_ARG:
  case OP_PRUNE_ARG:
  case OP_SKIP_ARG:
  case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  default:
  SLJIT_UNREACHABLE();
  return NULL;
  }
}
1072
check_opcode_types(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend)1073 static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
1074 {
1075 int count;
1076 PCRE2_SPTR slot;
1077 PCRE2_SPTR assert_back_end = cc - 1;
1078 PCRE2_SPTR assert_na_end = cc - 1;
1079
1080 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
1081 while (cc < ccend)
1082 {
1083 switch(*cc)
1084 {
1085 case OP_SET_SOM:
1086 common->has_set_som = TRUE;
1087 common->might_be_empty = TRUE;
1088 cc += 1;
1089 break;
1090
1091 case OP_REFI:
1092 #ifdef SUPPORT_UNICODE
1093 if (common->iref_ptr == 0)
1094 {
1095 common->iref_ptr = common->ovector_start;
1096 common->ovector_start += 3 * sizeof(sljit_sw);
1097 }
1098 #endif /* SUPPORT_UNICODE */
1099 /* Fall through. */
1100 case OP_REF:
1101 common->optimized_cbracket[GET2(cc, 1)] = 0;
1102 cc += 1 + IMM2_SIZE;
1103 break;
1104
1105 case OP_ASSERT_NA:
1106 case OP_ASSERTBACK_NA:
1107 slot = bracketend(cc);
1108 if (slot > assert_na_end)
1109 assert_na_end = slot;
1110 cc += 1 + LINK_SIZE;
1111 break;
1112
1113 case OP_CBRAPOS:
1114 case OP_SCBRAPOS:
1115 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
1116 cc += 1 + LINK_SIZE + IMM2_SIZE;
1117 break;
1118
1119 case OP_COND:
1120 case OP_SCOND:
1121 /* Only AUTO_CALLOUT can insert this opcode. We do
1122 not intend to support this case. */
1123 if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
1124 return FALSE;
1125 cc += 1 + LINK_SIZE;
1126 break;
1127
1128 case OP_CREF:
1129 common->optimized_cbracket[GET2(cc, 1)] = 0;
1130 cc += 1 + IMM2_SIZE;
1131 break;
1132
1133 case OP_DNREF:
1134 case OP_DNREFI:
1135 case OP_DNCREF:
1136 count = GET2(cc, 1 + IMM2_SIZE);
1137 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
1138 while (count-- > 0)
1139 {
1140 common->optimized_cbracket[GET2(slot, 0)] = 0;
1141 slot += common->name_entry_size;
1142 }
1143 cc += 1 + 2 * IMM2_SIZE;
1144 break;
1145
1146 case OP_RECURSE:
1147 /* Set its value only once. */
1148 if (common->recursive_head_ptr == 0)
1149 {
1150 common->recursive_head_ptr = common->ovector_start;
1151 common->ovector_start += sizeof(sljit_sw);
1152 }
1153 cc += 1 + LINK_SIZE;
1154 break;
1155
1156 case OP_CALLOUT:
1157 case OP_CALLOUT_STR:
1158 if (common->capture_last_ptr == 0)
1159 {
1160 common->capture_last_ptr = common->ovector_start;
1161 common->ovector_start += sizeof(sljit_sw);
1162 }
1163 cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
1164 break;
1165
1166 case OP_ASSERTBACK:
1167 slot = bracketend(cc);
1168 if (slot > assert_back_end)
1169 assert_back_end = slot;
1170 cc += 1 + LINK_SIZE;
1171 break;
1172
1173 case OP_THEN_ARG:
1174 common->has_then = TRUE;
1175 common->control_head_ptr = 1;
1176 /* Fall through. */
1177
1178 case OP_COMMIT_ARG:
1179 case OP_PRUNE_ARG:
1180 if (cc < assert_na_end)
1181 return FALSE;
1182 /* Fall through */
1183 case OP_MARK:
1184 if (common->mark_ptr == 0)
1185 {
1186 common->mark_ptr = common->ovector_start;
1187 common->ovector_start += sizeof(sljit_sw);
1188 }
1189 cc += 1 + 2 + cc[1];
1190 break;
1191
1192 case OP_THEN:
1193 common->has_then = TRUE;
1194 common->control_head_ptr = 1;
1195 cc += 1;
1196 break;
1197
1198 case OP_SKIP:
1199 if (cc < assert_back_end)
1200 common->has_skip_in_assert_back = TRUE;
1201 if (cc < assert_na_end)
1202 return FALSE;
1203 cc += 1;
1204 break;
1205
1206 case OP_SKIP_ARG:
1207 common->control_head_ptr = 1;
1208 common->has_skip_arg = TRUE;
1209 if (cc < assert_back_end)
1210 common->has_skip_in_assert_back = TRUE;
1211 if (cc < assert_na_end)
1212 return FALSE;
1213 cc += 1 + 2 + cc[1];
1214 break;
1215
1216 case OP_PRUNE:
1217 case OP_COMMIT:
1218 case OP_ASSERT_ACCEPT:
1219 if (cc < assert_na_end)
1220 return FALSE;
1221 cc++;
1222 break;
1223
1224 default:
1225 cc = next_opcode(common, cc);
1226 if (cc == NULL)
1227 return FALSE;
1228 break;
1229 }
1230 }
1231 return TRUE;
1232 }
1233
1234 #define EARLY_FAIL_ENHANCE_MAX (1 + 3)
1235
1236 /*
1237 start:
1238 0 - skip / early fail allowed
1239 1 - only early fail with range allowed
1240 >1 - (start - 1) early fail is processed
1241
1242 return: current number of iterators enhanced with fast fail
1243 */
detect_early_fail(compiler_common * common,PCRE2_SPTR cc,int * private_data_start,sljit_s32 depth,int start,BOOL fast_forward_allowed)1244 static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start,
1245 sljit_s32 depth, int start, BOOL fast_forward_allowed)
1246 {
1247 PCRE2_SPTR begin = cc;
1248 PCRE2_SPTR next_alt;
1249 PCRE2_SPTR end;
1250 PCRE2_SPTR accelerated_start;
1251 BOOL prev_fast_forward_allowed;
1252 int result = 0;
1253 int count;
1254
1255 SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
1256 SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
1257 SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);
1258
1259 next_alt = cc + GET(cc, 1);
1260 if (*next_alt == OP_ALT)
1261 fast_forward_allowed = FALSE;
1262
1263 do
1264 {
1265 count = start;
1266 cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1267
1268 while (TRUE)
1269 {
1270 accelerated_start = NULL;
1271
1272 switch(*cc)
1273 {
1274 case OP_SOD:
1275 case OP_SOM:
1276 case OP_SET_SOM:
1277 case OP_NOT_WORD_BOUNDARY:
1278 case OP_WORD_BOUNDARY:
1279 case OP_EODN:
1280 case OP_EOD:
1281 case OP_CIRC:
1282 case OP_CIRCM:
1283 case OP_DOLL:
1284 case OP_DOLLM:
1285 /* Zero width assertions. */
1286 cc++;
1287 continue;
1288
1289 case OP_NOT_DIGIT:
1290 case OP_DIGIT:
1291 case OP_NOT_WHITESPACE:
1292 case OP_WHITESPACE:
1293 case OP_NOT_WORDCHAR:
1294 case OP_WORDCHAR:
1295 case OP_ANY:
1296 case OP_ALLANY:
1297 case OP_ANYBYTE:
1298 case OP_NOT_HSPACE:
1299 case OP_HSPACE:
1300 case OP_NOT_VSPACE:
1301 case OP_VSPACE:
1302 fast_forward_allowed = FALSE;
1303 cc++;
1304 continue;
1305
1306 case OP_ANYNL:
1307 case OP_EXTUNI:
1308 fast_forward_allowed = FALSE;
1309 if (count == 0)
1310 count = 1;
1311 cc++;
1312 continue;
1313
1314 case OP_NOTPROP:
1315 case OP_PROP:
1316 fast_forward_allowed = FALSE;
1317 cc += 1 + 2;
1318 continue;
1319
1320 case OP_CHAR:
1321 case OP_CHARI:
1322 case OP_NOT:
1323 case OP_NOTI:
1324 fast_forward_allowed = FALSE;
1325 cc += 2;
1326 #ifdef SUPPORT_UNICODE
1327 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1328 #endif
1329 continue;
1330
1331 case OP_TYPESTAR:
1332 case OP_TYPEMINSTAR:
1333 case OP_TYPEPLUS:
1334 case OP_TYPEMINPLUS:
1335 case OP_TYPEPOSSTAR:
1336 case OP_TYPEPOSPLUS:
1337 /* The type or prop opcode is skipped in the next iteration. */
1338 cc += 1;
1339
1340 if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)
1341 {
1342 accelerated_start = cc - 1;
1343 break;
1344 }
1345
1346 if (count == 0)
1347 count = 1;
1348 fast_forward_allowed = FALSE;
1349 continue;
1350
1351 case OP_TYPEUPTO:
1352 case OP_TYPEMINUPTO:
1353 case OP_TYPEEXACT:
1354 case OP_TYPEPOSUPTO:
1355 cc += IMM2_SIZE;
1356 /* Fall through */
1357
1358 case OP_TYPEQUERY:
1359 case OP_TYPEMINQUERY:
1360 case OP_TYPEPOSQUERY:
1361 /* The type or prop opcode is skipped in the next iteration. */
1362 fast_forward_allowed = FALSE;
1363 if (count == 0)
1364 count = 1;
1365 cc += 1;
1366 continue;
1367
1368 case OP_STAR:
1369 case OP_MINSTAR:
1370 case OP_PLUS:
1371 case OP_MINPLUS:
1372 case OP_POSSTAR:
1373 case OP_POSPLUS:
1374
1375 case OP_STARI:
1376 case OP_MINSTARI:
1377 case OP_PLUSI:
1378 case OP_MINPLUSI:
1379 case OP_POSSTARI:
1380 case OP_POSPLUSI:
1381
1382 case OP_NOTSTAR:
1383 case OP_NOTMINSTAR:
1384 case OP_NOTPLUS:
1385 case OP_NOTMINPLUS:
1386 case OP_NOTPOSSTAR:
1387 case OP_NOTPOSPLUS:
1388
1389 case OP_NOTSTARI:
1390 case OP_NOTMINSTARI:
1391 case OP_NOTPLUSI:
1392 case OP_NOTMINPLUSI:
1393 case OP_NOTPOSSTARI:
1394 case OP_NOTPOSPLUSI:
1395 accelerated_start = cc;
1396 cc += 2;
1397 #ifdef SUPPORT_UNICODE
1398 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1399 #endif
1400 break;
1401
1402 case OP_UPTO:
1403 case OP_MINUPTO:
1404 case OP_EXACT:
1405 case OP_POSUPTO:
1406 case OP_UPTOI:
1407 case OP_MINUPTOI:
1408 case OP_EXACTI:
1409 case OP_POSUPTOI:
1410 case OP_NOTUPTO:
1411 case OP_NOTMINUPTO:
1412 case OP_NOTEXACT:
1413 case OP_NOTPOSUPTO:
1414 case OP_NOTUPTOI:
1415 case OP_NOTMINUPTOI:
1416 case OP_NOTEXACTI:
1417 case OP_NOTPOSUPTOI:
1418 cc += IMM2_SIZE;
1419 /* Fall through */
1420
1421 case OP_QUERY:
1422 case OP_MINQUERY:
1423 case OP_POSQUERY:
1424 case OP_QUERYI:
1425 case OP_MINQUERYI:
1426 case OP_POSQUERYI:
1427 case OP_NOTQUERY:
1428 case OP_NOTMINQUERY:
1429 case OP_NOTPOSQUERY:
1430 case OP_NOTQUERYI:
1431 case OP_NOTMINQUERYI:
1432 case OP_NOTPOSQUERYI:
1433 fast_forward_allowed = FALSE;
1434 if (count == 0)
1435 count = 1;
1436 cc += 2;
1437 #ifdef SUPPORT_UNICODE
1438 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1439 #endif
1440 continue;
1441
1442 case OP_CLASS:
1443 case OP_NCLASS:
1444 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
1445 case OP_XCLASS:
1446 accelerated_start = cc;
1447 cc += ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR))));
1448 #else
1449 accelerated_start = cc;
1450 cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
1451 #endif
1452
1453 switch (*cc)
1454 {
1455 case OP_CRSTAR:
1456 case OP_CRMINSTAR:
1457 case OP_CRPLUS:
1458 case OP_CRMINPLUS:
1459 case OP_CRPOSSTAR:
1460 case OP_CRPOSPLUS:
1461 cc++;
1462 break;
1463
1464 case OP_CRRANGE:
1465 case OP_CRMINRANGE:
1466 case OP_CRPOSRANGE:
1467 cc += 2 * IMM2_SIZE;
1468 /* Fall through */
1469 case OP_CRQUERY:
1470 case OP_CRMINQUERY:
1471 case OP_CRPOSQUERY:
1472 cc++;
1473 if (count == 0)
1474 count = 1;
1475 /* Fall through */
1476 default:
1477 accelerated_start = NULL;
1478 fast_forward_allowed = FALSE;
1479 continue;
1480 }
1481 break;
1482
1483 case OP_ONCE:
1484 case OP_BRA:
1485 case OP_CBRA:
1486 end = cc + GET(cc, 1);
1487
1488 prev_fast_forward_allowed = fast_forward_allowed;
1489 fast_forward_allowed = FALSE;
1490 if (depth >= 4)
1491 break;
1492
1493 end = bracketend(cc) - (1 + LINK_SIZE);
1494 if (*end != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0))
1495 break;
1496
1497 count = detect_early_fail(common, cc, private_data_start, depth + 1, count, prev_fast_forward_allowed);
1498
1499 if (PRIVATE_DATA(cc) != 0)
1500 common->private_data_ptrs[begin - common->start] = 1;
1501
1502 if (count < EARLY_FAIL_ENHANCE_MAX)
1503 {
1504 cc = end + (1 + LINK_SIZE);
1505 continue;
1506 }
1507 break;
1508
1509 case OP_KET:
1510 SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
1511 if (cc >= next_alt)
1512 break;
1513 cc += 1 + LINK_SIZE;
1514 continue;
1515 }
1516
1517 if (accelerated_start != NULL)
1518 {
1519 if (count == 0)
1520 {
1521 count++;
1522
1523 if (fast_forward_allowed)
1524 {
1525 common->fast_forward_bc_ptr = accelerated_start;
1526 common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
1527 *private_data_start += sizeof(sljit_sw);
1528 }
1529 else
1530 {
1531 common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;
1532
1533 if (common->early_fail_start_ptr == 0)
1534 common->early_fail_start_ptr = *private_data_start;
1535
1536 *private_data_start += sizeof(sljit_sw);
1537 common->early_fail_end_ptr = *private_data_start;
1538
1539 if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1540 return EARLY_FAIL_ENHANCE_MAX;
1541 }
1542 }
1543 else
1544 {
1545 common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;
1546
1547 if (common->early_fail_start_ptr == 0)
1548 common->early_fail_start_ptr = *private_data_start;
1549
1550 *private_data_start += 2 * sizeof(sljit_sw);
1551 common->early_fail_end_ptr = *private_data_start;
1552
1553 if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1554 return EARLY_FAIL_ENHANCE_MAX;
1555 }
1556
1557 /* Cannot be part of a repeat. */
1558 common->private_data_ptrs[begin - common->start] = 1;
1559 count++;
1560
1561 if (count < EARLY_FAIL_ENHANCE_MAX)
1562 continue;
1563 }
1564
1565 break;
1566 }
1567
1568 if (*cc != OP_ALT && *cc != OP_KET)
1569 result = EARLY_FAIL_ENHANCE_MAX;
1570 else if (result < count)
1571 result = count;
1572
1573 cc = next_alt;
1574 next_alt = cc + GET(cc, 1);
1575 }
1576 while (*cc == OP_ALT);
1577
1578 return result;
1579 }
1580
/* Returns the number of private data words (0, 1 or 2) to reserve for the
repeat opcode at cc, which is the opcode following a character class.

cc      points to the opcode after the class data

Opcodes other than the OP_CR* repeats listed below yield 0. For the range
forms the result is limited by (max - min), capped at 2. A stored maximum of
zero is handled separately (NOTE(review): presumably it means "no upper
limit" - confirm against the opcode definition). */
static int get_class_iterator_size(PCRE2_SPTR cc)
{
sljit_u32 min;
sljit_u32 max;
switch(*cc)
  {
  case OP_CRSTAR:
  case OP_CRPLUS:
  return 2;

  case OP_CRMINSTAR:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  return 1;

  case OP_CRRANGE:
  case OP_CRMINRANGE:
  min = GET2(cc, 1);
  max = GET2(cc, 1 + IMM2_SIZE);
  if (max == 0)
    return (*cc == OP_CRRANGE) ? 2 : 1;
  max -= min;
  if (max > 2)
    max = 2;
  return max;

  default:
  return 0;
  }
}
1612
detect_repeat(compiler_common * common,PCRE2_SPTR begin)1613 static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
1614 {
1615 PCRE2_SPTR end = bracketend(begin);
1616 PCRE2_SPTR next;
1617 PCRE2_SPTR next_end;
1618 PCRE2_SPTR max_end;
1619 PCRE2_UCHAR type;
1620 sljit_sw length = end - begin;
1621 sljit_s32 min, max, i;
1622
1623 /* Detect fixed iterations first. */
1624 if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)
1625 return FALSE;
1626
1627 /* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?AB){1,3}/
1628 * Skip the check of the second part. */
1629 if (PRIVATE_DATA(end - LINK_SIZE) != 0)
1630 return TRUE;
1631
1632 next = end;
1633 min = 1;
1634 while (1)
1635 {
1636 if (*next != *begin)
1637 break;
1638 next_end = bracketend(next);
1639 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1640 break;
1641 next = next_end;
1642 min++;
1643 }
1644
1645 if (min == 2)
1646 return FALSE;
1647
1648 max = 0;
1649 max_end = next;
1650 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1651 {
1652 type = *next;
1653 while (1)
1654 {
1655 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1656 break;
1657 next_end = bracketend(next + 2 + LINK_SIZE);
1658 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1659 break;
1660 next = next_end;
1661 max++;
1662 }
1663
1664 if (next[0] == type && next[1] == *begin && max >= 1)
1665 {
1666 next_end = bracketend(next + 1);
1667 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1668 {
1669 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1670 if (*next_end != OP_KET)
1671 break;
1672
1673 if (i == max)
1674 {
1675 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1676 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1677 /* +2 the original and the last. */
1678 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1679 if (min == 1)
1680 return TRUE;
1681 min--;
1682 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1683 }
1684 }
1685 }
1686 }
1687
1688 if (min >= 3)
1689 {
1690 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1691 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1692 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1693 return TRUE;
1694 }
1695
1696 return FALSE;
1697 }
1698
/* Groups of case labels for the single character and character type
iterators. The numeric suffix is the number of private data words which
set_private_data_ptrs() reserves for the group; the A / B variants differ in
the length of the opcode (B has an IMM2_SIZE repeat count). */

#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1750
/* Assigns private data offsets to the opcodes between common->start and
ccend which need them, and stores the offsets in common->private_data_ptrs[].

common              compiler state
private_data_start  in/out: first free offset on entry, next free offset on
                    return
ccend               end of the compiled pattern

Brackets and assertions listed below get one word. Character iterators get
one or two words, but only when they are not inside a repeated bracket. The
scan stops early once the offset exceeds SLJIT_MAX_LOCAL_SIZE. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
PCRE2_SPTR cc = common->start;
PCRE2_SPTR alternative;
/* End of the enclosing repeated bracket; iterators before this address do
not get a private slot. */
PCRE2_SPTR end = NULL;
int private_data_ptr = *private_data_start;
/* space: words to reserve for an iterator; size: opcode length (negative
when a character follows, which may have extra UTF code units);
bracketlen: length of a bracket header. */
int space, size, bracketlen;
BOOL repeat_check = TRUE;

while (cc < ccend)
  {
  space = 0;
  size = 0;
  bracketlen = 0;
  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  /* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point). */
  if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
    {
    if (detect_repeat(common, cc))
      {
      /* These brackets are converted to repeats, so no global
      based single character repeat is allowed. */
      if (cc >= end)
        end = bracketend(cc);
      }
    }
  repeat_check = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry after the KET was written by detect_repeat(): give
       the repeat a slot and jump over the copies it covers. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    common->private_data_ptrs[cc - common->start] = 0;
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    size = 1;
    repeat_check = FALSE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    size = -2;
    space = 1;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    size = -2;
    space = 2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    size = -(2 + IMM2_SIZE);
    space = 2;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    size = 1;
    space = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    size = 1;
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      space = 2;
    break;

    case OP_TYPEUPTO:
    size = 1 + IMM2_SIZE;
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    break;

    case OP_TYPEMINUPTO:
    size = 1 + IMM2_SIZE;
    space = 2;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    size = 1 + 32 / sizeof(PCRE2_UCHAR);
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators which are not inside a repeated bracket
  get a private slot instead of allocating it on the stack. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw) * space;
    }

  if (size != 0)
    {
    if (size < 0)
      {
      /* The opcode ends with a character: step over its extra code
         units in UTF mode. */
      cc += -size;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  if (bracketlen > 0)
    {
    if (cc >= end)
      {
      /* Only brackets which are not closed by a plain OP_KET keep
         iterators inside them from getting private slots. */
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
*private_data_start = private_data_ptr;
}
1935
1936 /* Returns with a frame_types (always < 0) if no need for frame. */
get_framesize(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,BOOL recursive,BOOL * needs_control_head)1937 static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
1938 {
1939 int length = 0;
1940 int possessive = 0;
1941 BOOL stack_restore = FALSE;
1942 BOOL setsom_found = recursive;
1943 BOOL setmark_found = recursive;
1944 /* The last capture is a local variable even for recursions. */
1945 BOOL capture_last_found = FALSE;
1946
1947 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1948 SLJIT_ASSERT(common->control_head_ptr != 0);
1949 *needs_control_head = TRUE;
1950 #else
1951 *needs_control_head = FALSE;
1952 #endif
1953
1954 if (ccend == NULL)
1955 {
1956 ccend = bracketend(cc) - (1 + LINK_SIZE);
1957 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1958 {
1959 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1960 /* This is correct regardless of common->capture_last_ptr. */
1961 capture_last_found = TRUE;
1962 }
1963 cc = next_opcode(common, cc);
1964 }
1965
1966 SLJIT_ASSERT(cc != NULL);
1967 while (cc < ccend)
1968 switch(*cc)
1969 {
1970 case OP_SET_SOM:
1971 SLJIT_ASSERT(common->has_set_som);
1972 stack_restore = TRUE;
1973 if (!setsom_found)
1974 {
1975 length += 2;
1976 setsom_found = TRUE;
1977 }
1978 cc += 1;
1979 break;
1980
1981 case OP_MARK:
1982 case OP_COMMIT_ARG:
1983 case OP_PRUNE_ARG:
1984 case OP_THEN_ARG:
1985 SLJIT_ASSERT(common->mark_ptr != 0);
1986 stack_restore = TRUE;
1987 if (!setmark_found)
1988 {
1989 length += 2;
1990 setmark_found = TRUE;
1991 }
1992 if (common->control_head_ptr != 0)
1993 *needs_control_head = TRUE;
1994 cc += 1 + 2 + cc[1];
1995 break;
1996
1997 case OP_RECURSE:
1998 stack_restore = TRUE;
1999 if (common->has_set_som && !setsom_found)
2000 {
2001 length += 2;
2002 setsom_found = TRUE;
2003 }
2004 if (common->mark_ptr != 0 && !setmark_found)
2005 {
2006 length += 2;
2007 setmark_found = TRUE;
2008 }
2009 if (common->capture_last_ptr != 0 && !capture_last_found)
2010 {
2011 length += 2;
2012 capture_last_found = TRUE;
2013 }
2014 cc += 1 + LINK_SIZE;
2015 break;
2016
2017 case OP_CBRA:
2018 case OP_CBRAPOS:
2019 case OP_SCBRA:
2020 case OP_SCBRAPOS:
2021 stack_restore = TRUE;
2022 if (common->capture_last_ptr != 0 && !capture_last_found)
2023 {
2024 length += 2;
2025 capture_last_found = TRUE;
2026 }
2027 length += 3;
2028 cc += 1 + LINK_SIZE + IMM2_SIZE;
2029 break;
2030
2031 case OP_THEN:
2032 stack_restore = TRUE;
2033 if (common->control_head_ptr != 0)
2034 *needs_control_head = TRUE;
2035 cc ++;
2036 break;
2037
2038 default:
2039 stack_restore = TRUE;
2040 /* Fall through. */
2041
2042 case OP_NOT_WORD_BOUNDARY:
2043 case OP_WORD_BOUNDARY:
2044 case OP_NOT_DIGIT:
2045 case OP_DIGIT:
2046 case OP_NOT_WHITESPACE:
2047 case OP_WHITESPACE:
2048 case OP_NOT_WORDCHAR:
2049 case OP_WORDCHAR:
2050 case OP_ANY:
2051 case OP_ALLANY:
2052 case OP_ANYBYTE:
2053 case OP_NOTPROP:
2054 case OP_PROP:
2055 case OP_ANYNL:
2056 case OP_NOT_HSPACE:
2057 case OP_HSPACE:
2058 case OP_NOT_VSPACE:
2059 case OP_VSPACE:
2060 case OP_EXTUNI:
2061 case OP_EODN:
2062 case OP_EOD:
2063 case OP_CIRC:
2064 case OP_CIRCM:
2065 case OP_DOLL:
2066 case OP_DOLLM:
2067 case OP_CHAR:
2068 case OP_CHARI:
2069 case OP_NOT:
2070 case OP_NOTI:
2071
2072 case OP_EXACT:
2073 case OP_POSSTAR:
2074 case OP_POSPLUS:
2075 case OP_POSQUERY:
2076 case OP_POSUPTO:
2077
2078 case OP_EXACTI:
2079 case OP_POSSTARI:
2080 case OP_POSPLUSI:
2081 case OP_POSQUERYI:
2082 case OP_POSUPTOI:
2083
2084 case OP_NOTEXACT:
2085 case OP_NOTPOSSTAR:
2086 case OP_NOTPOSPLUS:
2087 case OP_NOTPOSQUERY:
2088 case OP_NOTPOSUPTO:
2089
2090 case OP_NOTEXACTI:
2091 case OP_NOTPOSSTARI:
2092 case OP_NOTPOSPLUSI:
2093 case OP_NOTPOSQUERYI:
2094 case OP_NOTPOSUPTOI:
2095
2096 case OP_TYPEEXACT:
2097 case OP_TYPEPOSSTAR:
2098 case OP_TYPEPOSPLUS:
2099 case OP_TYPEPOSQUERY:
2100 case OP_TYPEPOSUPTO:
2101
2102 case OP_CLASS:
2103 case OP_NCLASS:
2104 case OP_XCLASS:
2105
2106 case OP_CALLOUT:
2107 case OP_CALLOUT_STR:
2108
2109 cc = next_opcode(common, cc);
2110 SLJIT_ASSERT(cc != NULL);
2111 break;
2112 }
2113
2114 /* Possessive quantifiers can use a special case. */
2115 if (SLJIT_UNLIKELY(possessive == length))
2116 return stack_restore ? no_frame : no_stack;
2117
2118 if (length > 0)
2119 return length + 1;
2120 return stack_restore ? no_frame : no_stack;
2121 }
2122
init_frame(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,int stackpos,int stacktop)2123 static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
2124 {
2125 DEFINE_COMPILER;
2126 BOOL setsom_found = FALSE;
2127 BOOL setmark_found = FALSE;
2128 /* The last capture is a local variable even for recursions. */
2129 BOOL capture_last_found = FALSE;
2130 int offset;
2131
2132 /* >= 1 + shortest item size (2) */
2133 SLJIT_UNUSED_ARG(stacktop);
2134 SLJIT_ASSERT(stackpos >= stacktop + 2);
2135
2136 stackpos = STACK(stackpos);
2137 if (ccend == NULL)
2138 {
2139 ccend = bracketend(cc) - (1 + LINK_SIZE);
2140 if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
2141 cc = next_opcode(common, cc);
2142 }
2143
2144 SLJIT_ASSERT(cc != NULL);
2145 while (cc < ccend)
2146 switch(*cc)
2147 {
2148 case OP_SET_SOM:
2149 SLJIT_ASSERT(common->has_set_som);
2150 if (!setsom_found)
2151 {
2152 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2153 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2154 stackpos -= (int)sizeof(sljit_sw);
2155 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2156 stackpos -= (int)sizeof(sljit_sw);
2157 setsom_found = TRUE;
2158 }
2159 cc += 1;
2160 break;
2161
2162 case OP_MARK:
2163 case OP_COMMIT_ARG:
2164 case OP_PRUNE_ARG:
2165 case OP_THEN_ARG:
2166 SLJIT_ASSERT(common->mark_ptr != 0);
2167 if (!setmark_found)
2168 {
2169 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2170 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2171 stackpos -= (int)sizeof(sljit_sw);
2172 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2173 stackpos -= (int)sizeof(sljit_sw);
2174 setmark_found = TRUE;
2175 }
2176 cc += 1 + 2 + cc[1];
2177 break;
2178
2179 case OP_RECURSE:
2180 if (common->has_set_som && !setsom_found)
2181 {
2182 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2183 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2184 stackpos -= (int)sizeof(sljit_sw);
2185 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2186 stackpos -= (int)sizeof(sljit_sw);
2187 setsom_found = TRUE;
2188 }
2189 if (common->mark_ptr != 0 && !setmark_found)
2190 {
2191 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2192 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2193 stackpos -= (int)sizeof(sljit_sw);
2194 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2195 stackpos -= (int)sizeof(sljit_sw);
2196 setmark_found = TRUE;
2197 }
2198 if (common->capture_last_ptr != 0 && !capture_last_found)
2199 {
2200 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2201 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2202 stackpos -= (int)sizeof(sljit_sw);
2203 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2204 stackpos -= (int)sizeof(sljit_sw);
2205 capture_last_found = TRUE;
2206 }
2207 cc += 1 + LINK_SIZE;
2208 break;
2209
2210 case OP_CBRA:
2211 case OP_CBRAPOS:
2212 case OP_SCBRA:
2213 case OP_SCBRAPOS:
2214 if (common->capture_last_ptr != 0 && !capture_last_found)
2215 {
2216 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2217 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2218 stackpos -= (int)sizeof(sljit_sw);
2219 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2220 stackpos -= (int)sizeof(sljit_sw);
2221 capture_last_found = TRUE;
2222 }
2223 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2224 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
2225 stackpos -= (int)sizeof(sljit_sw);
2226 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
2227 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
2228 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2229 stackpos -= (int)sizeof(sljit_sw);
2230 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
2231 stackpos -= (int)sizeof(sljit_sw);
2232
2233 cc += 1 + LINK_SIZE + IMM2_SIZE;
2234 break;
2235
2236 default:
2237 cc = next_opcode(common, cc);
2238 SLJIT_ASSERT(cc != NULL);
2239 break;
2240 }
2241
2242 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
2243 SLJIT_ASSERT(stackpos == STACK(stacktop));
2244 }
2245
/* Number of temporary registers used in turn by the delayed memory copy. */
#define RECURSE_TMP_REG_COUNT 3

/* State of a delayed memory-to-memory copy sequence. Each temporary register
can hold one loaded value whose store has not been emitted yet. */
typedef struct delayed_mem_copy_status {
  struct sljit_compiler *compiler;
  /* Base register of the pending store for each slot, or -1 when there is
     no pending store. */
  int store_bases[RECURSE_TMP_REG_COUNT];
  /* Offset of the pending store for each slot. */
  int store_offsets[RECURSE_TMP_REG_COUNT];
  /* Temporary registers; they must be set before delayed_mem_copy_init(). */
  int tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Where the original value of each temporary register is kept while it is
     in use; equal to the temporary register when no saving is needed. They
     must be set before delayed_mem_copy_init(). */
  int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Index of the slot used by the next move. */
  int next_tmp_reg;
} delayed_mem_copy_status;
2256
/* Prepares status for a new copy sequence: no store is pending, the first
slot is used next and code is emitted through common->compiler.

The tmp_regs[] and saved_tmp_regs[] members must already be filled in by the
caller; they are only checked here. */
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
int i;

for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)
  {
  SLJIT_ASSERT(status->tmp_regs[i] >= 0);
  /* A saved register with a real register index must be the temporary
     register itself. */
  SLJIT_ASSERT(sljit_get_register_index(status->saved_tmp_regs[i]) < 0 || status->tmp_regs[i] == status->saved_tmp_regs[i]);

  status->store_bases[i] = -1;
  }
status->next_tmp_reg = 0;
status->compiler = common->compiler;
}
2271
/* Queues the copy of one word from [load_base + load_offset] to
[store_base + store_offset].

The load is emitted immediately into the next temporary register, while the
store is only remembered. It is emitted when the same register is needed
again or by delayed_mem_copy_finish(). The registers are used in turn, so up
to RECURSE_TMP_REG_COUNT loads can be emitted before their stores. */
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_sw store_offset)
{
struct sljit_compiler *compiler = status->compiler;
int next_tmp_reg = status->next_tmp_reg;
int tmp_reg = status->tmp_regs[next_tmp_reg];

SLJIT_ASSERT(load_base > 0 && store_base > 0);

if (status->store_bases[next_tmp_reg] == -1)
  {
  /* Preserve virtual registers. */
  if (sljit_get_register_index(status->saved_tmp_regs[next_tmp_reg]) < 0)
    OP1(SLJIT_MOV, status->saved_tmp_regs[next_tmp_reg], 0, tmp_reg, 0);
  }
else
  /* Flush the store which is still pending for this register. */
  OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0);

OP1(SLJIT_MOV, tmp_reg, 0, SLJIT_MEM1(load_base), load_offset);
status->store_bases[next_tmp_reg] = store_base;
status->store_offsets[next_tmp_reg] = store_offset;

status->next_tmp_reg = (next_tmp_reg + 1) % RECURSE_TMP_REG_COUNT;
}
2296
/* Ends a delayed copy sequence: emits every store that is still pending and
moves the preserved content back into each register whose save location has a
negative sljit_get_register_index(). Slots are visited in round-robin order
starting with the one that would have been used next, i.e. the oldest
pending store is written first. */
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int remaining;

for (remaining = RECURSE_TMP_REG_COUNT; remaining > 0; remaining--)
  {
  if (status->store_bases[slot] != -1)
    {
    const int value_reg = status->tmp_regs[slot];
    const int backup_reg = status->saved_tmp_regs[slot];

    /* Write out the value that is still held in the register. */
    OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], value_reg, 0);

    /* Restore virtual registers. */
    if (sljit_get_register_index(backup_reg) < 0)
      OP1(SLJIT_MOV, value_reg, 0, backup_reg, 0);
    }

  slot = (slot + 1) % RECURSE_TMP_REG_COUNT;
  }
}
2320
/* The slot count is only needed by the delayed copy helpers above. */
#undef RECURSE_TMP_REG_COUNT
2322
recurse_check_bit(compiler_common * common,sljit_sw bit_index)2323 static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index)
2324 {
2325 uint8_t *byte;
2326 uint8_t mask;
2327
2328 SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0);
2329
2330 bit_index >>= SLJIT_WORD_SHIFT;
2331
2332 SLJIT_ASSERT((bit_index >> 3) < common->recurse_bitset_size);
2333
2334 mask = 1 << (bit_index & 0x7);
2335 byte = common->recurse_bitset + (bit_index >> 3);
2336
2337 if (*byte & mask)
2338 return FALSE;
2339
2340 *byte |= mask;
2341 return TRUE;
2342 }
2343
/* Flags collected by get_recurse_data_length() while it scans a byte code range. */
enum get_recurse_flags {
  /* Set by OP_PRUNE, OP_SKIP, OP_COMMIT, OP_THEN, OP_SKIP_ARG, OP_COMMIT_ARG,
  OP_PRUNE_ARG and OP_THEN_ARG. */
  recurse_flag_quit_found = (1 << 0),
  /* Set by OP_ACCEPT and OP_ASSERT_ACCEPT. */
  recurse_flag_accept_found = (1 << 1),
  /* Set by OP_SET_SOM, and by OP_RECURSE when common->has_set_som is set. */
  recurse_flag_setsom_found = (1 << 2),
  /* Set by OP_MARK, OP_COMMIT_ARG, OP_PRUNE_ARG and OP_THEN_ARG, and by
  OP_RECURSE when common->mark_ptr is non-zero. */
  recurse_flag_setmark_found = (1 << 3),
  /* Set by OP_THEN, and by OP_MARK, OP_COMMIT_ARG, OP_PRUNE_ARG and OP_THEN_ARG
  when common->control_head_ptr is non-zero. Always set when
  DEBUG_FORCE_CONTROL_HEAD is enabled. */
  recurse_flag_control_head_found = (1 << 4),
};
2351
/* Computes how many machine words are needed to save the data that belongs to
the byte code range [cc, ccend) around a recursion.

The range is scanned opcode by opcode. Every private or shared location is
counted only once: common->recurse_bitset records which locations have
already been seen (see recurse_check_bit()). The count starts at one and is
extended at the end depending on the collected flags.

Arguments:
  common        compiler state; recurse_bitset is cleared and then filled
  cc            first opcode of the range
  ccend         end of the range (the scan must stop exactly here)
  result_flags  receives the collected get_recurse_flags bits

Returns the number of machine words.

NOTE: copy_recurse_data() scans the same range with the same per-opcode
rules; the two functions have to be kept in sync. */
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, uint32_t *result_flags)
{
int length = 1;
int size, offset;
PCRE2_SPTR alternative;
uint32_t recurse_flags = 0;

/* Nothing has been counted yet. */
memset(common->recurse_bitset, 0, common->recurse_bitset_size);

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
recurse_flags |= recurse_flag_control_head_found;
#endif

/* Calculate the sum of the private machine words. */
while (cc < ccend)
  {
  size = 0;
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    recurse_flags |= recurse_flag_setsom_found;
    cc += 1;
    break;

    case OP_RECURSE:
    /* A nested recursion is treated as if it could set the start of match
    and the mark whenever the pattern uses them at all. */
    if (common->has_set_som)
      recurse_flags |= recurse_flag_setsom_found;
    if (common->mark_ptr != 0)
      recurse_flags |= recurse_flag_setmark_found;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      length++;
    cc += 1 + LINK_SIZE;
    break;

    case OP_KET:
    offset = PRIVATE_DATA(cc);
    if (offset != 0)
      {
      if (recurse_check_bit(common, offset))
        length++;
      /* The private data of cc + 1 holds an additional distance to skip. */
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    /* These brackets always have one private word. */
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
      length++;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    offset = GET2(cc, 1 + LINK_SIZE);
    /* The two ovector words of a capture are counted as a pair; the bit of
    the first word guards both of them. */
    if (recurse_check_bit(common, OVECTOR(offset << 1)))
      {
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
      length += 2;
      }
    if (common->optimized_cbracket[offset] == 0 && recurse_check_bit(common, OVECTOR_PRIV(offset)))
      length++;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      length++;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    offset = GET2(cc, 1 + LINK_SIZE);
    if (recurse_check_bit(common, OVECTOR(offset << 1)))
      {
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
      length += 2;
      }
    if (recurse_check_bit(common, OVECTOR_PRIV(offset)))
      length++;
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
      length++;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      length++;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if ((*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc)))
      length++;
    cc += 1 + LINK_SIZE;
    break;

    /* Character iterators: one or two private words, if any. */
    CASE_ITERATOR_PRIVATE_DATA_1
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      length++;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      {
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      {
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    /* Character type iterators: same as above without a character operand. */
    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      length++;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      {
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      {
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
    size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif

    /* The number of words depends on the iterator that follows the class. */
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      length += get_class_iterator_size(cc + size);
    cc += size;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    recurse_flags |= recurse_flag_setmark_found;
    if (common->control_head_ptr != 0)
      recurse_flags |= recurse_flag_control_head_found;
    if (*cc != OP_MARK)
      recurse_flags |= recurse_flag_quit_found;

    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_SKIP:
    case OP_COMMIT:
    recurse_flags |= recurse_flag_quit_found;
    cc++;
    break;

    case OP_SKIP_ARG:
    recurse_flags |= recurse_flag_quit_found;
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    recurse_flags |= recurse_flag_quit_found | recurse_flag_control_head_found;
    cc++;
    break;

    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    recurse_flags |= recurse_flag_accept_found;
    cc++;
    break;

    default:
    /* Opcodes without any data to save are simply skipped. */
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }
SLJIT_ASSERT(cc == ccend);

/* One word for the control head, and, only when a quit verb was found, one
word each for the start of match and for the mark. */
if (recurse_flags & recurse_flag_control_head_found)
  length++;
if (recurse_flags & recurse_flag_quit_found)
  {
  if (recurse_flags & recurse_flag_setsom_found)
    length++;
  if (recurse_flags & recurse_flag_setmark_found)
    length++;
  }

*result_flags = recurse_flags;
return length;
}
2595
/* Copy modes of copy_recurse_data(). "Global" refers to the SLJIT_SP relative
home locations of the values, the other side is the save area on the stack. */
enum copy_recurse_data_types {
  /* Home locations -> save area; private, shared and kept shared values. */
  recurse_copy_from_global,
  /* Save area -> home locations; private values only. */
  recurse_copy_private_to_global,
  /* Save area -> home locations; shared and kept shared values. */
  recurse_copy_shared_to_global,
  /* Save area -> home locations; kept shared values only. */
  recurse_copy_kept_shared_to_global,
  /* Moves in both directions between the home locations and the save area
  (addressed by TMP2 in this mode); private and shared values, but not the
  kept shared ones. */
  recurse_swap_global
};
2603
/* Emits the moves that transfer the recursion related values of the byte
code range [cc, ccend) between their SLJIT_SP relative home locations and the
save area on the stack.

The range is scanned with the same per-opcode rules as in
get_recurse_data_length(), so every value gets the save area slot that was
counted for it there. Each opcode contributes up to three groups of values:
private, shared and kept shared ones, and "type" selects which groups are
really copied and in which direction (see enum copy_recurse_data_types).
Slots of the groups that are not copied are skipped.

Arguments:
  common         compiler state; recurse_bitset is cleared and then filled
  cc             first opcode of the range
  ccend          end of the range
  type           one of the copy_recurse_data_types values
  stackptr       index of the first save area slot (converted by STACK())
  stacktop       index just after the last save area slot
  recurse_flags  flags returned by get_recurse_data_length() */
static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
  int type, int stackptr, int stacktop, uint32_t recurse_flags)
{
delayed_mem_copy_status status;
PCRE2_SPTR alternative;
sljit_sw private_srcw[2];
sljit_sw shared_srcw[3];
sljit_sw kept_shared_srcw[2];
int private_count, shared_count, kept_shared_count;
int from_sp, base_reg, offset, i;

memset(common->recurse_bitset, 0, common->recurse_bitset_size);

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
recurse_check_bit(common, common->control_head_ptr);
#endif

/* Select the direction of the copy and the register that addresses the
save area. */
switch (type)
  {
  case recurse_copy_from_global:
  from_sp = TRUE;
  base_reg = STACK_TOP;
  break;

  case recurse_copy_private_to_global:
  case recurse_copy_shared_to_global:
  case recurse_copy_kept_shared_to_global:
  from_sp = FALSE;
  base_reg = STACK_TOP;
  break;

  default:
  SLJIT_ASSERT(type == recurse_swap_global);
  from_sp = FALSE;
  base_reg = TMP2;
  break;
  }

/* From now on stackptr and stacktop are offsets, not slot indexes. */
stackptr = STACK(stackptr);
stacktop = STACK(stacktop);

/* Choose the three registers used by the delayed copy. TMP2 cannot be used
when it addresses the save area. */
status.tmp_regs[0] = TMP1;
status.saved_tmp_regs[0] = TMP1;

if (base_reg != TMP2)
  {
  status.tmp_regs[1] = TMP2;
  status.saved_tmp_regs[1] = TMP2;
  }
else
  {
  status.saved_tmp_regs[1] = RETURN_ADDR;
  if (HAS_VIRTUAL_REGISTERS)
    status.tmp_regs[1] = STR_PTR;
  else
    status.tmp_regs[1] = RETURN_ADDR;
  }

status.saved_tmp_regs[2] = TMP3;
if (HAS_VIRTUAL_REGISTERS)
  status.tmp_regs[2] = STR_END;
else
  status.tmp_regs[2] = TMP3;

delayed_mem_copy_init(&status, common);

/* The first slot belongs to common->recursive_head_ptr, which is handled
like a private value. */
if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
  {
  SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);

  if (!from_sp)
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);

  if (from_sp || type == recurse_swap_global)
    delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
  }

stackptr += sizeof(sljit_sw);

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
if (type != recurse_copy_shared_to_global)
  {
  if (!from_sp)
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);

  if (from_sp || type == recurse_swap_global)
    delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
  }

stackptr += sizeof(sljit_sw);
#endif

while (cc < ccend)
  {
  /* Collect the home locations that belong to the current opcode. */
  private_count = 0;
  shared_count = 0;
  kept_shared_count = 0;

  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, OVECTOR(0)))
      {
      kept_shared_srcw[0] = OVECTOR(0);
      kept_shared_count = 1;
      }
    cc += 1;
    break;

    case OP_RECURSE:
    if (recurse_flags & recurse_flag_quit_found)
      {
      if (common->has_set_som && recurse_check_bit(common, OVECTOR(0)))
        {
        kept_shared_srcw[0] = OVECTOR(0);
        kept_shared_count = 1;
        }
      if (common->mark_ptr != 0 && recurse_check_bit(common, common->mark_ptr))
        {
        kept_shared_srcw[kept_shared_count] = common->mark_ptr;
        kept_shared_count++;
        }
      }
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      {
      shared_srcw[0] = common->capture_last_ptr;
      shared_count = 1;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_KET:
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0)
      {
      if (recurse_check_bit(common, private_srcw[0]))
        private_count = 1;
      /* The private data of cc + 1 holds an additional distance to skip. */
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    private_srcw[0] = PRIVATE_DATA(cc);
    if (recurse_check_bit(common, private_srcw[0]))
      private_count = 1;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    offset = GET2(cc, 1 + LINK_SIZE);
    /* The two ovector words of a capture are handled as a pair; the bit of
    the first word guards both of them. */
    shared_srcw[0] = OVECTOR(offset << 1);
    if (recurse_check_bit(common, shared_srcw[0]))
      {
      shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
      shared_count = 2;
      }

    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      {
      shared_srcw[shared_count] = common->capture_last_ptr;
      shared_count++;
      }

    if (common->optimized_cbracket[offset] == 0)
      {
      private_srcw[0] = OVECTOR_PRIV(offset);
      if (recurse_check_bit(common, private_srcw[0]))
        private_count = 1;
      }

    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    offset = GET2(cc, 1 + LINK_SIZE);
    shared_srcw[0] = OVECTOR(offset << 1);
    if (recurse_check_bit(common, shared_srcw[0]))
      {
      shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
      shared_count = 2;
      }

    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      {
      shared_srcw[shared_count] = common->capture_last_ptr;
      shared_count++;
      }

    private_srcw[0] = PRIVATE_DATA(cc);
    if (recurse_check_bit(common, private_srcw[0]))
      private_count = 1;

    /* From here offset is a home location, not a capture index. */
    offset = OVECTOR_PRIV(offset);
    if (recurse_check_bit(common, offset))
      {
      private_srcw[private_count] = offset;
      private_count++;
      }
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      private_srcw[0] = PRIVATE_DATA(cc);
      if (recurse_check_bit(common, private_srcw[0]))
        private_count = 1;
      }
    cc += 1 + LINK_SIZE;
    break;

    /* Character iterators: one or two private words, if any. */
    CASE_ITERATOR_PRIVATE_DATA_1
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      private_count = 1;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      {
      private_count = 2;
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
      }
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      {
      private_count = 2;
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
      }
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    /* Character type iterators: same as above without a character operand. */
    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      private_count = 1;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      {
      private_count = 2;
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
      }
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      {
      private_count = 2;
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
      }
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
    i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif
    /* Here i is the length of the class opcode; the number of private words
    depends on the iterator that follows the class. */
    if (PRIVATE_DATA(cc) != 0)
      {
      private_count = 1;
      private_srcw[0] = PRIVATE_DATA(cc);
      switch(get_class_iterator_size(cc + i))
        {
        case 1:
        break;

        case 2:
        if (recurse_check_bit(common, private_srcw[0]))
          {
          private_count = 2;
          private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
          SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
          }
        break;

        default:
        SLJIT_UNREACHABLE();
        break;
        }
      }
    cc += i;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, common->mark_ptr))
      {
      kept_shared_srcw[0] = common->mark_ptr;
      kept_shared_count = 1;
      }
    if (common->control_head_ptr != 0 && recurse_check_bit(common, common->control_head_ptr))
      {
      private_srcw[0] = common->control_head_ptr;
      private_count = 1;
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    if (recurse_check_bit(common, common->control_head_ptr))
      {
      private_srcw[0] = common->control_head_ptr;
      private_count = 1;
      }
    cc++;
    break;

    default:
    /* Nothing to copy for this opcode: go directly to the next one. */
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    continue;
    }

  /* Emit the copies of each group, or only step over its slots when the
  group is not selected by type. When both moves below are issued (swap
  mode), the value of the save area slot is loaded before the slot is
  overwritten, because the stores of delayed_mem_copy_move() are deferred. */
  if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
    {
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);

    for (i = 0; i < private_count; i++)
      {
      SLJIT_ASSERT(private_srcw[i] != 0);

      if (!from_sp)
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);

      if (from_sp || type == recurse_swap_global)
        delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);

      stackptr += sizeof(sljit_sw);
      }
    }
  else
    stackptr += sizeof(sljit_sw) * private_count;

  if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
    {
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);

    for (i = 0; i < shared_count; i++)
      {
      SLJIT_ASSERT(shared_srcw[i] != 0);

      if (!from_sp)
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);

      if (from_sp || type == recurse_swap_global)
        delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);

      stackptr += sizeof(sljit_sw);
      }
    }
  else
    stackptr += sizeof(sljit_sw) * shared_count;

  if (type != recurse_copy_private_to_global && type != recurse_swap_global)
    {
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);

    for (i = 0; i < kept_shared_count; i++)
      {
      SLJIT_ASSERT(kept_shared_srcw[i] != 0);

      if (!from_sp)
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);

      if (from_sp || type == recurse_swap_global)
        delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);

      stackptr += sizeof(sljit_sw);
      }
    }
  else
    stackptr += sizeof(sljit_sw) * kept_shared_count;
  }

/* Every slot of the save area must have been visited exactly once. */
SLJIT_ASSERT(cc == ccend && stackptr == stacktop);

/* Emit the stores that are still pending. */
delayed_mem_copy_finish(&status);
}
3036
set_then_offsets(compiler_common * common,PCRE2_SPTR cc,sljit_u8 * current_offset)3037 static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
3038 {
3039 PCRE2_SPTR end = bracketend(cc);
3040 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
3041
3042 /* Assert captures then. */
3043 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA)
3044 current_offset = NULL;
3045 /* Conditional block does not. */
3046 if (*cc == OP_COND || *cc == OP_SCOND)
3047 has_alternatives = FALSE;
3048
3049 cc = next_opcode(common, cc);
3050 if (has_alternatives)
3051 current_offset = common->then_offsets + (cc - common->start);
3052
3053 while (cc < end)
3054 {
3055 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
3056 cc = set_then_offsets(common, cc, current_offset);
3057 else
3058 {
3059 if (*cc == OP_ALT && has_alternatives)
3060 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
3061 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
3062 *current_offset = 1;
3063 cc = next_opcode(common, cc);
3064 }
3065 }
3066
3067 return end;
3068 }
3069
/* The case label groups below are used by the opcode switches above. */
#undef CASE_ITERATOR_PRIVATE_DATA_1
#undef CASE_ITERATOR_PRIVATE_DATA_2A
#undef CASE_ITERATOR_PRIVATE_DATA_2B
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3076
is_powerof2(unsigned int value)3077 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
3078 {
3079 return (value & (value - 1)) == 0;
3080 }
3081
set_jumps(jump_list * list,struct sljit_label * label)3082 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
3083 {
3084 while (list)
3085 {
3086 /* sljit_set_label is clever enough to do nothing
3087 if either the jump or the label is NULL. */
3088 SET_LABEL(list->jump, label);
3089 list = list->next;
3090 }
3091 }
3092
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)3093 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
3094 {
3095 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
3096 if (list_item)
3097 {
3098 list_item->next = *list;
3099 list_item->jump = jump;
3100 *list = list_item;
3101 }
3102 }
3103
/* Registers a stub on common->stubs: start is the jump that enters the stub,
and a label emitted at the current position is recorded as the place where
the stub continues afterwards (see flush_stubs()). When the list item cannot
be allocated, nothing is registered and no label is emitted. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (!stub)
  return;

stub->start = start;
stub->quit = LABEL();

/* Push the new item to the front of the list. */
stub->next = common->stubs;
common->stubs = stub;
}
3117
/* Emits the code of every stub registered by add_stub(), then empties the
list. Each stub is the target of its start jump, performs a fast call whose
jump is collected in common->stackalloc, and finally jumps back to the quit
label of the stub. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
3132
count_match(compiler_common * common)3133 static SLJIT_INLINE void count_match(compiler_common *common)
3134 {
3135 DEFINE_COMPILER;
3136
3137 OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
3138 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
3139 }
3140
allocate_stack(compiler_common * common,int size)3141 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
3142 {
3143 /* May destroy all locals and registers except TMP2. */
3144 DEFINE_COMPILER;
3145
3146 SLJIT_ASSERT(size > 0);
3147 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
3148 #ifdef DESTROY_REGISTERS
3149 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
3150 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3151 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3152 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
3153 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
3154 #endif
3155 add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
3156 }
3157
free_stack(compiler_common * common,int size)3158 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
3159 {
3160 DEFINE_COMPILER;
3161
3162 SLJIT_ASSERT(size > 0);
3163 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
3164 }
3165
/* Allocates size bytes of data with SLJIT_MALLOC and links the allocation
into the list headed by common->read_only_data_head. One extra sljit_uw is
allocated in front of the returned area to hold the link to the previous
head of the list.

Returns a pointer to the usable area, or NULL when the compiler is already in
an error state or when the allocation fails (a compiler memory error is set in
the latter case). */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
DEFINE_COMPILER;
sljit_uw *chunk;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

chunk = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);

if (SLJIT_UNLIKELY(chunk == NULL))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

/* The first word of the chunk is the link to the previous list head. */
*(void**)chunk = common->read_only_data_head;
common->read_only_data_head = (void *)chunk;

return chunk + 1;
}
3185
reset_ovector(compiler_common * common,int length)3186 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
3187 {
3188 DEFINE_COMPILER;
3189 struct sljit_label *loop;
3190 sljit_s32 i;
3191
3192 /* At this point we can freely use all temporary registers. */
3193 SLJIT_ASSERT(length > 1);
3194 /* TMP1 returns with begin - 1. */
3195 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
3196 if (length < 8)
3197 {
3198 for (i = 1; i < length; i++)
3199 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
3200 }
3201 else
3202 {
3203 if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3204 {
3205 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
3206 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3207 loop = LABEL();
3208 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
3209 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3210 JUMPTO(SLJIT_NOT_ZERO, loop);
3211 }
3212 else
3213 {
3214 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
3215 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3216 loop = LABEL();
3217 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
3218 OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
3219 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3220 JUMPTO(SLJIT_NOT_ZERO, loop);
3221 }
3222 }
3223 }
3224
reset_early_fail(compiler_common * common)3225 static SLJIT_INLINE void reset_early_fail(compiler_common *common)
3226 {
3227 DEFINE_COMPILER;
3228 sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
3229 sljit_u32 uncleared_size;
3230 sljit_s32 src = SLJIT_IMM;
3231 sljit_s32 i;
3232 struct sljit_label *loop;
3233
3234 SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);
3235
3236 if (size == sizeof(sljit_sw))
3237 {
3238 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
3239 return;
3240 }
3241
3242 if (sljit_get_register_index(TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
3243 {
3244 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
3245 src = TMP3;
3246 }
3247
3248 if (size <= 6 * sizeof(sljit_sw))
3249 {
3250 for (i = common->early_fail_start_ptr; i < common->early_fail_end_ptr; i += sizeof(sljit_sw))
3251 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, src, 0);
3252 return;
3253 }
3254
3255 GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);
3256
3257 uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);
3258
3259 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);
3260
3261 loop = LABEL();
3262 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3263 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3264 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * (sljit_sw)sizeof(sljit_sw), src, 0);
3265 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * (sljit_sw)sizeof(sljit_sw), src, 0);
3266 CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);
3267
3268 if (uncleared_size >= sizeof(sljit_sw))
3269 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3270
3271 if (uncleared_size >= 2 * sizeof(sljit_sw))
3272 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0);
3273 }
3274
/* Emits the code that resets the match state:
  - OVECTOR(2) .. OVECTOR(length - 1) are overwritten with the value of
    OVECTOR(1),
  - the locals at common->mark_ptr and common->control_head_ptr are cleared
    when they are in use,
  - TMP1 is loaded from the local at common->start_ptr,
  - STACK_TOP is reloaded from the "end" field of the sljit_stack that is
    referenced by the "stack" field of jit_arguments.
length must be greater than one. */
static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
{
DEFINE_COMPILER;
struct sljit_label *loop;
int i;

SLJIT_ASSERT(length > 1);
/* OVECTOR(1) contains the "string begin - 1" constant. */
if (length > 2)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
if (length < 8)
  {
  /* Unrolled form for short vectors. */
  for (i = 2; i < length; i++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
  }
else
  {
  /* Loop form. STACK_TOP is used as the loop counter here, which is
  possible because it is reloaded below. With SLJIT_MEM_SUPP the first
  sljit_emit_mem() call only checks for support. */
  if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
    {
    /* The pre-update store advances TMP2 before each write, so the pointer
    starts one word before the first slot to fill. */
    GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
    loop = LABEL();
    sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);
    }
  else
    {
    GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
    loop = LABEL();
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
    OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);
    }
  }

/* Reload STACK_TOP in two steps: first the "stack" field of jit_arguments
(with virtual registers ARGUMENTS is copied to STACK_TOP first, and the
field is read after the stores below), then the "end" field of that stack. */
if (!HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));
else
  OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);

if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
if (common->control_head_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
if (HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
}
3328
do_search_mark(sljit_sw * current,PCRE2_SPTR skip_arg)3329 static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
3330 {
3331 while (current != NULL)
3332 {
3333 switch (current[1])
3334 {
3335 case type_then_trap:
3336 break;
3337
3338 case type_mark:
3339 if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
3340 return current[3];
3341 break;
3342
3343 default:
3344 SLJIT_UNREACHABLE();
3345 break;
3346 }
3347 SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
3348 current = (sljit_sw*)current[0];
3349 }
3350 return 0;
3351 }
3352
copy_ovector(compiler_common * common,int topbracket)3353 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
3354 {
3355 DEFINE_COMPILER;
3356 struct sljit_label *loop;
3357 BOOL has_pre;
3358
3359 /* At this point we can freely use all registers. */
3360 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3361 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
3362
3363 if (HAS_VIRTUAL_REGISTERS)
3364 {
3365 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
3366 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3367 if (common->mark_ptr != 0)
3368 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
3369 OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
3370 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
3371 if (common->mark_ptr != 0)
3372 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
3373 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
3374 SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
3375 }
3376 else
3377 {
3378 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3379 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
3380 if (common->mark_ptr != 0)
3381 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
3382 OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
3383 OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
3384 if (common->mark_ptr != 0)
3385 OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
3386 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
3387 }
3388
3389 has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
3390
3391 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
3392 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
3393
3394 loop = LABEL();
3395
3396 if (has_pre)
3397 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
3398 else
3399 {
3400 OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
3401 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
3402 }
3403
3404 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
3405 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
3406 /* Copy the integer value to the output buffer */
3407 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3408 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
3409 #endif
3410
3411 SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
3412 OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
3413
3414 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3415 JUMPTO(SLJIT_NOT_ZERO, loop);
3416
3417 /* Calculate the return value, which is the maximum ovector value. */
3418 if (topbracket > 1)
3419 {
3420 if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
3421 {
3422 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
3423 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
3424
3425 /* OVECTOR(0) is never equal to SLJIT_S2. */
3426 loop = LABEL();
3427 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
3428 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3429 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
3430 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
3431 }
3432 else
3433 {
3434 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
3435 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
3436
3437 /* OVECTOR(0) is never equal to SLJIT_S2. */
3438 loop = LABEL();
3439 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
3440 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
3441 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3442 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
3443 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
3444 }
3445 }
3446 else
3447 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
3448 }
3449
return_with_partial_match(compiler_common * common,struct sljit_label * quit)3450 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
3451 {
3452 DEFINE_COMPILER;
3453 sljit_s32 mov_opcode;
3454 sljit_s32 arguments_reg = !HAS_VIRTUAL_REGISTERS ? ARGUMENTS : SLJIT_R1;
3455
3456 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
3457 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
3458 && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));
3459
3460 if (arguments_reg != ARGUMENTS)
3461 OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0);
3462 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
3463 common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
3464 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);
3465
3466 /* Store match begin and end. */
3467 OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin));
3468 OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
3469 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data));
3470
3471 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
3472
3473 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
3474 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3475 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
3476 #endif
3477 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);
3478
3479 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
3480 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3481 OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
3482 #endif
3483 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);
3484
3485 JUMPTO(SLJIT_JUMP, quit);
3486 }
3487
check_start_used_ptr(compiler_common * common)3488 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
3489 {
3490 /* May destroy TMP1. */
3491 DEFINE_COMPILER;
3492 struct sljit_jump *jump;
3493
3494 if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3495 {
3496 /* The value of -1 must be kept for start_used_ptr! */
3497 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
3498 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
3499 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
3500 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
3501 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3502 JUMPHERE(jump);
3503 }
3504 else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
3505 {
3506 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3507 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3508 JUMPHERE(jump);
3509 }
3510 }
3511
char_has_othercase(compiler_common * common,PCRE2_SPTR cc)3512 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
3513 {
3514 /* Detects if the character has an othercase. */
3515 unsigned int c;
3516
3517 #ifdef SUPPORT_UNICODE
3518 if (common->utf || common->ucp)
3519 {
3520 if (common->utf)
3521 {
3522 GETCHAR(c, cc);
3523 }
3524 else
3525 c = *cc;
3526
3527 if (c > 127)
3528 return c != UCD_OTHERCASE(c);
3529
3530 return common->fcc[c] != c;
3531 }
3532 else
3533 #endif
3534 c = *cc;
3535 return MAX_255(c) ? common->fcc[c] != c : FALSE;
3536 }
3537
char_othercase(compiler_common * common,unsigned int c)3538 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
3539 {
3540 /* Returns with the othercase. */
3541 #ifdef SUPPORT_UNICODE
3542 if ((common->utf || common->ucp) && c > 127)
3543 return UCD_OTHERCASE(c);
3544 #endif
3545 return TABLE_GET(c, common->fcc, c);
3546 }
3547
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
/* Detects if the character at cc and its other case differ in exactly one
bit. Returns 0 if they differ in more than one bit. Otherwise the result is
(position << 8) | mask, where mask is the differing bit reduced to fit the
low byte and position is the value computed below for the selected code
unit width (NOTE(review): how callers consume position is not visible
here). The character must have an other case. */
unsigned int c, oc, bit;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
int position;
#endif

#ifdef SUPPORT_UNICODE
if (common->utf)
  {
  GETCHAR(c, cc);
  }
else
#endif
  c = *cc;

#ifdef SUPPORT_UNICODE
if (common->utf || common->ucp)
  oc = (c <= 127) ? common->fcc[c] : UCD_OTHERCASE(c);
else
#endif
  oc = TABLE_GET(c, common->fcc, c);

SLJIT_ASSERT(c != oc);

bit = c ^ oc;

/* Optimized for English alphabet. */
if (bit == 0x20 && c <= 127)
  return (0 << 8) | 0x20;

/* Since c != oc, they must have at least 1 bit difference. */
if (!is_powerof2(bit))
  return 0;

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
if (common->utf && c > 127)
  {
  /* Step back one position for every group of 6 zero low bits, starting
  from GET_EXTRALEN of the first code unit. */
  for (position = GET_EXTRALEN(*cc); (bit & 0x3f) == 0; bit >>= 6)
    position--;
  return (position << 8) | bit;
  }
#endif /* SUPPORT_UNICODE */
return (0 << 8) | bit;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#ifdef SUPPORT_UNICODE
if (common->utf && c > 65535)
  {
  /* A difference in the low 10 bits is reported with position 2 or 3,
  a higher one is shifted down and handled by the common return. */
  if (bit < (1u << 10))
    return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
  bit >>= 10;
  }
#endif /* SUPPORT_UNICODE */
return (bit < 256) ? ((0u << 8) | bit) : ((1u << 8) | (bit >> 8));

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
3623
static void check_partial(compiler_common *common, BOOL force)
{
/* Emits the check for a partial match. Nothing is emitted in complete
matching mode. Unless force is set or empty partial matches are allowed,
the report is skipped when start_used_ptr >= STR_PTR; otherwise in soft
partial mode it is skipped when start_used_ptr is -1. The report itself
clears the hit_start local in soft partial mode, and jumps to the partial
match handler in hard partial mode. Does not modify registers. */
DEFINE_COMPILER;
const BOOL soft_partial = common->mode == PCRE2_JIT_PARTIAL_SOFT;
struct sljit_jump *no_partial = NULL;

SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

if (common->mode == PCRE2_JIT_COMPLETE)
  return;

if (!(force || common->allow_empty_partial))
  no_partial = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
else if (soft_partial)
  no_partial = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);

if (soft_partial)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel == NULL)
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
else
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);

if (no_partial != NULL)
  JUMPHERE(no_partial);
}
3653
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
/* Emits the end of subject check. If STR_PTR < STR_END the emitted code
falls through. Otherwise in complete mode it jumps to end_reached. In the
partial modes it also jumps to end_reached when start_used_ptr >= STR_PTR;
failing that, soft partial mode clears the hit_start local and then jumps
to end_reached, while hard partial mode jumps to the partial match handler.
Does not affect registers. Usually used in a tight spot. */
DEFINE_COMPILER;
struct sljit_jump *has_input;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

has_input = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

/* Common to both partial modes. */
add_jump(compiler, end_reached,
  CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(has_input);
}
3683
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
/* Emits the end of subject check of a matching step. If STR_PTR < STR_END
the emitted code falls through. Otherwise complete mode jumps to
backtracks. In the partial modes the code backtracks without reporting when
empty partial matches are not allowed and start_used_ptr >= STR_PTR, or
(empty partial matches allowed, soft mode) when start_used_ptr is -1. The
report clears the hit_start local and backtracks in soft partial mode, and
jumps to the partial match handler in hard partial mode. */
DEFINE_COMPILER;
const BOOL soft_partial = common->mode == PCRE2_JIT_PARTIAL_SOFT;
struct sljit_jump *has_input;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
has_input = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

if (!common->allow_empty_partial)
  add_jump(compiler, backtracks,
    CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
else if (soft_partial)
  add_jump(compiler, backtracks,
    CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));

if (soft_partial)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(has_input);
}
3716
static void process_partial_match(compiler_common *common)
{
/* Emits the partial match report without the end of subject test. In soft
partial mode the hit_start local is cleared unless
start_used_ptr >= STR_PTR. In hard partial mode the code jumps to the
partial match handler if start_used_ptr < STR_PTR. Nothing is emitted in
complete mode. */
DEFINE_COMPILER;
struct sljit_jump *no_partial;

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  no_partial = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  JUMPHERE(no_partial);
  return;
  }

if (common->mode != PCRE2_JIT_PARTIAL_HARD)
  return;

/* The handler is either an already known label or a pending jump list. */
if (common->partialmatchlabel != NULL)
  {
  CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
  return;
  }

add_jump(compiler, &common->partialmatch,
  CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
}
3737
static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
{
/* Emits a jump to label which is taken while STR_PTR < STR_END. When the
end of the subject is reached the code falls through to the partial match
report emitted by process_partial_match(). */
DEFINE_COMPILER;

CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);

/* Only reached at run time when STR_PTR >= STR_END. */
process_partial_match(common);
}
3745
static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
{
/* Emits the code that reads the character at STR_PTR into TMP1 and keeps
STR_PTR. Does not check STR_END. The dst/dstw operand saves STR_PTR while a
multi-unit character is decoded by a helper. TMP2, dst and RETURN_ADDR are
destroyed. Only the first code unit is read when max is below the first
value which needs decoding or validation. In invalid UTF mode an invalid
character is sent to backtracks when it is not NULL. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *single_unit;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (!common->utf || max < 128)
  return;

single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
/* The helper is entered with STR_PTR after the first code unit, so the
original position is saved in dst and restored afterwards. */
OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
add_jump(compiler, common->invalid_utf ? &common->utfreadchar_invalid : &common->utfreadchar,
  JUMP(SLJIT_FAST_CALL));
OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
if (backtracks != NULL && common->invalid_utf)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
JUMPHERE(single_unit);
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (!common->utf || max < 0xd800)
  return;

OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

if (!common->invalid_utf)
  {
  /* TMP2 contains the high surrogate. The second unit is combined with
  it inline, STR_PTR is not touched. */
  single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  }
else
  {
  /* Any surrogate is passed to the validating helper. */
  single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  }

JUMPHERE(single_unit);
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (!common->invalid_utf || max < 0xd800)
  return;

/* Invalid values are those >= 0x110000 and the surrogate range. */
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

if (backtracks != NULL)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
  }
else
  {
  /* No jump list: replace the value with INVALID_UTF_CHAR. */
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3829
static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
{
/* Emits the code that reads one character back into TMP1 without moving
STR_PTR. TMP2 must contain the start of the subject buffer. Affects TMP1,
TMP2, and RETURN_ADDR. In the 8 and 16 bit UTF modes only the previous code
unit is read when max is below the first value which needs decoding.

backtracks may be NULL. In invalid UTF mode an invalid character is sent to
backtracks when the list is given, otherwise TMP1 contains
INVALID_UTF_CHAR. The 32 bit branch used to pass backtracks to add_jump()
unconditionally; it now handles NULL in the same way as peek_char() and
read_char() do. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  /* Single byte characters need no helper call. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  if (common->invalid_utf)
    {
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  if (common->invalid_utf)
    {
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
    /* TMP2 contains the low surrogate. The high surrogate is the code
    unit before it. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  /* Invalid values are those >= 0x110000 and the surrogate range. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (backtracks != NULL)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
    }
  else
    {
    /* No jump list: replace the value with INVALID_UTF_CHAR. */
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
    CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3897
3898 #define READ_CHAR_UPDATE_STR_PTR 0x1
3899 #define READ_CHAR_UTF8_NEWLINE 0x2
3900 #define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
3901 #define READ_CHAR_VALID_UTF 0x4
3902
read_char(compiler_common * common,sljit_u32 min,sljit_u32 max,jump_list ** backtracks,sljit_u32 options)3903 static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
3904 jump_list **backtracks, sljit_u32 options)
3905 {
3906 /* Reads the precise value of a character into TMP1, if the character is
3907 between min and max (c >= min && c <= max). Otherwise it returns with a value
3908 outside the range. Does not check STR_END. */
3909 DEFINE_COMPILER;
3910 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3911 struct sljit_jump *jump;
3912 #endif
3913 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
3914 struct sljit_jump *jump2;
3915 #endif
3916
3917 SLJIT_UNUSED_ARG(min);
3918 SLJIT_UNUSED_ARG(max);
3919 SLJIT_UNUSED_ARG(backtracks);
3920 SLJIT_UNUSED_ARG(options);
3921 SLJIT_ASSERT(min <= max);
3922
3923 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3924 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3925
3926 #ifdef SUPPORT_UNICODE
3927 #if PCRE2_CODE_UNIT_WIDTH == 8
3928 if (common->utf)
3929 {
3930 if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
3931
3932 if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
3933 {
3934 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
3935
3936 if (options & READ_CHAR_UTF8_NEWLINE)
3937 add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
3938 else
3939 add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
3940
3941 if (backtracks != NULL)
3942 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3943 JUMPHERE(jump);
3944 return;
3945 }
3946
3947 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3948 if (min >= 0x10000)
3949 {
3950 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
3951 if (options & READ_CHAR_UPDATE_STR_PTR)
3952 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3953 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3954 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
3955 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3956 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3957 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3958 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3959 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3960 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3961 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3962 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
3963 if (!(options & READ_CHAR_UPDATE_STR_PTR))
3964 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
3965 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3966 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3967 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3968 JUMPHERE(jump2);
3969 if (options & READ_CHAR_UPDATE_STR_PTR)
3970 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3971 }
3972 else if (min >= 0x800 && max <= 0xffff)
3973 {
3974 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
3975 if (options & READ_CHAR_UPDATE_STR_PTR)
3976 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3977 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3978 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
3979 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3980 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3981 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3982 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3983 if (!(options & READ_CHAR_UPDATE_STR_PTR))
3984 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3985 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3986 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3987 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3988 JUMPHERE(jump2);
3989 if (options & READ_CHAR_UPDATE_STR_PTR)
3990 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3991 }
3992 else if (max >= 0x800)
3993 {
3994 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
3995 }
3996 else if (max < 128)
3997 {
3998 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3999 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4000 }
4001 else
4002 {
4003 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4004 if (!(options & READ_CHAR_UPDATE_STR_PTR))
4005 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4006 else
4007 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4008 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4009 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4010 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4011 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4012 if (options & READ_CHAR_UPDATE_STR_PTR)
4013 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
4014 }
4015 JUMPHERE(jump);
4016 }
4017 #elif PCRE2_CODE_UNIT_WIDTH == 16
4018 if (common->utf)
4019 {
4020 if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
4021
4022 if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
4023 {
4024 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4025 jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4026
4027 if (options & READ_CHAR_UTF8_NEWLINE)
4028 add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
4029 else
4030 add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
4031
4032 if (backtracks != NULL)
4033 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
4034 JUMPHERE(jump);
4035 return;
4036 }
4037
4038 if (max >= 0x10000)
4039 {
4040 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4041 jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
4042 /* TMP2 contains the high surrogate. */
4043 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4044 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
4045 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4046 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
4047 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4048 JUMPHERE(jump);
4049 return;
4050 }
4051
4052 /* Skip low surrogate if necessary. */
4053 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4054
4055 if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
4056 {
4057 if (options & READ_CHAR_UPDATE_STR_PTR)
4058 OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4059 OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
4060 if (options & READ_CHAR_UPDATE_STR_PTR)
4061 CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
4062 if (max >= 0xd800)
4063 CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000);
4064 }
4065 else
4066 {
4067 jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
4068 if (options & READ_CHAR_UPDATE_STR_PTR)
4069 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4070 if (max >= 0xd800)
4071 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
4072 JUMPHERE(jump);
4073 }
4074 }
4075 #elif PCRE2_CODE_UNIT_WIDTH == 32
4076 if (common->invalid_utf)
4077 {
4078 if (backtracks != NULL)
4079 {
4080 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4081 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
4082 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
4083 }
4084 else
4085 {
4086 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4087 OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
4088 CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4089 OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4090 CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4091 }
4092 }
4093 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
4094 #endif /* SUPPORT_UNICODE */
4095 }
4096
4097 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4098
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Tells whether the character codes below 128 are enough to determine a
match. This is the case when bytes 16 .. 31 of the 32 byte bitset (the
bits of the codes 128 .. 255) are all 0xff for nclass, or all zero
otherwise. */
const sljit_u8 expected = nclass ? 0xff : 0;
int i;

for (i = 16; i < 32; i++)
  {
  if (bitset[i] != expected)
    return FALSE;
  }
return TRUE;
}
4115
static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Emits the code that reads the precise character type of a character into
TMP1, if the character is less than 128. Otherwise TMP1 is zero. Does not
check STR_END. TMP2 is destroyed.

STR_PTR is always moved past the first code unit. When negated is set, it
is moved past the rest of a multi-byte character as well; in invalid UTF
mode this is done by the validating helper and an invalid sequence is sent
to backtracks. */
DEFINE_COMPILER;
struct sljit_jump *jump;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* All values > 127 are zero in ctypes. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (negated)
  {
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (common->invalid_utf)
    {
    /* At every other call site of utfreadchar_invalid the first code unit
    is in TMP1 and the result is read back from TMP1. Here TMP1 holds the
    ctypes entry, so the code unit must be copied from TMP2 first. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    /* The type of a character above 127 is zero. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    }
  else
    {
    /* Valid UTF: skip the remaining bytes using the length table. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  JUMPHERE(jump);
  }
}
4150
4151 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4152
static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Emits code which reads the character type into TMP1 and updates STR_PTR.
Characters above 255 have type zero because the ctypes array contains only
256 values. Does not check STR_END for the first code unit.

common      compiler state
backtracks  receives jumps taken when an invalid sequence is detected (only
            used when common->invalid_utf is set)
negated     when TRUE, the emitted code consumes the whole character; when
            FALSE, in UTF-8 mode, at most two code units are read since
            longer sequences cannot encode a character below 256

TMP2 is destroyed. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(negated);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The result of this read may be unused, but saves an "else" part. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (!negated)
    {
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    /* Only first bytes in the 0xc2-0xdf range start a two byte sequence. */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));

    /* If TMP1 is in 0x80-0xbf range, TMP2 is also increased by (0x2 << 6),
    which undoes the 0xc2 subtraction and yields the character value. */
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
    /* The second byte must be a continuation byte (0x80-0xbf). The check is
    done on TMP1 (second byte - 0x80); TMP2 holds the combined value, which
    is >= 0x80 for every valid sequence. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else if (common->invalid_utf)
    {
    /* The decoder expects the first byte of the character in TMP1, but TMP1
    currently holds the ctypes entry. Without this move every multi-byte
    character would be reported as invalid. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && negated)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */

#if PCRE2_CODE_UNIT_WIDTH != 8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && negated)
  {
  /* Skip low surrogate if necessary. */
  if (!common->invalid_utf)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);

    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
      {
      /* Branch free form: RETURN_ADDR is used as a scratch register. */
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
      CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
      }
    else
      {
      jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      JUMPHERE(jump);
      }
    return;
    }

  /* Validating form: a code unit outside 0xd800-0xdfff needs nothing more,
  a low surrogate in first position is invalid, and a high surrogate must
  be followed by a low surrogate inside the subject. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
4271
static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
{
/* Goes one character back. Affects STR_PTR and TMP1. If must_be_valid is TRUE,
TMP2 is not used. Otherwise TMP2 must contain the start of the subject buffer,
and it is destroyed. Does not modify STR_PTR for invalid character sequences.

common         compiler state
backtracks     when not NULL, receives a jump taken when the preceding
               sequence is invalid (only in the invalid_utf code paths)
must_be_valid  TRUE when the preceding character can be assumed to be valid,
               so the non-validating code is emitted even in invalid_utf mode

Without Unicode support, or outside UTF mode, the emitted code simply steps
back one code unit. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_label *label;

if (common->utf)
  {
  if (!must_be_valid && common->invalid_utf)
    {
    /* A preceding byte below 0x80 is a complete character. Anything else is
    handed to the utfmoveback_invalid helper; a zero in TMP1 afterwards is
    treated as failure. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(jump);
    return;
    }

  /* Step back while the byte just passed is a continuation byte (10xxxxxx). */
  label = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (!must_be_valid && common->invalid_utf)
    {
    /* Code units outside the 0xd800-0xdfff surrogate range are complete
    characters. Surrogates are handed to the utfmoveback_invalid helper; a
    zero in TMP1 afterwards is treated as failure. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(jump);
    return;
    }

  /* Skip low surrogate if necessary. TMP1 becomes 1 when the code unit just
  passed is in the 0xdc00-0xdfff range, and is then scaled to a byte offset. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && !must_be_valid)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  if (backtracks != NULL)
    {
    /* Values of 0x110000 and above backtrack before STR_PTR is changed. */
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    return;
    }

  /* No backtrack list: TMP1 becomes 1 when the value is below 0x110000 and 0
  otherwise, so STR_PTR is only moved for values in range. */
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x110000);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(must_be_valid);

/* Fixed width case: one code unit is one character. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4358
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Emits a newline test for the character in TMP1. A jump is added to
backtracks when the character is a newline (jumpifmatch is TRUE) or when it
is not a newline (jumpifmatch is FALSE). TMP2 may be destroyed. */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The shared anynewline helper reports its result in the zero flag. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  /* A single, fixed newline character. */
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  if (jumpifmatch)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF: either CR or NL is a newline. */
if (!jumpifmatch)
  {
  /* CR skips the NL comparison, anything which is not NL backtracks. */
  is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  JUMPHERE(is_cr);
  return;
  }

add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
}
4391
4392 #ifdef SUPPORT_UNICODE
4393
4394 #if PCRE2_CODE_UNIT_WIDTH == 8
static void do_utfreadchar(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1.

The emitted helper reads the following bytes at STR_PTR + 0, + 1 and + 2, and
advances STR_PTR by the number of those bytes consumed (1, 2 or 3). No
validation is done: the following bytes are simply masked with 0x3f. TMP2 is
destroyed. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. After the shift, 0x800 is the 0x20 bit of
the first byte, which is clear only for two byte sequences. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. The XOR removes the 0xc0 marker bits (0xc0 << 6). */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* 0x10000 is the 0x10 bit of the first byte, shifted left by 12. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence. The XOR removes the 0xe0 marker bits (0xe0 << 12). */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four byte sequence. The 0xf0 marker bits (0xf0 << 12) are removed before
the last shift. */
JUMPHERE(jump);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4439
static void do_utfreadtype8(compiler_common *common)
{
/* Fast decoding a UTF-8 character type. TMP2 contains the first byte
of the character (>= 0xc0). Return value in TMP1.

The emitted helper advances STR_PTR past the remaining bytes of the
character. The type is looked up in ctypes only for two byte sequences which
encode a value below 256; every other character gets type zero. No
validation is done. TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *compare;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* The 0x20 bit of the first byte is set for three and four byte sequences. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point. If they are above 3, the
character is 256 or above. */
compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(compare);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* We only have types for characters less than 256. The number of bytes to
skip is read from utf8_table4, indexed by (first byte - 0xc0). */
JUMPHERE(jump);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4475
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Slow decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1. STR_PTR is
undefined for invalid characters.

The emitted helper validates the sequence while decoding and returns
INVALID_UTF_CHAR in TMP1 on failure. STR_PTR is first advanced by three, and
the bytes are then read at negative offsets; a separate code path is used
when fewer than three bytes remain before STR_END. On success STR_PTR is left
just after the last byte of the character. TMP2 is destroyed.

Where conditional moves are available the failing value is selected without
a branch and the matching exit_invalid entry is set to NULL. These entries
are still passed to sljit_set_label at the end (presumably it ignores NULL
jumps - confirm against the sljit sources). */
DEFINE_COMPILER;
sljit_s32 i;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *jump;
struct sljit_jump *buffer_end_close;
struct sljit_label *three_byte_entry;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[11];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);

/* Usually more than 3 characters remained in the subject buffer. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));

/* Not a valid start of a multi-byte sequence, no more bytes read. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);

buffer_end_close = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);

/* Two-byte sequence: only one of the three bytes skipped above was used. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  /* A bad continuation byte replaces TMP1 with 0x20000, which the three
  byte range checks below turn into INVALID_UTF_CHAR. */
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000);
  exit_invalid[2] = NULL;
  }
else
  exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);

/* Also entered from the code path used near the end of the buffer. */
three_byte_entry = LABEL();

/* Reject the 0xd800-0xdfff surrogate range. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800);
  exit_invalid[3] = NULL;
  }
else
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Reject values below 0x800, which do not need three bytes. */
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  exit_invalid[4] = NULL;
  }
else
  exit_invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);

/* Four-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  /* A bad continuation byte replaces TMP1 with zero, which fails the range
  check below. */
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0);
  exit_invalid[5] = NULL;
  }
else
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* Only the 0x10000-0x10ffff range is accepted for four byte sequences. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  exit_invalid[6] = NULL;
  }
else
  exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than three bytes remain: undo two of the three skipped bytes and
check the buffer end before each further read. */
JUMPHERE(buffer_end_close);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
exit_invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three-byte sequence. */
JUMPHERE(jump);
exit_invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  exit_invalid[10] = NULL;
  }
else
  exit_invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* One will be subtracted from STR_PTR later. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Four byte sequences are not possible. */
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_entry);

/* Common failure exit for every branching check above. */
exit_invalid_label = LABEL();
for (i = 0; i < 11; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4634
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Slow decoding a UTF-8 character, specialized for newlines.
TMP1 contains the first byte of the character (>= 0xc0). Return
char value in TMP1.

The emitted helper only decodes the multi-byte newlines (0x85, 0x2028 and
0x2029), and only when the newline type is NLTYPE_ANY. Every other sequence
is skipped up to the next byte which is not a continuation byte (or up to
STR_END), and INVALID_UTF_CHAR is returned. TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_label *skip_start;
struct sljit_label *three_byte_exit;
struct sljit_jump *jump[5];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

if (common->nltype != NLTYPE_ANY)
  {
  SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);

  /* All newlines are ascii, just skip intermediate octets. */
  /* NOTE(review): unlike the skip loop of the NLTYPE_ANY case below, STR_END
  is only checked once, before this loop is entered - confirm that reading
  on while continuation bytes follow is safe here. */
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  loop = LABEL();
  /* Use a load with post-increment when the target supports it. */
  if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
    sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  else
    {
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }

  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
  /* The last byte read was not a continuation byte: step back onto it. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  JUMPHERE(jump[0]);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  return;
  }

jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Only the 0xc2 and 0xe2 first bytes can start a multi-byte newline. */
jump[1] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
jump[2] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);

/* Not a newline: TMP2 holds the byte read last. */
skip_start = LABEL();
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
jump[3] = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);

/* Skip intermediate octets. */
loop = LABEL();
jump[4] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);

/* The last byte read was not a continuation byte: step back onto it. */
JUMPHERE(jump[3]);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

three_byte_exit = LABEL();
JUMPHERE(jump[0]);
JUMPHERE(jump[4]);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two byte long newline: 0x85. */
JUMPHERE(jump[1]);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three byte long newlines: 0x2028 and 0x2029. */
JUMPHERE(jump[2]);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start);
CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start);

/* Any character in the 0x2000-0x203f range is returned decoded. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4725
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Goes one character back.

On entry TMP1 holds a code unit which must be below 0xc0 to be accepted, and
TMP2 is compared against STR_PTR as the lowest address which may be read
(move_back documents TMP2 as the start of the subject buffer). The emitted
helper looks for the first byte of a two, three or four byte sequence at
the three preceding positions.

On success TMP1 is 1 and STR_PTR points to the first byte of the character.
On failure TMP1 is 0 and STR_PTR is its entry value plus one code unit. */
DEFINE_COMPILER;
sljit_s32 i;
struct sljit_jump *jump;
struct sljit_jump *buffer_start_close;
struct sljit_label *exit_ok_label;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[7];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Move to the earliest byte which may be needed; reads below use positive
offsets from here. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);

/* Two-byte sequence. */
buffer_start_close = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three-byte sequence. */
JUMPHERE(jump);
/* The byte just tested must be a continuation byte (0x80-0xbf), which is
in the -0x40..-1 range after the subtraction above. */
exit_invalid[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four-byte sequence. */
JUMPHERE(jump);
/* The byte just tested must be a continuation byte. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);

/* The first byte must be in the 0xf0-0xf4 range. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
exit_invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);

exit_ok_label = LABEL();
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than three bytes precede the entry position: STR_PTR is moved one
byte at a time and compared with TMP2 before each read. */
/* Two-byte sequence. */
JUMPHERE(buffer_start_close);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

exit_invalid[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, exit_ok_label);

/* Three-byte sequence. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
exit_invalid[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
exit_invalid[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, exit_ok_label);

/* Four-byte sequences are not possible. */

/* Failure exit for the three byte attempt above. */
exit_invalid_label = LABEL();
sljit_set_label(exit_invalid[5], exit_invalid_label);
sljit_set_label(exit_invalid[6], exit_invalid_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(exit_invalid[4]);
/* -2 + 4 = 2 */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Failure exit for the checks done while STR_PTR was three bytes back. */
exit_invalid_label = LABEL();
for (i = 0; i < 4; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4821
static void do_utfpeakcharback(compiler_common *common)
{
/* Peek a character back. Does not modify STR_PTR.

The emitted helper decodes the multi-byte UTF-8 character which ends just
before STR_PTR and returns its value in TMP1. The length is found by testing
the bytes at offsets -2 and -3 for being the first byte of a two or three
byte sequence; otherwise a four byte sequence is assumed. No validation is
done. TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_jump *jump[2];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* First byte of a two byte sequence (0xc0-0xdf) at offset -2? */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);

/* First byte of a three byte sequence (0xe0-0xef) at offset -3? */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
jump[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);

/* Four byte sequence: the byte at offset -3 is a continuation byte, and
the first byte is at offset -4. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Append the continuation byte at offset -2. */
JUMPHERE(jump[1]);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Append the continuation byte at offset -1. */
JUMPHERE(jump[0]);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4858
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Peek a character back. Does not modify STR_PTR.

Validating variant: the emitted helper returns the decoded character in
TMP1, or INVALID_UTF_CHAR when the bytes before STR_PTR do not form a valid
sequence. On entry TMP1 holds a code unit which must be below 0xc0 to be
accepted (presumably the byte at offset -1, already loaded by the caller -
confirm), and TMP2 holds the lowest address which may be read (presumably
the start of the subject buffer - confirm). TMP2 is destroyed.

Where conditional moves are available the failing value is selected without
a branch and the matching exit_invalid entry is set to NULL. These entries
are still passed to sljit_set_label at the end (presumably it ignores NULL
jumps - confirm against the sljit sources). */
DEFINE_COMPILER;
sljit_s32 i;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *jump[2];
struct sljit_label *two_byte_entry;
struct sljit_label *three_byte_entry;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[8];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Taken when fewer than four bytes are available before STR_PTR. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);

two_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
/* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* The byte at offset -2 must then be a continuation byte. */
JUMPHERE(jump[1]);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);

three_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Reject the 0xd800-0xdfff surrogate range. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800);
  exit_invalid[2] = NULL;
  }
else
  exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);

/* Reject values below 0x800, which do not need three bytes. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  exit_invalid[3] = NULL;
  }
else
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* The byte at offset -3 must then be a continuation byte. */
JUMPHERE(jump[1]);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Four-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
/* ADD is used instead of OR because of the SUB 0x10000 above. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

/* Only the 0x10000-0x10ffff range is accepted for four byte sequences. */
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  exit_invalid[5] = NULL;
  }
else
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than four bytes are available: check whether three are. */
JUMPHERE(jump[0]);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);

OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);

/* A four byte sequence does not fit. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than three bytes are available: check whether two are. */
JUMPHERE(jump[0]);
exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);

/* Common failure exit for every branching check above. */
exit_invalid_label = LABEL();
for (i = 0; i < 8; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4990
4991 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
4992
4993 #if PCRE2_CODE_UNIT_WIDTH == 16
4994
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Out-of-line (slow path) decoder for a UTF-16 character when invalid UTF
has to be tolerated. On entry TMP1 holds the first code unit of the
character (>= 0xd800) and TMP2 holds the high surrogate.
NOTE(review): TMP2 is only shifted left by 10 and biased by 0x10000 here, so
the caller presumably passes it already reduced by 0xd800 - confirm.
On return TMP1 holds the decoded character, or INVALID_UTF_CHAR when the
sequence is malformed. STR_PTR is undefined for invalid characters. */
DEFINE_COMPILER;
struct sljit_jump *bad_input[3];
int idx;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* A first unit of 0xdc00 or above cannot open a surrogate pair. */
bad_input[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
/* The second unit has to be inside the subject. */
bad_input[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* Fetch the second unit, step over it, and move the high part into place. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The second unit is accepted only when (unit - 0xdc00) is below 0x400. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
bad_input[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

/* Combine both halves into the final character value. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Common exit for every malformed case. */
for (idx = 0; idx < 3; idx++)
  JUMPHERE(bad_input[idx]);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5026
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Slow-path UTF-16 reader specialized for newline checks. On entry TMP1
holds the first code unit of the character (>= 0xd800). The incoming value
of TMP2 is not read: TMP2 is reloaded from the subject before its first use.

When input remains and TMP1 is below 0xdc00, TMP1 is set to 0x10000 and
STR_PTR is advanced by one code unit if, and only if, the next unit is in
the low surrogate range. Otherwise TMP1 is set to INVALID_UTF_CHAR. */

DEFINE_COMPILER;
struct sljit_jump *no_more_input;
struct sljit_jump *not_high_surrogate;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

no_more_input = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
not_high_surrogate = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);

/* TMP2 becomes 1 when the next unit is a low surrogate, 0 otherwise; it is
then scaled to a byte distance and added to STR_PTR. */
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Malformed input: report INVALID_UTF_CHAR. */
JUMPHERE(no_more_input);
JUMPHERE(not_high_surrogate);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5058
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Slow path of "go one character back" for UTF-16 with invalid UTF
tolerated.

The generated code fails when TMP1 is below 0x400 on entry, when TMP2 is not
below STR_PTR, or when the code unit just before STR_PTR is not in the range
0xd800..0xdbff.
NOTE(review): TMP1 presumably holds the already inspected unit minus 0xdc00
and TMP2 the lower bound of the subject - confirm against the call site.

Success: STR_PTR is moved one more code unit back and TMP1 is set to 1.
Failure: STR_PTR is moved one code unit forward and TMP1 is set to 0. */
DEFINE_COMPILER;
struct sljit_jump *give_up[3];
int idx;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

give_up[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
give_up[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Inspect the preceding unit: it has to be a high surrogate. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
give_up[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

for (idx = 0; idx < 3; idx++)
  JUMPHERE(give_up[idx]);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5086
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Slow path of "peek the previous character" for UTF-16 with invalid UTF
tolerated. STR_PTR is never modified.

On entry TMP1 holds the code unit before STR_PTR. A value of 0xe000 or more
is returned unchanged. A value below 0xdc00 yields INVALID_UTF_CHAR.
Otherwise the unit two positions before STR_PTR is read; it has to be a high
surrogate, and the pair is combined into the character value in TMP1.
NOTE(review): TMP2 presumably holds the lower bound of the subject on entry;
it is advanced by one unit and compared with STR_PTR before the second
read - confirm against the call site. */
DEFINE_COMPILER;
struct sljit_jump *plain_char;
struct sljit_jump *malformed[3];
int idx;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

plain_char = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
malformed[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
malformed[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* TMP1 = 0x10000 + (low - 0xdc00); TMP2 = (high - 0xd800), range checked. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
malformed[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

JUMPHERE(plain_char);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

for (idx = 0; idx < 3; idx++)
  JUMPHERE(malformed[idx]);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5118
5119 #endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
5120
/* Block geometry used by the JIT UCD lookups: the shift selects the block,
the mask selects the position inside it. UCD_BLOCK_SIZE must be 128 (see the
assert below). */
#define UCD_BLOCK_SHIFT 7
#define UCD_BLOCK_MASK ((1 << UCD_BLOCK_SHIFT) - 1)
5124
static void do_getucd(compiler_common *common)
{
/* Two-stage UCD table lookup for the character that comes in TMP1.
On return TMP2 holds the 16-bit entry read from ucd_stage2 (the offset of
the UCD record). TMP1 is overwritten with the position inside ucd_stage2 and
no longer holds the character. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *is_code_point;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Values above MAX_UTF_CODE_POINT are looked up as UNASSIGNED_UTF_CHAR. */
  is_code_point = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(is_code_point);
  }
#endif

/* TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT] (16-bit entries). */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = (block << UCD_BLOCK_SHIFT) + (TMP1 & UCD_BLOCK_MASK). */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = ucd_stage2[TMP1] (16-bit entries, hence the index shift of 1). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5164
static void do_getucdtype(compiler_common *common)
{
/* Two-stage UCD table lookup for the character that comes in TMP1, followed
by a read of the chartype field of the selected record.
On return TMP1 holds the chartype. TMP2 holds the record offset multiplied
by 4 (it is scaled in place while the record address is formed). */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *is_code_point;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Values above MAX_UTF_CODE_POINT are looked up as UNASSIGNED_UTF_CHAR. */
  is_code_point = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(is_code_point);
  }
#endif

/* TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT] (16-bit entries). */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = (block << UCD_BLOCK_SHIFT) + (TMP1 & UCD_BLOCK_MASK). */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = ucd_stage2[TMP1] (16-bit entries, hence the index shift of 1). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

/* The record is 12 bytes long, so TMP2 has to be multiplied by 12. This is
done as (TMP2 << 2) + ((TMP2 << 2) << 1): the first term is added to the
base address, the second comes from the scaled index of the final load. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5211
5212 #endif /* SUPPORT_UNICODE */
5213
mainloop_entry(compiler_common * common)5214 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
5215 {
5216 DEFINE_COMPILER;
5217 struct sljit_label *mainloop;
5218 struct sljit_label *newlinelabel = NULL;
5219 struct sljit_jump *start;
5220 struct sljit_jump *end = NULL;
5221 struct sljit_jump *end2 = NULL;
5222 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5223 struct sljit_label *loop;
5224 struct sljit_jump *jump;
5225 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5226 jump_list *newline = NULL;
5227 sljit_u32 overall_options = common->re->overall_options;
5228 BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
5229 BOOL newlinecheck = FALSE;
5230 BOOL readuchar = FALSE;
5231
5232 if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
5233 && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
5234 newlinecheck = TRUE;
5235
5236 SLJIT_ASSERT(common->abort_label == NULL);
5237
5238 if ((overall_options & PCRE2_FIRSTLINE) != 0)
5239 {
5240 /* Search for the end of the first line. */
5241 SLJIT_ASSERT(common->match_end_ptr != 0);
5242 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5243
5244 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5245 {
5246 mainloop = LABEL();
5247 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5248 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5249 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5250 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5251 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
5252 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
5253 JUMPHERE(end);
5254 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5255 }
5256 else
5257 {
5258 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5259 mainloop = LABEL();
5260 /* Continual stores does not cause data dependency. */
5261 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5262 read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
5263 check_newlinechar(common, common->nltype, &newline, TRUE);
5264 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
5265 JUMPHERE(end);
5266 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5267 set_jumps(newline, LABEL());
5268 }
5269
5270 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5271 }
5272 else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
5273 {
5274 /* Check whether offset limit is set and valid. */
5275 SLJIT_ASSERT(common->match_end_ptr != 0);
5276
5277 if (HAS_VIRTUAL_REGISTERS)
5278 {
5279 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5280 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5281 }
5282 else
5283 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5284
5285 OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5286 end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
5287 if (HAS_VIRTUAL_REGISTERS)
5288 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5289 else
5290 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
5291
5292 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5293 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5294 #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5295 if (HAS_VIRTUAL_REGISTERS)
5296 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5297
5298 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
5299 end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5300 OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5301 JUMPHERE(end2);
5302 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
5303 add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
5304 JUMPHERE(end);
5305 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
5306 }
5307
5308 start = JUMP(SLJIT_JUMP);
5309
5310 if (newlinecheck)
5311 {
5312 newlinelabel = LABEL();
5313 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5314 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5315 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5316 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
5317 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
5318 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5319 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5320 #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5321 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5322 end2 = JUMP(SLJIT_JUMP);
5323 }
5324
5325 mainloop = LABEL();
5326
5327 /* Increasing the STR_PTR here requires one less jump in the most common case. */
5328 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5329 if (common->utf && !common->invalid_utf) readuchar = TRUE;
5330 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5331 if (newlinecheck) readuchar = TRUE;
5332
5333 if (readuchar)
5334 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5335
5336 if (newlinecheck)
5337 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
5338
5339 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5340 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5341 #if PCRE2_CODE_UNIT_WIDTH == 8
5342 if (common->invalid_utf)
5343 {
5344 /* Skip continuation code units. */
5345 loop = LABEL();
5346 jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5347 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5348 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5349 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
5350 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
5351 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5352 JUMPHERE(jump);
5353 }
5354 else if (common->utf)
5355 {
5356 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5357 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5358 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5359 JUMPHERE(jump);
5360 }
5361 #elif PCRE2_CODE_UNIT_WIDTH == 16
5362 if (common->invalid_utf)
5363 {
5364 /* Skip continuation code units. */
5365 loop = LABEL();
5366 jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5367 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5368 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5369 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
5370 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
5371 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5372 JUMPHERE(jump);
5373 }
5374 else if (common->utf)
5375 {
5376 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5377
5378 if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
5379 {
5380 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5381 OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
5382 CMOV(SLJIT_LESS, STR_PTR, TMP2, 0);
5383 }
5384 else
5385 {
5386 OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
5387 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
5388 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5389 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5390 }
5391 }
5392 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
5393 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5394 JUMPHERE(start);
5395
5396 if (newlinecheck)
5397 {
5398 JUMPHERE(end);
5399 JUMPHERE(end2);
5400 }
5401
5402 return mainloop;
5403 }
5404
5405
add_prefix_char(PCRE2_UCHAR chr,fast_forward_char_data * chars,BOOL last)5406 static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
5407 {
5408 sljit_u32 i, count = chars->count;
5409
5410 if (count == 255)
5411 return;
5412
5413 if (count == 0)
5414 {
5415 chars->count = 1;
5416 chars->chars[0] = chr;
5417
5418 if (last)
5419 chars->last_count = 1;
5420 return;
5421 }
5422
5423 for (i = 0; i < count; i++)
5424 if (chars->chars[i] == chr)
5425 return;
5426
5427 if (count >= MAX_DIFF_CHARS)
5428 {
5429 chars->count = 255;
5430 return;
5431 }
5432
5433 chars->chars[count] = chr;
5434 chars->count = count + 1;
5435
5436 if (last)
5437 chars->last_count++;
5438 }
5439
static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
{
/* Recursive function, which scans prefix literals.

Starting at the opcode cc, the possible code units of the leading subject
positions are collected into chars[0], chars[1], ... At most max_chars
positions are filled. *rec_count is a step budget shared by the whole
recursion: it is decreased once per processed opcode, and 0 is returned as
soon as it is exhausted. Otherwise the number of positions consumed by this
invocation is returned. Optional items and alternatives are handled by
recursive calls on the same chars array, and their return value becomes the
new position limit. */
BOOL stop_after, wildcard, bitmap_class, icase;
int units, reps, units_saved, consumed = 0;
sljit_u32 chr; /* Any unicode character. */
sljit_u8 *bits, *bits_end, cur;
PCRE2_SPTR alt, unit_start, oc;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
PCRE2_UCHAR othercase[4];
#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
PCRE2_UCHAR othercase[2];
#else
PCRE2_UCHAR othercase[1];
#endif

reps = 1;
for (;;)
  {
  if (*rec_count == 0)
    return 0;
  (*rec_count)--;

  stop_after = TRUE;
  wildcard = FALSE;
  bitmap_class = FALSE;
  icase = FALSE;

  switch (*cc)
    {
    case OP_CHARI:
    icase = TRUE;
    /* Fall through */
    case OP_CHAR:
    stop_after = FALSE;
    cc++;
    break;

    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    /* Zero width assertions. */
    cc++;
    continue;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    /* Assertion groups are skipped as a whole. */
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    icase = TRUE;
    /* Fall through */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    /* Only the first occurrence is recorded, then the scan stops. */
    cc++;
    break;

    case OP_EXACTI:
    icase = TRUE;
    /* Fall through */
    case OP_EXACT:
    reps = GET2(cc, 1);
    stop_after = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    icase = TRUE;
    /* Fall through */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    units = 1;
    cc++;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(*cc)) units += GET_EXTRALEN(*cc);
#endif
    /* First scan the path where the optional character is absent. */
    max_chars = scan_prefix(common, cc + units, chars, max_chars, rec_count);
    if (max_chars == 0)
      return consumed;
    stop_after = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    /* Every further alternative is scanned recursively; the first one is
    scanned by this loop. */
    alt = cc + GET(cc, 1);
    while (*alt == OP_ALT)
      {
      max_chars = scan_prefix(common, alt + 1 + LINK_SIZE, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      alt += GET(alt, 1);
      }

    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
      return consumed;
#endif
    bitmap_class = TRUE;
    break;

    case OP_NCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    bitmap_class = TRUE;
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    wildcard = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return consumed;
#endif
    wildcard = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return consumed;
#endif
    wildcard = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return consumed;
#endif
    wildcard = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    wildcard = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UNICODE
    case OP_NOTPROP:
    case OP_PROP:
#if PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    wildcard = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* The repeat count applies to the type opcode that follows. */
    reps = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    wildcard = TRUE;
    reps = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    return consumed;
    }

  if (wildcard)
    {
    /* The position is not tracked in detail: mark it with 255. */
    do
      {
      chars->count = 255;

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--reps > 0);

    reps = 1;
    continue;
    }

  if (bitmap_class)
    {
    /* A 32 byte bitmap follows the opcode, then an optional repeat opcode. */
    bits = (sljit_u8*) (cc + 1);
    cc += 1 + 32 / sizeof(PCRE2_UCHAR);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      /* The class may be absent: scan what follows it first. */
      max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      break;

      default:
      case OP_CRPLUS:
      case OP_CRMINPLUS:
      case OP_CRPOSPLUS:
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      reps = GET2(cc, 1);
      if (reps <= 0)
        return consumed;
      break;
      }

    do
      {
      if (bits[31] & 0x80)
        chars->count = 255;
      else if (chars->count != 255)
        {
        /* Add every member of the bitmap, eight candidates per byte. */
        bits_end = bits + 32;
        chr = 0;
        do
          {
          cur = *bits++;
          SLJIT_ASSERT((chr & 0x7) == 0);
          if (cur == 0)
            chr += 8;
          else
            {
            do
              {
              if ((cur & 0x1) != 0)
                add_prefix_char(chr, chars, TRUE);
              cur >>= 1;
              chr++;
              }
            while (cur != 0);
            /* Round up to the first character of the next byte. */
            chr = (chr + 7) & ~7;
            }
          }
        while (chars->count != 255 && bits < bits_end);
        /* Rewind to the start of the bitmap for the next repetition. */
        bits = bits_end - 32;
        }

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--reps > 0);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      return consumed;

      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      cc++;
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* Only a fixed repeat count allows the scan to go on. */
      if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
        return consumed;
      cc += 1 + 2 * IMM2_SIZE;
      break;

      default:
      break;
      }

    reps = 1;
    continue;
    }

  /* Literal character: cc points to its first code unit. */
  units = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc)) units += GET_EXTRALEN(*cc);
#endif

  if (icase && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UNICODE
    if (common->utf)
      {
      GETCHAR(chr, cc);
      /* Both cases must be encoded with the same number of code units. */
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != units)
        return consumed;
      }
    else
#endif
      {
      chr = *cc;
#ifdef SUPPORT_UNICODE
      if (common->ucp && chr > 127)
        othercase[0] = UCD_OTHERCASE(chr);
      else
#endif
        othercase[0] = TABLE_GET(chr, common->fcc, chr);
      }
    }
  else
    {
    icase = FALSE;
    othercase[0] = 0; /* Stops compiler warning - PH */
    }

  units_saved = units;
  unit_start = cc;
  for (;;)
    {
    /* One pass stores every code unit of the character once. */
    oc = othercase;
    do
      {
      units--;
      consumed++;

      chr = *cc;
      add_prefix_char(*cc, chars, units == 0);

      if (icase)
        add_prefix_char(*oc, chars, units == 0);

      if (--max_chars == 0)
        return consumed;
      chars++;
      cc++;
      oc++;
      }
    while (units > 0);

    if (--reps == 0)
      break;

    /* Repeated character: restart from its first code unit. */
    units = units_saved;
    cc = unit_start;
    }

  reps = 1;
  if (stop_after)
    return consumed;
  }
}
5848
5849 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
jumpto_if_not_utf_char_start(struct sljit_compiler * compiler,sljit_s32 reg,struct sljit_label * label)5850 static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
5851 {
5852 #if PCRE2_CODE_UNIT_WIDTH == 8
5853 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
5854 CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0x80, label);
5855 #elif PCRE2_CODE_UNIT_WIDTH == 16
5856 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
5857 CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label);
5858 #else
5859 #error "Unknown code width"
5860 #endif
5861 }
5862 #endif
5863
5864 #include "pcre2_jit_simd_inc.h"
5865
5866 #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
5867
check_fast_forward_char_pair_simd(compiler_common * common,fast_forward_char_data * chars,int max)5868 static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)
5869 {
5870 sljit_s32 i, j, max_i = 0, max_j = 0;
5871 sljit_u32 max_pri = 0;
5872 PCRE2_UCHAR a1, a2, a_pri, b1, b2, b_pri;
5873
5874 for (i = max - 1; i >= 1; i--)
5875 {
5876 if (chars[i].last_count > 2)
5877 {
5878 a1 = chars[i].chars[0];
5879 a2 = chars[i].chars[1];
5880 a_pri = chars[i].last_count;
5881
5882 j = i - max_fast_forward_char_pair_offset();
5883 if (j < 0)
5884 j = 0;
5885
5886 while (j < i)
5887 {
5888 b_pri = chars[j].last_count;
5889 if (b_pri > 2 && a_pri + b_pri >= max_pri)
5890 {
5891 b1 = chars[j].chars[0];
5892 b2 = chars[j].chars[1];
5893
5894 if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)
5895 {
5896 max_pri = a_pri + b_pri;
5897 max_i = i;
5898 max_j = j;
5899 }
5900 }
5901 j++;
5902 }
5903 }
5904 }
5905
5906 if (max_pri == 0)
5907 return FALSE;
5908
5909 fast_forward_char_pair_simd(common, max_i, chars[max_i].chars[0], chars[max_i].chars[1], max_j, chars[max_j].chars[0], chars[max_j].chars[1]);
5910 return TRUE;
5911 }
5912
5913 #endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
5914
static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
/* Emits code that moves STR_PTR forward until the code unit at distance
offset from it is char1 or char2 (the two may be the same).

When common->match_end_ptr is non-zero, STR_END is temporarily lowered to
the stored limit plus offset + 1 code units, if that is smaller; the
original STR_END is kept in TMP3 and restored at the end. When
JIT_HAS_FAST_FORWARD_CHAR_SIMD is available the search is delegated to
fast_forward_char_simd(); otherwise a scalar loop is generated. In
PCRE2_JIT_COMPLETE mode running out of input joins common->failed_match; in
the other modes the loop is simply left. A non-zero offset is accepted only
in PCRE2_JIT_COMPLETE mode. */
DEFINE_COMPILER;
struct sljit_label *retry;
struct sljit_jump *is_char1;
struct sljit_jump *out_of_input;
PCRE2_UCHAR diff;
BOOL limit_end = (common->match_end_ptr != 0);

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);

if (limit_end)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (limit_end)
  {
  /* STR_END = min(STR_END, limit + offset + 1 units); old value in TMP3. */
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD

if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
  {
  fast_forward_char_simd(common, char1, char2, offset);

  if (offset > 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

  if (limit_end)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

/* Scalar search loop. */
retry = LABEL();

out_of_input = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, out_of_input);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (char1 == char2)
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, retry);
else
  {
  diff = char1 ^ char2;
  if (!is_powerof2(diff))
    {
    /* Two independent comparisons. */
    is_char1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, retry);
    JUMPHERE(is_char1);
    }
  else
    {
    /* The characters differ in a single bit: force that bit on and compare
    only once. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, diff);
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | diff, retry);
    }
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  /* The match start has to be the first code unit of a character. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
  jumpto_if_not_utf_char_start(compiler, TMP1, retry);
  }
#endif

/* Undo the offset and the post-increment of the loop. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(out_of_input);

if (limit_end)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
6000
fast_forward_first_n_chars(compiler_common * common)6001 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
6002 {
6003 DEFINE_COMPILER;
6004 struct sljit_label *start;
6005 struct sljit_jump *match;
6006 fast_forward_char_data chars[MAX_N_CHARS];
6007 sljit_s32 offset;
6008 PCRE2_UCHAR mask;
6009 PCRE2_UCHAR *char_set, *char_set_end;
6010 int i, max, from;
6011 int range_right = -1, range_len;
6012 sljit_u8 *update_table = NULL;
6013 BOOL in_range;
6014 sljit_u32 rec_count;
6015
6016 for (i = 0; i < MAX_N_CHARS; i++)
6017 {
6018 chars[i].count = 0;
6019 chars[i].last_count = 0;
6020 }
6021
6022 rec_count = 10000;
6023 max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
6024
6025 if (max < 1)
6026 return FALSE;
6027
6028 /* Convert last_count to priority. */
6029 for (i = 0; i < max; i++)
6030 {
6031 SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count);
6032
6033 if (chars[i].count == 1)
6034 {
6035 chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
6036 /* Simplifies algorithms later. */
6037 chars[i].chars[1] = chars[i].chars[0];
6038 }
6039 else if (chars[i].count == 2)
6040 {
6041 SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);
6042
6043 if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
6044 chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
6045 else
6046 chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
6047 }
6048 else
6049 chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
6050 }
6051
6052 #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6053 if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))
6054 return TRUE;
6055 #endif
6056
6057 in_range = FALSE;
6058 /* Prevent compiler "uninitialized" warning */
6059 from = 0;
6060 range_len = 4 /* minimum length */ - 1;
6061 for (i = 0; i <= max; i++)
6062 {
6063 if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
6064 {
6065 range_len = i - from;
6066 range_right = i - 1;
6067 }
6068
6069 if (i < max && chars[i].count < 255)
6070 {
6071 SLJIT_ASSERT(chars[i].count > 0);
6072 if (!in_range)
6073 {
6074 in_range = TRUE;
6075 from = i;
6076 }
6077 }
6078 else
6079 in_range = FALSE;
6080 }
6081
6082 if (range_right >= 0)
6083 {
6084 update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
6085 if (update_table == NULL)
6086 return TRUE;
6087 memset(update_table, IN_UCHARS(range_len), 256);
6088
6089 for (i = 0; i < range_len; i++)
6090 {
6091 SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);
6092
6093 char_set = chars[range_right - i].chars;
6094 char_set_end = char_set + chars[range_right - i].count;
6095 do
6096 {
6097 if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
6098 update_table[(*char_set) & 0xff] = IN_UCHARS(i);
6099 char_set++;
6100 }
6101 while (char_set < char_set_end);
6102 }
6103 }
6104
6105 offset = -1;
6106 /* Scan forward. */
6107 for (i = 0; i < max; i++)
6108 {
6109 if (range_right == i)
6110 continue;
6111
6112 if (offset == -1)
6113 {
6114 if (chars[i].last_count >= 2)
6115 offset = i;
6116 }
6117 else if (chars[offset].last_count < chars[i].last_count)
6118 offset = i;
6119 }
6120
6121 SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));
6122
6123 if (range_right < 0)
6124 {
6125 if (offset < 0)
6126 return FALSE;
6127 /* Works regardless the value is 1 or 2. */
6128 fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
6129 return TRUE;
6130 }
6131
6132 SLJIT_ASSERT(range_right != offset);
6133
6134 if (common->match_end_ptr != 0)
6135 {
6136 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6137 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6138 OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6139 add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6140 OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
6141 CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
6142 }
6143 else
6144 {
6145 OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6146 add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6147 }
6148
6149 SLJIT_ASSERT(range_right >= 0);
6150
6151 if (!HAS_VIRTUAL_REGISTERS)
6152 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
6153
6154 start = LABEL();
6155 add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6156
6157 #if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
6158 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
6159 #else
6160 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
6161 #endif
6162
6163 if (!HAS_VIRTUAL_REGISTERS)
6164 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
6165 else
6166 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
6167
6168 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6169 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
6170
6171 if (offset >= 0)
6172 {
6173 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
6174 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6175
6176 if (chars[offset].count == 1)
6177 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
6178 else
6179 {
6180 mask = chars[offset].chars[0] ^ chars[offset].chars[1];
6181 if (is_powerof2(mask))
6182 {
6183 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
6184 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
6185 }
6186 else
6187 {
6188 match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
6189 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
6190 JUMPHERE(match);
6191 }
6192 }
6193 }
6194
6195 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6196 if (common->utf && offset != 0)
6197 {
6198 if (offset < 0)
6199 {
6200 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6201 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6202 }
6203 else
6204 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6205
6206 jumpto_if_not_utf_char_start(compiler, TMP1, start);
6207
6208 if (offset < 0)
6209 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6210 }
6211 #endif
6212
6213 if (offset >= 0)
6214 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6215
6216 if (common->match_end_ptr != 0)
6217 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6218 else
6219 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6220 return TRUE;
6221 }
6222
fast_forward_first_char(compiler_common * common)6223 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
6224 {
6225 PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
6226 PCRE2_UCHAR oc;
6227
6228 oc = first_char;
6229 if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
6230 {
6231 oc = TABLE_GET(first_char, common->fcc, first_char);
6232 #if defined SUPPORT_UNICODE
6233 if (first_char > 127 && (common->utf || common->ucp))
6234 oc = UCD_OTHERCASE(first_char);
6235 #endif
6236 }
6237
6238 fast_forward_first_char2(common, first_char, oc, 0);
6239 }
6240
fast_forward_newline(compiler_common * common)6241 static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
6242 {
6243 DEFINE_COMPILER;
6244 struct sljit_label *loop;
6245 struct sljit_jump *lastchar = NULL;
6246 struct sljit_jump *firstchar;
6247 struct sljit_jump *quit = NULL;
6248 struct sljit_jump *foundcr = NULL;
6249 struct sljit_jump *notfoundnl;
6250 jump_list *newline = NULL;
6251
6252 if (common->match_end_ptr != 0)
6253 {
6254 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6255 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6256 }
6257
6258 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6259 {
6260 #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6261 if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)
6262 {
6263 if (HAS_VIRTUAL_REGISTERS)
6264 {
6265 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6266 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6267 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6268 }
6269 else
6270 {
6271 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6272 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6273 }
6274 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6275
6276 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6277 OP2U(SLJIT_SUB | SLJIT_SET_Z, STR_PTR, 0, TMP1, 0);
6278 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
6279 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6280 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6281 #endif
6282 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6283
6284 fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);
6285 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6286 }
6287 else
6288 #endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6289 {
6290 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6291 if (HAS_VIRTUAL_REGISTERS)
6292 {
6293 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6294 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6295 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6296 }
6297 else
6298 {
6299 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6300 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6301 }
6302 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6303
6304 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
6305 OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, STR_PTR, 0, TMP1, 0);
6306 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
6307 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6308 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
6309 #endif
6310 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6311
6312 loop = LABEL();
6313 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6314 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6315 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6316 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6317 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
6318 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
6319
6320 JUMPHERE(quit);
6321 JUMPHERE(lastchar);
6322 }
6323
6324 JUMPHERE(firstchar);
6325
6326 if (common->match_end_ptr != 0)
6327 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6328 return;
6329 }
6330
6331 if (HAS_VIRTUAL_REGISTERS)
6332 {
6333 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6334 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6335 }
6336 else
6337 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6338
6339 /* Example: match /^/ to \r\n from offset 1. */
6340 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6341
6342 if (common->nltype == NLTYPE_ANY)
6343 move_back(common, NULL, FALSE);
6344 else
6345 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6346
6347 loop = LABEL();
6348 common->ff_newline_shortcut = loop;
6349
6350 #ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
6351 if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))
6352 {
6353 if (common->nltype == NLTYPE_ANYCRLF)
6354 {
6355 fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);
6356 if (common->mode != PCRE2_JIT_COMPLETE)
6357 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6358
6359 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6360 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6361 quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6362 }
6363 else
6364 {
6365 fast_forward_char_simd(common, common->newline, common->newline, 0);
6366
6367 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6368 if (common->mode != PCRE2_JIT_COMPLETE)
6369 {
6370 OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
6371 CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
6372 }
6373 }
6374 }
6375 else
6376 #endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */
6377 {
6378 read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
6379 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6380 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6381 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6382 check_newlinechar(common, common->nltype, &newline, FALSE);
6383 set_jumps(newline, loop);
6384 }
6385
6386 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6387 {
6388 if (quit == NULL)
6389 {
6390 quit = JUMP(SLJIT_JUMP);
6391 JUMPHERE(foundcr);
6392 }
6393
6394 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6395 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6396 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL);
6397 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
6398 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6399 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6400 #endif
6401 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6402 JUMPHERE(notfoundnl);
6403 JUMPHERE(quit);
6404 }
6405
6406 if (lastchar)
6407 JUMPHERE(lastchar);
6408 JUMPHERE(firstchar);
6409
6410 if (common->match_end_ptr != 0)
6411 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6412 }
6413
6414 static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6415
fast_forward_start_bits(compiler_common * common)6416 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
6417 {
6418 DEFINE_COMPILER;
6419 const sljit_u8 *start_bits = common->re->start_bitmap;
6420 struct sljit_label *start;
6421 struct sljit_jump *partial_quit;
6422 #if PCRE2_CODE_UNIT_WIDTH != 8
6423 struct sljit_jump *found = NULL;
6424 #endif
6425 jump_list *matches = NULL;
6426
6427 if (common->match_end_ptr != 0)
6428 {
6429 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6430 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
6431 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6432 OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
6433 CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
6434 }
6435
6436 start = LABEL();
6437
6438 partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6439 if (common->mode == PCRE2_JIT_COMPLETE)
6440 add_jump(compiler, &common->failed_match, partial_quit);
6441
6442 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6443 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6444
6445 if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
6446 {
6447 #if PCRE2_CODE_UNIT_WIDTH != 8
6448 if ((start_bits[31] & 0x80) != 0)
6449 found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
6450 else
6451 CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
6452 #elif defined SUPPORT_UNICODE
6453 if (common->utf && is_char7_bitset(start_bits, FALSE))
6454 CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
6455 #endif
6456 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6457 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6458 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
6459 if (!HAS_VIRTUAL_REGISTERS)
6460 {
6461 OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
6462 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP3, 0);
6463 }
6464 else
6465 {
6466 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6467 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
6468 }
6469 JUMPTO(SLJIT_ZERO, start);
6470 }
6471 else
6472 set_jumps(matches, start);
6473
6474 #if PCRE2_CODE_UNIT_WIDTH != 8
6475 if (found != NULL)
6476 JUMPHERE(found);
6477 #endif
6478
6479 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6480
6481 if (common->mode != PCRE2_JIT_COMPLETE)
6482 JUMPHERE(partial_quit);
6483
6484 if (common->match_end_ptr != 0)
6485 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
6486 }
6487
search_requested_char(compiler_common * common,PCRE2_UCHAR req_char,BOOL caseless,BOOL has_firstchar)6488 static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
6489 {
6490 DEFINE_COMPILER;
6491 struct sljit_label *loop;
6492 struct sljit_jump *toolong;
6493 struct sljit_jump *already_found;
6494 struct sljit_jump *found;
6495 struct sljit_jump *found_oc = NULL;
6496 jump_list *not_found = NULL;
6497 sljit_u32 oc, bit;
6498
6499 SLJIT_ASSERT(common->req_char_ptr != 0);
6500 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
6501 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
6502 toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
6503 already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);
6504
6505 if (has_firstchar)
6506 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6507 else
6508 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
6509
6510 oc = req_char;
6511 if (caseless)
6512 {
6513 oc = TABLE_GET(req_char, common->fcc, req_char);
6514 #if defined SUPPORT_UNICODE
6515 if (req_char > 127 && (common->utf || common->ucp))
6516 oc = UCD_OTHERCASE(req_char);
6517 #endif
6518 }
6519
6520 #ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
6521 if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
6522 {
6523 not_found = fast_requested_char_simd(common, req_char, oc);
6524 }
6525 else
6526 #endif
6527 {
6528 loop = LABEL();
6529 add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
6530
6531 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
6532
6533 if (req_char == oc)
6534 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6535 else
6536 {
6537 bit = req_char ^ oc;
6538 if (is_powerof2(bit))
6539 {
6540 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
6541 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
6542 }
6543 else
6544 {
6545 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6546 found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
6547 }
6548 }
6549 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6550 JUMPTO(SLJIT_JUMP, loop);
6551
6552 JUMPHERE(found);
6553 if (found_oc)
6554 JUMPHERE(found_oc);
6555 }
6556
6557 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
6558
6559 JUMPHERE(already_found);
6560 JUMPHERE(toolong);
6561 return not_found;
6562 }
6563
/* Emits the shared frame reverting routine, entered by a fast call; the
return address is kept in RETURN_ADDR.

The routine repeatedly reads the word just below STACK_TOP:
  > 0   it is added to the local base to form an address; two saved words
        are written back there and three words are dropped from the stack;
  == 0  the routine returns;
  < 0   its negation is added to the local base; one saved word is written
        back and two words are dropped from the stack. */
static void do_revertframes(compiler_common *common)
{
  DEFINE_COMPILER;
  struct sljit_jump *branch;
  struct sljit_label *next_entry;

  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
  GET_LOCAL_BASE(TMP1, 0, 0);

  /* Drop frames until we reach STACK_TOP. */
  next_entry = LABEL();
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
  branch = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);

  /* Positive value: restore two words. */
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  if (!HAS_VIRTUAL_REGISTERS) {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
    /* TMP1 was used as a scratch register above, so reload the local base. */
    GET_LOCAL_BASE(TMP1, 0, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
  } else {
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
  }
  JUMPTO(SLJIT_JUMP, next_entry);

  JUMPHERE(branch);
  branch = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
  /* End of reverting values. */
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

  /* Negative value: restore one word. */
  JUMPHERE(branch);
  OP2(SLJIT_SUB, TMP2, 0, SLJIT_IMM, 0, TMP2, 0);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  if (!HAS_VIRTUAL_REGISTERS) {
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
  } else {
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
  }
  JUMPTO(SLJIT_JUMP, next_entry);
}
6617
/* Emits the shared word boundary routine, entered by a fast call; the return
address is kept in the LOCALS0 stack slot.

The word flag of the character before STR_PTR is computed into TMP3 (0 when
STR_PTR is at or before jit_arguments.begin) and the word flag of the
character at STR_PTR into TMP2 (0 at the end of the subject). On the normal
return path TMP2 holds TMP2 XOR TMP3 and the zero flag reflects that value.

With invalid_utf enabled there are two more return paths: TMP2 is set to -1
when the previous character is invalid and the next one decodes to
INVALID_UTF_CHAR, and TMP2 is set to TMP3 when reading the next character
takes the invalid jump. */
static void check_wordboundary(compiler_common *common)
{
  DEFINE_COMPILER;
  struct sljit_jump *no_prev;
  jump_list *no_next = NULL;
#ifdef SUPPORT_UNICODE
  struct sljit_label *classify_next;
  jump_list *bad_prev = NULL;
#endif /* SUPPORT_UNICODE */
  jump_list *bad_next = NULL;
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
  struct sljit_jump *skip;
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */

  /* The table lookups below shift by 4 to extract the ctype_word bit. */
  SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

  sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);

  /* Part one: the type of the previous character goes into TMP3. */
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
  no_prev = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

#ifdef SUPPORT_UNICODE
  if (common->invalid_utf) {
    peek_char_back(common, READ_CHAR_MAX, &bad_prev);

    if (common->mode != PCRE2_JIT_COMPLETE) {
      /* TMP1 and STR_PTR are preserved around the start_used_ptr check. */
      OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
      OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
      move_back(common, NULL, TRUE);
      check_start_used_ptr(common);
      OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
      OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
    }
  }
  else
#endif /* SUPPORT_UNICODE */
  {
    if (common->mode != PCRE2_JIT_COMPLETE) {
      move_back(common, NULL, TRUE);
      check_start_used_ptr(common);
      read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
    } else {
      peek_char_back(common, READ_CHAR_MAX, NULL);
    }
  }

  /* Classify the previous character. */
#ifdef SUPPORT_UNICODE
  if (common->ucp) {
    /* Underscore counts as a word character; otherwise the UCD type is
    checked against the ucp_Ll..ucp_Lu and ucp_Nd..ucp_No ranges. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
    skip = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
    add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
    OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
    OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
    OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
    JUMPHERE(skip);
    OP1(SLJIT_MOV, TMP3, 0, TMP2, 0);
  }
  else
#endif /* SUPPORT_UNICODE */
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
    skip = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
    /* Here TMP3 has already been zeroed. */
    skip = NULL;
    if (common->utf) {
      skip = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
    }
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
    OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
    JUMPHERE(skip);
#elif defined SUPPORT_UNICODE
    if (skip != NULL) {
      JUMPHERE(skip);
    }
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
  JUMPHERE(no_prev);

  /* Part two: the type of the character at STR_PTR goes into TMP2. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  check_str_end(common, &no_next);
  peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &bad_next);

  /* Same classification as above, repeated for the next character. */
#ifdef SUPPORT_UNICODE

  classify_next = LABEL();

  if (common->ucp) {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
    skip = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
    add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
    OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
    OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
    OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
    JUMPHERE(skip);
  }
  else
#endif /* SUPPORT_UNICODE */
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
    /* TMP2 may be destroyed by peek_char. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
    skip = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
    skip = NULL;
    if (common->utf) {
      skip = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
    }
#endif
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
    OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
    JUMPHERE(skip);
#elif defined SUPPORT_UNICODE
    if (skip != NULL) {
      JUMPHERE(skip);
    }
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
  set_jumps(no_next, LABEL());

  /* Normal return: TMP2 = next XOR previous, with the zero flag set from it. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

#ifdef SUPPORT_UNICODE
  if (common->invalid_utf) {
    /* The previous character was invalid. */
    set_jumps(bad_prev, LABEL());

    peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL);
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, classify_next);

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
    OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

    /* The next character was invalid. */
    set_jumps(bad_next, LABEL());
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
    OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
    OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
  }
#endif /* SUPPORT_UNICODE */
}
6778
/* Tries to emit a class test as a few range comparisons on TMP1.

The 256 bit map is walked once and every position where the bit value
changes is recorded as an edge. A final edge at 256 is added when the last
bit differs from what nclass implies for values above 255. Code is generated
only when there are at most four edges.

Arguments:
  bits        the 32 byte class bit map
  nclass      TRUE if characters above 255 belong to the class
  invert      TRUE to flip the sense of the test
  backtracks  receives the jumps emitted by the generated comparisons

Returns TRUE when code was emitted (TMP1 may be destroyed), FALSE when the
class needs more edges than are supported here. */
static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
  /* May destroy TMP1. */
  DEFINE_COMPILER;
  int edge[MAX_CLASS_RANGE_SIZE];
  sljit_u8 state, cur, uniform;
  int pos, count = 0;
  int base;

  state = bits[0] & 0x1;
  /* A byte equal to 'uniform' holds eight bits that all match 'state'. */
  uniform = (sljit_u8)(state != 0 ? 0xff : 0x00);

  pos = 0;
  while (pos < 256) {
    const sljit_u8 chunk = bits[pos >> 3];

    if ((pos & 0x7) == 0 && chunk == uniform) {
      pos += 8;
      continue;
    }

    cur = (chunk >> (pos & 0x7)) & 0x1;
    if (cur != state) {
      if (count >= MAX_CLASS_RANGE_SIZE) {
        return FALSE;
      }
      edge[count++] = pos;
      state = cur;
      uniform = (sljit_u8)(cur != 0 ? 0xff : 0x00);
    }
    pos++;
  }

  /* The state at 255 disagrees with nclass: add an edge at 256. */
  if ((state != 0) != (nclass != 0)) {
    if (count >= MAX_CLASS_RANGE_SIZE) {
      return FALSE;
    }
    edge[count++] = 256;
  }

  if (count < 0 || count > 4) {
    return FALSE;
  }

  state = bits[0] & 0x1;
  if (invert) {
    state ^= 0x1;
  }

  switch (count) {
    case 0:
      /* No character is accepted when state == 0; when state != 0, all
      characters are accepted and nothing needs to be emitted. */
      if (state == 0) {
        add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      }
      return TRUE;

    case 1:
      add_jump(compiler, backtracks, CMP(state == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edge[0]));
      return TRUE;

    case 2:
      if (edge[0] + 1 == edge[1]) {
        add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, edge[0]));
      } else {
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edge[0]);
        add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edge[1] - edge[0]));
      }
      return TRUE;

    case 3:
      if (state != 0) {
        add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edge[2]));
        if (edge[0] + 1 == edge[1]) {
          add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edge[0]));
        } else {
          OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edge[0]);
          add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edge[1] - edge[0]));
        }
        return TRUE;
      }

      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edge[0]));
      if (edge[1] + 1 == edge[2]) {
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edge[1]));
      } else {
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edge[1]);
        add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edge[2] - edge[1]));
      }
      return TRUE;

    case 4:
      /* Two ranges of equal length whose starts differ in exactly one bit
      can be merged by setting that bit before a single range test. */
      if ((edge[1] - edge[0]) == (edge[3] - edge[2])
          && (edge[0] | (edge[2] - edge[0])) == edge[2]
          && (edge[1] & (edge[2] - edge[0])) == 0
          && is_powerof2(edge[2] - edge[0])) {
        SLJIT_ASSERT((edge[0] & (edge[2] - edge[0])) == 0 && (edge[2] & edge[3] & (edge[2] - edge[0])) != 0);
        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, edge[2] - edge[0]);
        if (edge[2] + 1 == edge[3]) {
          add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, edge[2]));
        } else {
          OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edge[2]);
          add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edge[3] - edge[2]));
        }
        return TRUE;
      }

      if (state != 0) {
        /* 'base' is the amount already subtracted from TMP1. */
        base = 0;
        if (edge[0] + 1 == edge[1]) {
          add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edge[0]));
        } else {
          OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edge[0]);
          add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edge[1] - edge[0]));
          base = edge[0];
        }

        if (edge[2] + 1 == edge[3]) {
          add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edge[2] - base));
        } else {
          OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edge[2] - base);
          add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edge[3] - edge[2]));
        }
        return TRUE;
      }

      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edge[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edge[3] - edge[0]));
      if (edge[1] + 1 == edge[2]) {
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edge[1] - edge[0]));
      } else {
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edge[1] - edge[0]);
        add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edge[2] - edge[1]));
      }
      return TRUE;

    default:
      SLJIT_UNREACHABLE();
      return FALSE;
  }
}
6930
/* Tries to emit a class test as a short list of compare + conditional move
pairs on TMP1, accumulating the result in TMP2.

The set bits of the map (the clear bits when nclass is TRUE) are collected
into a list of at most MAX_CLASS_CHARS_SIZE entries. A value c with bit 0x20
set whose partner c - 0x20 is already listed is folded into the partner's
entry, which is then marked with 0x120 and tested after OR-ing TMP1 with
0x20.

Returns FALSE without emitting anything when conditional move is not
available, when the list would be too long, or when it is empty. Returns TRUE
after emitting the test; TMP1 may be destroyed. */
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
  /* May destroy TMP1. */
  DEFINE_COMPILER;
  uint16_t char_list[MAX_CLASS_CHARS_SIZE];
  uint8_t byte;
  sljit_s32 type;
  int i, j, k, len, c;

  if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV)) {
    return FALSE;
  }

  len = 0;

  for (i = 0; i < 32; i++) {
    byte = bits[i];
    if (nclass) {
      byte = (uint8_t)~byte;
    }

    for (j = 0; byte != 0; byte >>= 1, j++) {
      if ((byte & 0x1) == 0) {
        continue;
      }

      c = i * 8 + j;
      k = len;

      /* Try to fold c into an already listed c - 0x20. */
      if ((c & 0x20) != 0) {
        for (k = 0; k < len; k++) {
          if (char_list[k] == c - 0x20) {
            char_list[k] |= 0x120;
            break;
          }
        }
      }

      /* k == len means that nothing was folded. */
      if (k == len) {
        if (len >= MAX_CLASS_CHARS_SIZE) {
          return FALSE;
        }
        char_list[len++] = (uint16_t) c;
      }
    }
  }

  if (len == 0) {
    return FALSE;  /* Should never occur, but stops analyzers complaining. */
  }

  i = 0;
  j = 0;

  /* TMP2 starts as the result of the compare with 0 when 0 is listed, and
  as the constant 0 otherwise. */
  if (char_list[0] == 0) {
    i++;
    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0);
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
  } else {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  }

  /* First pass: plain entries; the folded ones are only counted in j. */
  for (; i < len; i++) {
    if ((char_list[i] & 0x100) != 0) {
      j++;
      continue;
    }
    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i]);
    CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
  }

  /* Second pass: folded entries, compared after setting bit 0x20 in TMP1. */
  if (j != 0) {
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);

    for (i = 0; i < len; i++) {
      if ((char_list[i] & 0x100) == 0) {
        continue;
      }
      j--;
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff);
      CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
    }
  }

  if (invert) {
    nclass = !nclass;
  }

  type = nclass ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
  add_jump(compiler, backtracks, CMP(type, TMP2, 0, SLJIT_IMM, 0));
  return TRUE;
}
7031
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Attempts the range based class optimization first and falls back to the
character list based one only when that did not apply. Returns TRUE when
either of them emitted the class test. May destroy TMP1. */
if (!optimize_class_ranges(common, bits, nclass, invert, backtracks))
  return optimize_class_chars(common, bits, nclass, invert, backtracks);

return TRUE;
}
7039
static void check_anynewline(compiler_common *common)
{
/* Emits a fast-call routine that checks whether TMP1 contains a newline
character: 0x0a-0x0d, 0x85 and, when wide characters are possible, 0x2028 or
0x2029. The outcome is accumulated in TMP2 and the last instruction also
updates the zero flag from it. TMP1 is rebased during the checks, so both
TMP1 and TMP2 are destroyed. */
DEFINE_COMPILER;
/* Characters above 0xff can only be seen in 16/32 bit mode, or in 8 bit mode
when UTF is enabled. */
#if PCRE2_CODE_UNIT_WIDTH != 8
const BOOL wide_chars = TRUE;
#elif defined SUPPORT_UNICODE
const BOOL wide_chars = common->utf;
#else
const BOOL wide_chars = FALSE;
#endif

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* 0x0a..0x0d as a single unsigned range test on the rebased value. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

/* 0x85: its flag is consumed either inside the block below or by the final
OP_FLAGS. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);

if (wide_chars)
  {
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit folds 0x2028 onto 0x2029 (in rebased form). */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
  }

OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7066
static void check_hspace(compiler_common *common)
{
/* Emits a fast-call routine that checks whether TMP1 contains a horizontal
space character: 0x09, 0x20, 0xa0 and, when wide characters are possible,
0x1680, 0x180e, 0x2000-0x200a, 0x202f, 0x205f or 0x3000. The outcome is
accumulated in TMP2 and the last instruction also updates the zero flag from
it. TMP2 is destroyed; TMP1 is rebased by 0x2000 on the wide path. */
DEFINE_COMPILER;
/* Characters above 0xff can only be seen in 16/32 bit mode, or in 8 bit mode
when UTF is enabled. */
#if PCRE2_CODE_UNIT_WIDTH != 8
const BOOL wide_chars = TRUE;
#elif defined SUPPORT_UNICODE
const BOOL wide_chars = common->utf;
#else
const BOOL wide_chars = FALSE;
#endif

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Each comparison only sets the flags; the OP_FLAGS after it collects the
result into TMP2. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

/* 0xa0: its flag is consumed either inside the block below or by the final
OP_FLAGS. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xa0);

if (wide_chars)
  {
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

  /* From here on TMP1 is relative to 0x2000, which allows the range
  0x2000..0x200a to be tested with one unsigned comparison. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
  }

OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7105
static void check_vspace(compiler_common *common)
{
/* Emits a fast-call routine that checks whether TMP1 contains a vertical
space character: 0x0a-0x0d, 0x85 and, when wide characters are possible,
0x2028 or 0x2029. The outcome is accumulated in TMP2 and the last instruction
also updates the zero flag from it. TMP1 is rebased during the checks, so
both TMP1 and TMP2 are destroyed. */
DEFINE_COMPILER;
/* Characters above 0xff can only be seen in 16/32 bit mode, or in 8 bit mode
when UTF is enabled. */
#if PCRE2_CODE_UNIT_WIDTH != 8
const BOOL wide_chars = TRUE;
#elif defined SUPPORT_UNICODE
const BOOL wide_chars = common->utf;
#else
const BOOL wide_chars = FALSE;
#endif

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Rebase to 0x0a so that 0x0a..0x0d becomes one unsigned range test. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

/* The flag of the 0x85 test is picked up by the next OP_FLAGS that is
emitted, whichever branch that is in. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);

if (wide_chars)
  {
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* 0x2028 and 0x2029 differ only in the lowest bit: set it, test once. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
  }

OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7133
static void do_casefulcmp(compiler_common *common)
{
/* Emits a fast-call routine that compares two code unit sequences exactly.
One sequence is read through TMP1. The other is read through STR_PTR, which
is first moved back by TMP2. TMP2 is the loop counter: it is decreased by
IN_UCHARS(1) per iteration and the loop ends when it reaches zero or when the
two loaded code units differ. The return address is parked in LOCALS0 and
reloaded into TMP1 for the fast return. */
DEFINE_COMPILER;
struct sljit_jump *mismatch;
struct sljit_label *loop;
int char1_reg;
int char2_reg;
int load_mode; /* 0: plain loads, 1: post-update loads, 2: pre-update loads. */
BOOL borrowed;

if (HAS_VIRTUAL_REGISTERS)
  {
  char1_reg = STR_END;
  char2_reg = STACK_TOP;
  }
else
  {
  char1_reg = TMP3;
  char2_reg = RETURN_ADDR;
  }

/* When STR_END / STACK_TOP are used as scratch registers their values are
kept in TMP3 / RETURN_ADDR for the duration of the routine. */
borrowed = (char1_reg == STR_END);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

if (borrowed)
  {
  OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
  }

/* Ask sljit which addressing form is available for the loads. */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  load_mode = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  load_mode = 2;
else
  load_mode = 0;

if (load_mode == 2)
  {
  /* Pre-update loads advance before reading, so start one unit earlier. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

loop = LABEL();

switch (load_mode)
  {
  case 1:
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case 2:
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

mismatch = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop);

JUMPHERE(mismatch);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

if (load_mode == 2)
  {
  /* Undo the initial bias applied for the pre-update loads. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

if (borrowed)
  {
  OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
  }

OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7213
static void do_caselesscmp(compiler_common *common)
{
/* Emits a fast-call routine that compares two code unit sequences after
mapping each loaded code unit through the common->lcc table (values above
255 are left unmapped when the code unit width is not 8). One sequence is
read through TMP1. The other is read through STR_PTR, which is first moved
back by TMP2. TMP2 is the loop counter: it is decreased by IN_UCHARS(1) per
iteration and the loop ends when it reaches zero or on the first difference.
The return address is parked in LOCALS0 and the original value of STR_END in
LOCALS1. */
DEFINE_COMPILER;
struct sljit_jump *skip;
struct sljit_label *loop;
int char1_reg = STR_END;
int char2_reg;
int lcc_table;
int load_mode; /* 0: plain loads, 1: post-update loads, 2: pre-update loads. */
BOOL borrowed;

if (HAS_VIRTUAL_REGISTERS)
  {
  char2_reg = STACK_TOP;
  lcc_table = STACK_LIMIT;
  }
else
  {
  char2_reg = RETURN_ADDR;
  lcc_table = TMP3;
  }

/* When STACK_TOP / STACK_LIMIT are used as scratch registers their values
are kept in TMP3 / RETURN_ADDR for the duration of the routine. */
borrowed = (char2_reg == STACK_TOP);

/* Ask sljit which addressing form is available for the loads. */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  load_mode = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  load_mode = 2;
else
  load_mode = 0;

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);

if (borrowed)
  {
  OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
  }

OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);

if (load_mode == 2)
  {
  /* Pre-update loads advance before reading, so start one unit earlier. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

loop = LABEL();

switch (load_mode)
  {
  case 1:
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case 2:
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  /* STR_PTR is advanced later, after the table lookups. */
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

/* Map both code units through the table. With wider code units, values
above 255 bypass the lookup. */
#if PCRE2_CODE_UNIT_WIDTH != 8
skip = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(skip);
skip = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(skip);
#endif

if (load_mode == 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

skip = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop);

JUMPHERE(skip);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

if (load_mode == 2)
  {
  /* Undo the initial bias applied for the pre-update loads. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

if (borrowed)
  {
  OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
  }

OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7311
/* Emits the comparison code for one pattern character starting at cc (all of
its code units when UTF is active and the character has extra units) against
the subject text addressed relative to STR_PTR.

common     the JIT compiler state
caseless   when TRUE and the character has an other case, the differing bit
           is ORed into both the loaded value and the constant it is compared
           with, so that both cases compare equal
cc         the first code unit of the character in the pattern
context    running state shared between consecutive calls: length is the
           distance from STR_PTR of the data still to be compared and is
           reduced by IN_UCHARS(1) per code unit; sourcereg is the register
           that receives the next load (-1 before the first load); ucharptr
           and the c / oc unions collect code units for multi-unit compares
backtracks receives a jump for every comparison that can fail

Returns cc advanced past the code units that were processed. */
static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
    compare_context *context, jump_list **backtracks)
{
DEFINE_COMPILER;
unsigned int othercasebit = 0;
PCRE2_SPTR othercasechar = NULL;
#ifdef SUPPORT_UNICODE
int utflength;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  othercasebit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(othercasebit);
  /* Extracting bit difference info. The upper part of the value selects the
  code unit of the character that contains the differing bit (stored in
  othercasechar), the lower part is reduced to the bit mask itself. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  othercasechar = cc + (othercasebit >> 8);
  othercasebit &= 0xff;
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* Note that this code only handles characters in the BMP. If there
  ever are characters outside the BMP whose othercase differs in only one
  bit from itself (there currently are none), this code will need to be
  revised for PCRE2_CODE_UNIT_WIDTH == 32. */
  othercasechar = cc + (othercasebit >> 9);
  if ((othercasebit & 0x100) != 0)
    othercasebit = (othercasebit & 0xff) << 8;
  else
    othercasebit &= 0xff;
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  }

if (context->sourcereg == -1)
  {
  /* First call for this sequence: load the first chunk into TMP1 using the
  widest access that the remaining length allows. TMP2 becomes the target of
  the next load. Note that the trailing "else" of each unaligned variant
  binds to the MOV_UCHAR load that follows the #endif. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 32
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UNICODE
/* Number of code units that belong to this character. */
utflength = 1;
if (common->utf && HAS_EXTRALEN(*cc))
  utflength += GET_EXTRALEN(*cc);

/* The loop header and footer only exist with Unicode support; without it
the body below runs exactly once. */
do
  {
#endif

  context->length -= IN_UCHARS(1);
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)

  /* Unaligned read is supported. Code units are collected in context->c
  (expected value) and context->oc (case bit mask) and compared together
  once a full chunk is available. */
  if (othercasebit != 0 && othercasechar == cc)
    {
    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
    context->oc.asuchars[context->ucharptr] = othercasebit;
    }
  else
    {
    context->c.asuchars[context->ucharptr] = *cc;
    context->oc.asuchars[context->ucharptr] = 0;
    }
  context->ucharptr++;

  /* Flush when the chunk is full, when nothing is left to compare, or (8 bit
  only) when two units are collected and a single one remains. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Start loading the next chunk into the free register before the
    current one is compared. Nothing is loaded when length is zero. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if PCRE2_CODE_UNIT_WIDTH == 8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
    /* Swap registers: sourcereg now names the register that holds the
    chunk loaded earlier, which is the one compared below. */
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

    /* Compare as 32 bit, 16 bit or 8 bit data depending on how many code
    units were collected. */
    switch(context->ucharptr)
      {
      case 4 / sizeof(PCRE2_UCHAR):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(PCRE2_UCHAR):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#if PCRE2_CODE_UNIT_WIDTH == 8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    context->ucharptr = 0;
    }

#else

  /* Unaligned read is unsupported or in 32 bit mode. Code units are loaded
  and compared one at a time, the load of the next unit being issued before
  the comparison of the current one. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  /* Swap registers: sourcereg now names the previously loaded code unit. */
  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

  if (othercasebit != 0 && othercasechar == cc)
    {
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));

#endif

  cc++;
#ifdef SUPPORT_UNICODE
  utflength--;
  }
while (utflength > 0);
#endif

return cc;
}
7464
7465 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
7466
/* SET_TYPE_OFFSET(value): adjusts typereg by the difference between the
current typeoffset and value (one ADD or SUB is emitted, or nothing when they
are already equal) and then records value as the new typeoffset.

The macro expands to an if statement followed by an assignment and evaluates
its argument several times. Use it only as a complete statement inside a
braced block, with an argument that has no side effects. The names typereg
and typeoffset, and whatever OP2 requires, must be in scope where it is
expanded. */
#define SET_TYPE_OFFSET(value) \
  if ((value) != typeoffset) \
    { \
    if ((value) < typeoffset) \
      OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
    else \
      OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
    } \
  typeoffset = (value);

/* SET_CHAR_OFFSET(value): same as above for the character held in TMP1 and
the charoffset variable. The same usage restrictions apply: it is a
multi-statement macro and evaluates its argument several times. */
#define SET_CHAR_OFFSET(value) \
  if ((value) != charoffset) \
    { \
    if ((value) < charoffset) \
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
    else \
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
    } \
  charoffset = (value);
7486
7487 static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
7488
#ifdef SUPPORT_UNICODE
/* Bits of the unicode_status variable of compile_xclass_matchingpath. */

/* The character value is still needed after the UCD record lookup, so it is
kept in RETURN_ADDR and copied back to TMP1 afterwards. */
#define XCLASS_SAVE_CHAR 0x001
/* The character has already been copied to RETURN_ADDR (done when the class
bitmap is processed), so it does not need to be copied again. */
#define XCLASS_CHAR_SAVED 0x002
/* The class has an item that is tested against the chartype field. */
#define XCLASS_HAS_TYPE 0x004
/* The class has an item that is tested against the script field. */
#define XCLASS_HAS_SCRIPT 0x008
/* The class has a PT_SCX item, tested against the script extension sets. */
#define XCLASS_HAS_SCRIPT_EXTENSION 0x010
/* The class has a PT_BOOL item, tested against the boolean property sets. */
#define XCLASS_HAS_BOOL 0x020
/* The class has a PT_BIDICL item, tested against the bidi class. */
#define XCLASS_HAS_BIDICL 0x040
/* Any of these requires the UCD record of the character to be located. */
#define XCLASS_NEEDS_UCD (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_SCRIPT_EXTENSION | XCLASS_HAS_BOOL | XCLASS_HAS_BIDICL)
/* A negated (XCL_NOTPROP) PT_SCX item is present; the script field is loaded
into TMP2 for it. */
#define XCLASS_SCRIPT_EXTENSION_NOTPROP 0x080
/* TMP2 was stashed in RETURN_ADDR before being overwritten with the script
field and must be restored from there. */
#define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR 0x100
/* TMP2 was stashed in LOCALS0 instead (RETURN_ADDR holds the saved
character) and must be restored from there. */
#define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0 0x200

#endif /* SUPPORT_UNICODE */
7503
compile_xclass_matchingpath(compiler_common * common,PCRE2_SPTR cc,jump_list ** backtracks)7504 static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
7505 {
7506 DEFINE_COMPILER;
7507 jump_list *found = NULL;
7508 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
7509 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
7510 struct sljit_jump *jump = NULL;
7511 PCRE2_SPTR ccbegin;
7512 int compares, invertcmp, numberofcmps;
7513 #if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
7514 BOOL utf = common->utf;
7515 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
7516
7517 #ifdef SUPPORT_UNICODE
7518 sljit_u32 unicode_status = 0;
7519 int typereg = TMP1;
7520 const sljit_u32 *other_cases;
7521 sljit_uw typeoffset;
7522 #endif /* SUPPORT_UNICODE */
7523
7524 /* Scanning the necessary info. */
7525 cc++;
7526 ccbegin = cc;
7527 compares = 0;
7528
7529 if (cc[-1] & XCL_MAP)
7530 {
7531 min = 0;
7532 cc += 32 / sizeof(PCRE2_UCHAR);
7533 }
7534
7535 while (*cc != XCL_END)
7536 {
7537 compares++;
7538 if (*cc == XCL_SINGLE)
7539 {
7540 cc ++;
7541 GETCHARINCTEST(c, cc);
7542 if (c > max) max = c;
7543 if (c < min) min = c;
7544 #ifdef SUPPORT_UNICODE
7545 unicode_status |= XCLASS_SAVE_CHAR;
7546 #endif /* SUPPORT_UNICODE */
7547 }
7548 else if (*cc == XCL_RANGE)
7549 {
7550 cc ++;
7551 GETCHARINCTEST(c, cc);
7552 if (c < min) min = c;
7553 GETCHARINCTEST(c, cc);
7554 if (c > max) max = c;
7555 #ifdef SUPPORT_UNICODE
7556 unicode_status |= XCLASS_SAVE_CHAR;
7557 #endif /* SUPPORT_UNICODE */
7558 }
7559 #ifdef SUPPORT_UNICODE
7560 else
7561 {
7562 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7563 cc++;
7564 if (*cc == PT_CLIST && cc[-1] == XCL_PROP)
7565 {
7566 other_cases = PRIV(ucd_caseless_sets) + cc[1];
7567 while (*other_cases != NOTACHAR)
7568 {
7569 if (*other_cases > max) max = *other_cases;
7570 if (*other_cases < min) min = *other_cases;
7571 other_cases++;
7572 }
7573 }
7574 else
7575 {
7576 max = READ_CHAR_MAX;
7577 min = 0;
7578 }
7579
7580 switch(*cc)
7581 {
7582 case PT_ANY:
7583 /* Any either accepts everything or ignored. */
7584 if (cc[-1] == XCL_PROP)
7585 {
7586 compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
7587 if (list == backtracks)
7588 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7589 return;
7590 }
7591 break;
7592
7593 case PT_LAMP:
7594 case PT_GC:
7595 case PT_PC:
7596 case PT_ALNUM:
7597 unicode_status |= XCLASS_HAS_TYPE;
7598 break;
7599
7600 case PT_SCX:
7601 unicode_status |= XCLASS_HAS_SCRIPT_EXTENSION;
7602 if (cc[-1] == XCL_NOTPROP)
7603 {
7604 unicode_status |= XCLASS_SCRIPT_EXTENSION_NOTPROP;
7605 break;
7606 }
7607 compares++;
7608 /* Fall through */
7609
7610 case PT_SC:
7611 unicode_status |= XCLASS_HAS_SCRIPT;
7612 break;
7613
7614 case PT_SPACE:
7615 case PT_PXSPACE:
7616 case PT_WORD:
7617 case PT_PXGRAPH:
7618 case PT_PXPRINT:
7619 case PT_PXPUNCT:
7620 unicode_status |= XCLASS_SAVE_CHAR | XCLASS_HAS_TYPE;
7621 break;
7622
7623 case PT_CLIST:
7624 case PT_UCNC:
7625 unicode_status |= XCLASS_SAVE_CHAR;
7626 break;
7627
7628 case PT_BOOL:
7629 unicode_status |= XCLASS_HAS_BOOL;
7630 break;
7631
7632 case PT_BIDICL:
7633 unicode_status |= XCLASS_HAS_BIDICL;
7634 break;
7635
7636 default:
7637 SLJIT_UNREACHABLE();
7638 break;
7639 }
7640 cc += 2;
7641 }
7642 #endif /* SUPPORT_UNICODE */
7643 }
7644 SLJIT_ASSERT(compares > 0);
7645
7646 /* We are not necessary in utf mode even in 8 bit mode. */
7647 cc = ccbegin;
7648 if ((cc[-1] & XCL_NOT) != 0)
7649 read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
7650 else
7651 {
7652 #ifdef SUPPORT_UNICODE
7653 read_char(common, min, max, (unicode_status & XCLASS_NEEDS_UCD) ? backtracks : NULL, 0);
7654 #else /* !SUPPORT_UNICODE */
7655 read_char(common, min, max, NULL, 0);
7656 #endif /* SUPPORT_UNICODE */
7657 }
7658
7659 if ((cc[-1] & XCL_HASPROP) == 0)
7660 {
7661 if ((cc[-1] & XCL_MAP) != 0)
7662 {
7663 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7664 if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
7665 {
7666 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7667 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7668 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7669 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7670 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
7671 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
7672 }
7673
7674 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7675 JUMPHERE(jump);
7676
7677 cc += 32 / sizeof(PCRE2_UCHAR);
7678 }
7679 else
7680 {
7681 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
7682 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
7683 }
7684 }
7685 else if ((cc[-1] & XCL_MAP) != 0)
7686 {
7687 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7688 #ifdef SUPPORT_UNICODE
7689 unicode_status |= XCLASS_CHAR_SAVED;
7690 #endif /* SUPPORT_UNICODE */
7691 if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
7692 {
7693 #if PCRE2_CODE_UNIT_WIDTH == 8
7694 jump = NULL;
7695 if (common->utf)
7696 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7697 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7698
7699 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7700 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7701 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7702 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7703 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
7704 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
7705
7706 #if PCRE2_CODE_UNIT_WIDTH == 8
7707 if (common->utf)
7708 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7709 JUMPHERE(jump);
7710 }
7711
7712 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7713 cc += 32 / sizeof(PCRE2_UCHAR);
7714 }
7715
7716 #ifdef SUPPORT_UNICODE
7717 if (unicode_status & XCLASS_NEEDS_UCD)
7718 {
7719 if ((unicode_status & (XCLASS_SAVE_CHAR | XCLASS_CHAR_SAVED)) == XCLASS_SAVE_CHAR)
7720 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7721
7722 #if PCRE2_CODE_UNIT_WIDTH == 32
7723 if (!common->utf)
7724 {
7725 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
7726 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
7727 JUMPHERE(jump);
7728 }
7729 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
7730
7731 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7732 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
7733 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
7734 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
7735 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7736 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7737 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
7738 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
7739 OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
7740 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
7741 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
7742
7743 ccbegin = cc;
7744
7745 if (unicode_status & XCLASS_HAS_BIDICL)
7746 {
7747 OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
7748 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT);
7749
7750 while (*cc != XCL_END)
7751 {
7752 if (*cc == XCL_SINGLE)
7753 {
7754 cc ++;
7755 GETCHARINCTEST(c, cc);
7756 }
7757 else if (*cc == XCL_RANGE)
7758 {
7759 cc ++;
7760 GETCHARINCTEST(c, cc);
7761 GETCHARINCTEST(c, cc);
7762 }
7763 else
7764 {
7765 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7766 cc++;
7767 if (*cc == PT_BIDICL)
7768 {
7769 compares--;
7770 invertcmp = (compares == 0 && list != backtracks);
7771 if (cc[-1] == XCL_NOTPROP)
7772 invertcmp ^= 0x1;
7773 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
7774 add_jump(compiler, compares > 0 ? list : backtracks, jump);
7775 }
7776 cc += 2;
7777 }
7778 }
7779
7780 cc = ccbegin;
7781 }
7782
7783 if (unicode_status & XCLASS_HAS_BOOL)
7784 {
7785 OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops));
7786 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BPROPS_MASK);
7787 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
7788
7789 while (*cc != XCL_END)
7790 {
7791 if (*cc == XCL_SINGLE)
7792 {
7793 cc ++;
7794 GETCHARINCTEST(c, cc);
7795 }
7796 else if (*cc == XCL_RANGE)
7797 {
7798 cc ++;
7799 GETCHARINCTEST(c, cc);
7800 GETCHARINCTEST(c, cc);
7801 }
7802 else
7803 {
7804 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7805 cc++;
7806 if (*cc == PT_BOOL)
7807 {
7808 compares--;
7809 invertcmp = (compares == 0 && list != backtracks);
7810 if (cc[-1] == XCL_NOTPROP)
7811 invertcmp ^= 0x1;
7812
7813 OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f));
7814 add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
7815 }
7816 cc += 2;
7817 }
7818 }
7819
7820 cc = ccbegin;
7821 }
7822
7823 if (unicode_status & XCLASS_HAS_SCRIPT)
7824 {
7825 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
7826
7827 while (*cc != XCL_END)
7828 {
7829 if (*cc == XCL_SINGLE)
7830 {
7831 cc ++;
7832 GETCHARINCTEST(c, cc);
7833 }
7834 else if (*cc == XCL_RANGE)
7835 {
7836 cc ++;
7837 GETCHARINCTEST(c, cc);
7838 GETCHARINCTEST(c, cc);
7839 }
7840 else
7841 {
7842 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7843 cc++;
7844 switch (*cc)
7845 {
7846 case PT_SCX:
7847 if (cc[-1] == XCL_NOTPROP)
7848 break;
7849 /* Fall through */
7850
7851 case PT_SC:
7852 compares--;
7853 invertcmp = (compares == 0 && list != backtracks);
7854 if (cc[-1] == XCL_NOTPROP)
7855 invertcmp ^= 0x1;
7856
7857 add_jump(compiler, compares > 0 ? list : backtracks, CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]));
7858 }
7859 cc += 2;
7860 }
7861 }
7862
7863 cc = ccbegin;
7864 }
7865
7866 if (unicode_status & XCLASS_HAS_SCRIPT_EXTENSION)
7867 {
7868 OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
7869 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK);
7870 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
7871
7872 if (unicode_status & XCLASS_SCRIPT_EXTENSION_NOTPROP)
7873 {
7874 if (unicode_status & XCLASS_HAS_TYPE)
7875 {
7876 if (unicode_status & XCLASS_SAVE_CHAR)
7877 {
7878 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP2, 0);
7879 unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0;
7880 }
7881 else
7882 {
7883 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
7884 unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR;
7885 }
7886 }
7887 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
7888 }
7889
7890 while (*cc != XCL_END)
7891 {
7892 if (*cc == XCL_SINGLE)
7893 {
7894 cc ++;
7895 GETCHARINCTEST(c, cc);
7896 }
7897 else if (*cc == XCL_RANGE)
7898 {
7899 cc ++;
7900 GETCHARINCTEST(c, cc);
7901 GETCHARINCTEST(c, cc);
7902 }
7903 else
7904 {
7905 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7906 cc++;
7907 if (*cc == PT_SCX)
7908 {
7909 compares--;
7910 invertcmp = (compares == 0 && list != backtracks);
7911
7912 jump = NULL;
7913 if (cc[-1] == XCL_NOTPROP)
7914 {
7915 jump = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, (int)cc[1]);
7916 if (invertcmp)
7917 {
7918 add_jump(compiler, backtracks, jump);
7919 jump = NULL;
7920 }
7921 invertcmp ^= 0x1;
7922 }
7923
7924 OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_script_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f));
7925 add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
7926
7927 if (jump != NULL)
7928 JUMPHERE(jump);
7929 }
7930 cc += 2;
7931 }
7932 }
7933
7934 if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0)
7935 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7936 else if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR)
7937 OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
7938 cc = ccbegin;
7939 }
7940
7941 if (unicode_status & XCLASS_SAVE_CHAR)
7942 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7943
7944 if (unicode_status & XCLASS_HAS_TYPE)
7945 {
7946 if (unicode_status & XCLASS_SAVE_CHAR)
7947 typereg = RETURN_ADDR;
7948
7949 OP1(SLJIT_MOV_U8, typereg, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
7950 }
7951 }
7952 #endif /* SUPPORT_UNICODE */
7953
7954 /* Generating code. */
7955 charoffset = 0;
7956 numberofcmps = 0;
7957 #ifdef SUPPORT_UNICODE
7958 typeoffset = 0;
7959 #endif /* SUPPORT_UNICODE */
7960
7961 while (*cc != XCL_END)
7962 {
7963 compares--;
7964 invertcmp = (compares == 0 && list != backtracks);
7965 jump = NULL;
7966
7967 if (*cc == XCL_SINGLE)
7968 {
7969 cc ++;
7970 GETCHARINCTEST(c, cc);
7971
7972 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
7973 {
7974 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7975 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7976 numberofcmps++;
7977 }
7978 else if (numberofcmps > 0)
7979 {
7980 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7981 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7982 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7983 numberofcmps = 0;
7984 }
7985 else
7986 {
7987 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7988 numberofcmps = 0;
7989 }
7990 }
7991 else if (*cc == XCL_RANGE)
7992 {
7993 cc ++;
7994 GETCHARINCTEST(c, cc);
7995 SET_CHAR_OFFSET(c);
7996 GETCHARINCTEST(c, cc);
7997
7998 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
7999 {
8000 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8001 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8002 numberofcmps++;
8003 }
8004 else if (numberofcmps > 0)
8005 {
8006 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8007 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
8008 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8009 numberofcmps = 0;
8010 }
8011 else
8012 {
8013 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8014 numberofcmps = 0;
8015 }
8016 }
8017 #ifdef SUPPORT_UNICODE
8018 else
8019 {
8020 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
8021 if (*cc == XCL_NOTPROP)
8022 invertcmp ^= 0x1;
8023 cc++;
8024 switch(*cc)
8025 {
8026 case PT_ANY:
8027 if (!invertcmp)
8028 jump = JUMP(SLJIT_JUMP);
8029 break;
8030
8031 case PT_LAMP:
8032 OP2U(SLJIT_SUB | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
8033 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8034 OP2U(SLJIT_SUB | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
8035 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8036 OP2U(SLJIT_SUB | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
8037 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
8038 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8039 break;
8040
8041 case PT_GC:
8042 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
8043 SET_TYPE_OFFSET(c);
8044 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
8045 break;
8046
8047 case PT_PC:
8048 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
8049 break;
8050
8051 case PT_SC:
8052 case PT_SCX:
8053 case PT_BOOL:
8054 case PT_BIDICL:
8055 compares++;
8056 /* Do nothing. */
8057 break;
8058
8059 case PT_SPACE:
8060 case PT_PXSPACE:
8061 SET_CHAR_OFFSET(9);
8062 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
8063 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8064
8065 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
8066 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8067
8068 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
8069 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8070
8071 SET_TYPE_OFFSET(ucp_Zl);
8072 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
8073 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
8074 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8075 break;
8076
8077 case PT_WORD:
8078 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
8079 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8080 /* Fall through. */
8081
8082 case PT_ALNUM:
8083 SET_TYPE_OFFSET(ucp_Ll);
8084 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
8085 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8086 SET_TYPE_OFFSET(ucp_Nd);
8087 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
8088 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
8089 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8090 break;
8091
8092 case PT_CLIST:
8093 other_cases = PRIV(ucd_caseless_sets) + cc[1];
8094
8095 /* At least three characters are required.
8096 Otherwise this case would be handled by the normal code path. */
8097 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
8098 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
8099
8100 /* Optimizing character pairs, if their difference is power of 2. */
8101 if (is_powerof2(other_cases[1] ^ other_cases[0]))
8102 {
8103 if (charoffset == 0)
8104 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8105 else
8106 {
8107 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
8108 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8109 }
8110 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[1]);
8111 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8112 other_cases += 2;
8113 }
8114 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
8115 {
8116 if (charoffset == 0)
8117 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
8118 else
8119 {
8120 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
8121 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8122 }
8123 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[2]);
8124 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8125
8126 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
8127 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
8128
8129 other_cases += 3;
8130 }
8131 else
8132 {
8133 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
8134 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8135 }
8136
8137 while (*other_cases != NOTACHAR)
8138 {
8139 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
8140 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
8141 }
8142 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8143 break;
8144
8145 case PT_UCNC:
8146 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
8147 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8148 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
8149 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8150 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
8151 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8152
8153 SET_CHAR_OFFSET(0xa0);
8154 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
8155 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8156 SET_CHAR_OFFSET(0);
8157 OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
8158 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
8159 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8160 break;
8161
8162 case PT_PXGRAPH:
8163 /* C and Z groups are the farthest two groups. */
8164 SET_TYPE_OFFSET(ucp_Ll);
8165 OP2U(SLJIT_SUB | SLJIT_SET_GREATER, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
8166 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
8167
8168 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
8169
8170 /* In case of ucp_Cf, we overwrite the result. */
8171 SET_CHAR_OFFSET(0x2066);
8172 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
8173 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8174
8175 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
8176 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8177
8178 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
8179 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8180
8181 JUMPHERE(jump);
8182 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
8183 break;
8184
8185 case PT_PXPRINT:
8186 /* C and Z groups are the farthest two groups. */
8187 SET_TYPE_OFFSET(ucp_Ll);
8188 OP2U(SLJIT_SUB | SLJIT_SET_GREATER, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
8189 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
8190
8191 OP2U(SLJIT_SUB | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
8192 OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);
8193
8194 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
8195
8196 /* In case of ucp_Cf, we overwrite the result. */
8197 SET_CHAR_OFFSET(0x2066);
8198 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
8199 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8200
8201 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
8202 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8203
8204 JUMPHERE(jump);
8205 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
8206 break;
8207
8208 case PT_PXPUNCT:
8209 SET_TYPE_OFFSET(ucp_Sc);
8210 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
8211 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8212
8213 SET_CHAR_OFFSET(0);
8214 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x7f);
8215 OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
8216
8217 SET_TYPE_OFFSET(ucp_Pc);
8218 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
8219 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
8220 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8221 break;
8222
8223 default:
8224 SLJIT_UNREACHABLE();
8225 break;
8226 }
8227 cc += 2;
8228 }
8229 #endif /* SUPPORT_UNICODE */
8230
8231 if (jump != NULL)
8232 add_jump(compiler, compares > 0 ? list : backtracks, jump);
8233 }
8234
8235 if (found != NULL)
8236 set_jumps(found, LABEL());
8237 }
8238
8239 #undef SET_TYPE_OFFSET
8240 #undef SET_CHAR_OFFSET
8241
8242 #endif
8243
/* Generates the matching path for a simple assertion opcode. Every jump that
has to be taken when the assertion does not hold is appended to 'backtracks'.

Arguments:
  common      the JIT compiler state
  type        the opcode to compile: OP_SOD, OP_SOM, OP_NOT_WORD_BOUNDARY,
                OP_WORD_BOUNDARY, OP_EODN, OP_EOD, OP_DOLL, OP_DOLLM,
                OP_CIRC, OP_CIRCM or OP_REVERSE
  cc          position in the compiled pattern; only OP_REVERSE reads it
                (a length is fetched with GET(cc, 0))
  backtracks  jump list which receives the failure jumps

Returns:      cc + LINK_SIZE for OP_REVERSE, cc for every other opcode
*/

static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int length;
/* Register used as the base address of the jit_arguments structure. It is
ARGUMENTS itself unless HAS_VIRTUAL_REGISTERS is set, in which case
ARGUMENTS is first copied into a temporary register. */
int args_reg = ARGUMENTS;
/* Condition of the jump which is added to the backtrack list. */
int fail_cond;
/* The two values a fixed newline above 255 is compared against: the first
and the second code unit read from the subject. */
sljit_sw nl_first = (sljit_sw)((common->newline >> 8) & 0xff);
sljit_sw nl_second = (sljit_sw)(common->newline & 0xff);
struct sljit_jump *jump[4];
#ifdef SUPPORT_UNICODE
struct sljit_label *label;
#endif /* SUPPORT_UNICODE */

switch(type)
  {
  case OP_SOD:
  case OP_SOM:
  /* STR_PTR must be equal to the 'begin' (OP_SOD) or the 'str' (OP_SOM)
  member of jit_arguments. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    args_reg = TMP1;
    }
  if (type == OP_SOD)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
  else
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, str));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  /* The shared word boundary routine is reached by a fast call. Its outcome
  is tested through TMP2 when invalid UTF is enabled, and through the zero
  flag otherwise. */
  add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
#ifdef SUPPORT_UNICODE
  if (common->invalid_utf)
    {
    fail_cond = SLJIT_SIG_LESS_EQUAL;
    if (type == OP_NOT_WORD_BOUNDARY)
      fail_cond = SLJIT_NOT_EQUAL;
    add_jump(compiler, backtracks, CMP(fail_cond, TMP2, 0, SLJIT_IMM, 0));
    return cc;
    }
#endif /* SUPPORT_UNICODE */
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  fail_cond = SLJIT_ZERO;
  if (type == OP_NOT_WORD_BOUNDARY)
    fail_cond = SLJIT_NOT_ZERO;
  add_jump(compiler, backtracks, JUMP(fail_cond));
  return cc;

  case OP_EODN:
  /* Requires rather complex checks. jump[0] skips all of them when STR_PTR
  has already reached STR_END. */
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (common->nltype != NLTYPE_FIXED)
    {
    /* Not a fixed newline: a CR is handled first. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, TMP2, 0, STR_END, 0);
    jump[2] = JUMP(SLJIT_GREATER);
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
    /* Equal: the second code unit has to be NL. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

    /* The first code unit is not CR. */
    JUMPHERE(jump[1]);
    if (common->nltype != NLTYPE_ANYCRLF)
      {
      /* STR_PTR is saved in TMP3 and restored after the newline test. */
      OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
      read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
      sljit_set_current_flags(compiler, SLJIT_SET_Z);
      add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
      OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
      }
    else
      {
      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
      }
    JUMPHERE(jump[2]);
    JUMPHERE(jump[3]);
    }
  else if (common->newline > 255)
    {
    /* Fixed newline which is compared as two code units. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode != PCRE2_JIT_COMPLETE)
      {
      jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, STR_END, 0);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, nl_first);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(jump[1]);
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_second));
    }
  else
    {
    /* Fixed newline which is compared as a single code unit. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
    }
  JUMPHERE(jump[0]);
  if (common->mode != PCRE2_JIT_COMPLETE)
    check_partial(common, TRUE);
  return cc;

  case OP_EOD:
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
  if (common->mode != PCRE2_JIT_COMPLETE)
    check_partial(common, TRUE);
  return cc;

  case OP_DOLL:
  /* Fails at once when PCRE2_NOTEOL is set in the options member. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    args_reg = TMP2;
    }
  OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));

  if (common->endonly)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    check_partial(common, FALSE);
    }
  else
    {
    /* The remaining checks are those of OP_EODN. */
    compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
    }
  return cc;

  case OP_DOLLM:
  /* jump[1] is taken when STR_PTR is before STR_END. Otherwise only the
  PCRE2_NOTEOL option is tested, and jump[0] skips the newline checks. */
  jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    args_reg = TMP2;
    }
  OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  check_partial(common, FALSE);
  jump[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[1]);

  if (common->nltype != NLTYPE_FIXED || common->newline <= 255)
    {
    peek_char(common, common->nlmax, TMP3, 0, NULL);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  else
    {
    /* Fixed newline which is compared as two code units. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode != PCRE2_JIT_COMPLETE)
      {
      jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
      /* STR_PTR = STR_END - IN_UCHARS(1) */
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(jump[1]);
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_second));
    }
  JUMPHERE(jump[0]);
  return cc;

  case OP_CIRC:
  /* Fails when STR_PTR is after the 'begin' member or when PCRE2_NOTBOL is
  set in the options member. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    args_reg = TMP2;
    }
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
  OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  return cc;

  case OP_CIRCM:
  /* TMP2 might be used by peek_char_back. It holds the 'begin' member here;
  jump[1] is taken when STR_PTR is after it. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    args_reg = TMP1;
    }
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
  jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
  OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  jump[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[1]);

  if (!common->alt_circumflex)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  if (common->nltype != NLTYPE_FIXED || common->newline <= 255)
    {
    peek_char_back(common, common->nlmax, backtracks);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  else
    {
    /* Fixed newline which is compared as two code units; both are read from
    before STR_PTR. */
    OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_second));
    }
  JUMPHERE(jump[0]);
  return cc;

  case OP_REVERSE:
  length = GET(cc, 0);
  if (length != 0)
    {
    /* TMP2 receives the 'begin' member; STR_PTR must not be moved before
    it. */
    if (HAS_VIRTUAL_REGISTERS)
      {
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
      args_reg = TMP1;
      }
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
#ifdef SUPPORT_UNICODE
    if (common->utf)
      {
      /* A loop which calls move_back 'length' times; TMP3 is the counter. */
      OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, length);
      label = LABEL();
      add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
      move_back(common, backtracks, FALSE);
      OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
      JUMPTO(SLJIT_NOT_ZERO, label);
      }
    else
#endif
      {
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
      }
    check_start_used_ptr(common);
    }
  return cc + LINK_SIZE;

  default:
  break;
  }

/* No other opcode is expected by this function. */
SLJIT_UNREACHABLE();
return cc;
}
8518
8519 #ifdef SUPPORT_UNICODE
8520
8521 #if PCRE2_CODE_UNIT_WIDTH != 32
8522
do_extuni_utf(jit_arguments * args,PCRE2_SPTR cc)8523 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
8524 {
8525 PCRE2_SPTR start_subject = args->begin;
8526 PCRE2_SPTR end_subject = args->end;
8527 int lgb, rgb, ricount;
8528 PCRE2_SPTR prevcc, endcc, bptr;
8529 BOOL first = TRUE;
8530 uint32_t c;
8531
8532 prevcc = cc;
8533 endcc = NULL;
8534 do
8535 {
8536 GETCHARINC(c, cc);
8537 rgb = UCD_GRAPHBREAK(c);
8538
8539 if (first)
8540 {
8541 lgb = rgb;
8542 endcc = cc;
8543 first = FALSE;
8544 continue;
8545 }
8546
8547 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8548 break;
8549
8550 /* Not breaking between Regional Indicators is allowed only if there
8551 are an even number of preceding RIs. */
8552
8553 if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8554 {
8555 ricount = 0;
8556 bptr = prevcc;
8557
8558 /* bptr is pointing to the left-hand character */
8559 while (bptr > start_subject)
8560 {
8561 bptr--;
8562 BACKCHAR(bptr);
8563 GETCHAR(c, bptr);
8564
8565 if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
8566 break;
8567
8568 ricount++;
8569 }
8570
8571 if ((ricount & 1) != 0) break; /* Grapheme break required */
8572 }
8573
8574 /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8575 allows any number of them before a following Extended_Pictographic. */
8576
8577 if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8578 lgb != ucp_gbExtended_Pictographic)
8579 lgb = rgb;
8580
8581 prevcc = endcc;
8582 endcc = cc;
8583 }
8584 while (cc < end_subject);
8585
8586 return endcc;
8587 }
8588
8589 #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
8590
do_extuni_utf_invalid(jit_arguments * args,PCRE2_SPTR cc)8591 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
8592 {
8593 PCRE2_SPTR start_subject = args->begin;
8594 PCRE2_SPTR end_subject = args->end;
8595 int lgb, rgb, ricount;
8596 PCRE2_SPTR prevcc, endcc, bptr;
8597 BOOL first = TRUE;
8598 uint32_t c;
8599
8600 prevcc = cc;
8601 endcc = NULL;
8602 do
8603 {
8604 GETCHARINC_INVALID(c, cc, end_subject, break);
8605 rgb = UCD_GRAPHBREAK(c);
8606
8607 if (first)
8608 {
8609 lgb = rgb;
8610 endcc = cc;
8611 first = FALSE;
8612 continue;
8613 }
8614
8615 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8616 break;
8617
8618 /* Not breaking between Regional Indicators is allowed only if there
8619 are an even number of preceding RIs. */
8620
8621 if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8622 {
8623 ricount = 0;
8624 bptr = prevcc;
8625
8626 /* bptr is pointing to the left-hand character */
8627 while (bptr > start_subject)
8628 {
8629 GETCHARBACK_INVALID(c, bptr, start_subject, break);
8630
8631 if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
8632 break;
8633
8634 ricount++;
8635 }
8636
8637 if ((ricount & 1) != 0)
8638 break; /* Grapheme break required */
8639 }
8640
8641 /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8642 allows any number of them before a following Extended_Pictographic. */
8643
8644 if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8645 lgb != ucp_gbExtended_Pictographic)
8646 lgb = rgb;
8647
8648 prevcc = endcc;
8649 endcc = cc;
8650 }
8651 while (cc < end_subject);
8652
8653 return endcc;
8654 }
8655
do_extuni_no_utf(jit_arguments * args,PCRE2_SPTR cc)8656 static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
8657 {
8658 PCRE2_SPTR start_subject = args->begin;
8659 PCRE2_SPTR end_subject = args->end;
8660 int lgb, rgb, ricount;
8661 PCRE2_SPTR bptr;
8662 uint32_t c;
8663
8664 /* Patch by PH */
8665 /* GETCHARINC(c, cc); */
8666 c = *cc++;
8667
8668 #if PCRE2_CODE_UNIT_WIDTH == 32
8669 if (c >= 0x110000)
8670 return NULL;
8671 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8672 lgb = UCD_GRAPHBREAK(c);
8673
8674 while (cc < end_subject)
8675 {
8676 c = *cc;
8677 #if PCRE2_CODE_UNIT_WIDTH == 32
8678 if (c >= 0x110000)
8679 break;
8680 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8681 rgb = UCD_GRAPHBREAK(c);
8682
8683 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8684 break;
8685
8686 /* Not breaking between Regional Indicators is allowed only if there
8687 are an even number of preceding RIs. */
8688
8689 if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8690 {
8691 ricount = 0;
8692 bptr = cc - 1;
8693
8694 /* bptr is pointing to the left-hand character */
8695 while (bptr > start_subject)
8696 {
8697 bptr--;
8698 c = *bptr;
8699 #if PCRE2_CODE_UNIT_WIDTH == 32
8700 if (c >= 0x110000)
8701 break;
8702 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8703
8704 if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break;
8705
8706 ricount++;
8707 }
8708
8709 if ((ricount & 1) != 0)
8710 break; /* Grapheme break required */
8711 }
8712
8713 /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8714 allows any number of them before a following Extended_Pictographic. */
8715
8716 if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8717 lgb != ucp_gbExtended_Pictographic)
8718 lgb = rgb;
8719
8720 cc++;
8721 }
8722
8723 return cc;
8724 }
8725
8726 #endif /* SUPPORT_UNICODE */
8727
compile_char1_matchingpath(compiler_common * common,PCRE2_UCHAR type,PCRE2_SPTR cc,jump_list ** backtracks,BOOL check_str_ptr)8728 static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
8729 {
8730 DEFINE_COMPILER;
8731 int length;
8732 unsigned int c, oc, bit;
8733 compare_context context;
8734 struct sljit_jump *jump[3];
8735 jump_list *end_list;
8736 #ifdef SUPPORT_UNICODE
8737 PCRE2_UCHAR propdata[5];
8738 #endif /* SUPPORT_UNICODE */
8739
8740 switch(type)
8741 {
8742 case OP_NOT_DIGIT:
8743 case OP_DIGIT:
8744 /* Digits are usually 0-9, so it is worth to optimize them. */
8745 if (check_str_ptr)
8746 detect_partial_match(common, backtracks);
8747 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8748 if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
8749 read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
8750 else
8751 #endif
8752 read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
8753 /* Flip the starting bit in the negative case. */
8754 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_digit);
8755 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8756 return cc;
8757
8758 case OP_NOT_WHITESPACE:
8759 case OP_WHITESPACE:
8760 if (check_str_ptr)
8761 detect_partial_match(common, backtracks);
8762 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8763 if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
8764 read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
8765 else
8766 #endif
8767 read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
8768 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_space);
8769 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8770 return cc;
8771
8772 case OP_NOT_WORDCHAR:
8773 case OP_WORDCHAR:
8774 if (check_str_ptr)
8775 detect_partial_match(common, backtracks);
8776 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8777 if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
8778 read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
8779 else
8780 #endif
8781 read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
8782 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_word);
8783 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8784 return cc;
8785
8786 case OP_ANY:
8787 if (check_str_ptr)
8788 detect_partial_match(common, backtracks);
8789 read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
8790 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8791 {
8792 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
8793 end_list = NULL;
8794 if (common->mode != PCRE2_JIT_PARTIAL_HARD)
8795 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8796 else
8797 check_str_end(common, &end_list);
8798
8799 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8800 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
8801 set_jumps(end_list, LABEL());
8802 JUMPHERE(jump[0]);
8803 }
8804 else
8805 check_newlinechar(common, common->nltype, backtracks, TRUE);
8806 return cc;
8807
8808 case OP_ALLANY:
8809 if (check_str_ptr)
8810 detect_partial_match(common, backtracks);
8811 #ifdef SUPPORT_UNICODE
8812 if (common->utf)
8813 {
8814 if (common->invalid_utf)
8815 {
8816 read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
8817 return cc;
8818 }
8819
8820 #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
8821 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8822 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8823 #if PCRE2_CODE_UNIT_WIDTH == 8
8824 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
8825 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
8826 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
8827 #elif PCRE2_CODE_UNIT_WIDTH == 16
8828 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
8829 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
8830 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xd800);
8831 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
8832 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
8833 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
8834 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
8835 JUMPHERE(jump[0]);
8836 return cc;
8837 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
8838 }
8839 #endif /* SUPPORT_UNICODE */
8840 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8841 return cc;
8842
8843 case OP_ANYBYTE:
8844 if (check_str_ptr)
8845 detect_partial_match(common, backtracks);
8846 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8847 return cc;
8848
8849 #ifdef SUPPORT_UNICODE
8850 case OP_NOTPROP:
8851 case OP_PROP:
8852 propdata[0] = XCL_HASPROP;
8853 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
8854 propdata[2] = cc[0];
8855 propdata[3] = cc[1];
8856 propdata[4] = XCL_END;
8857 if (check_str_ptr)
8858 detect_partial_match(common, backtracks);
8859 compile_xclass_matchingpath(common, propdata, backtracks);
8860 return cc + 2;
8861 #endif
8862
8863 case OP_ANYNL:
8864 if (check_str_ptr)
8865 detect_partial_match(common, backtracks);
8866 read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
8867 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
8868 /* We don't need to handle soft partial matching case. */
8869 end_list = NULL;
8870 if (common->mode != PCRE2_JIT_PARTIAL_HARD)
8871 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8872 else
8873 check_str_end(common, &end_list);
8874 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8875 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
8876 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8877 jump[2] = JUMP(SLJIT_JUMP);
8878 JUMPHERE(jump[0]);
8879 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
8880 set_jumps(end_list, LABEL());
8881 JUMPHERE(jump[1]);
8882 JUMPHERE(jump[2]);
8883 return cc;
8884
8885 case OP_NOT_HSPACE:
8886 case OP_HSPACE:
8887 if (check_str_ptr)
8888 detect_partial_match(common, backtracks);
8889
8890 if (type == OP_NOT_HSPACE)
8891 read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
8892 else
8893 read_char(common, 0x9, 0x3000, NULL, 0);
8894
8895 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
8896 sljit_set_current_flags(compiler, SLJIT_SET_Z);
8897 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8898 return cc;
8899
8900 case OP_NOT_VSPACE:
8901 case OP_VSPACE:
8902 if (check_str_ptr)
8903 detect_partial_match(common, backtracks);
8904
8905 if (type == OP_NOT_VSPACE)
8906 read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
8907 else
8908 read_char(common, 0xa, 0x2029, NULL, 0);
8909
8910 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
8911 sljit_set_current_flags(compiler, SLJIT_SET_Z);
8912 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8913 return cc;
8914
8915 #ifdef SUPPORT_UNICODE
8916 case OP_EXTUNI:
8917 if (check_str_ptr)
8918 detect_partial_match(common, backtracks);
8919
8920 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
8921 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
8922
8923 #if PCRE2_CODE_UNIT_WIDTH != 32
8924 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
8925 common->utf ? (common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_utf)) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
8926 if (common->invalid_utf)
8927 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
8928 #else
8929 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
8930 common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
8931 if (!common->utf || common->invalid_utf)
8932 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
8933 #endif
8934
8935 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
8936
8937 if (common->mode == PCRE2_JIT_PARTIAL_HARD)
8938 {
8939 jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
8940 /* Since we successfully read a char above, partial matching must occure. */
8941 check_partial(common, TRUE);
8942 JUMPHERE(jump[0]);
8943 }
8944 return cc;
8945 #endif
8946
8947 case OP_CHAR:
8948 case OP_CHARI:
8949 length = 1;
8950 #ifdef SUPPORT_UNICODE
8951 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
8952 #endif
8953
8954 if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
8955 detect_partial_match(common, backtracks);
8956
8957 if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
8958 {
8959 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
8960 if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
8961 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
8962
8963 context.length = IN_UCHARS(length);
8964 context.sourcereg = -1;
8965 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
8966 context.ucharptr = 0;
8967 #endif
8968 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
8969 }
8970
8971 #ifdef SUPPORT_UNICODE
8972 if (common->utf)
8973 {
8974 GETCHAR(c, cc);
8975 }
8976 else
8977 #endif
8978 c = *cc;
8979
8980 SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));
8981
8982 if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
8983 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8984
8985 oc = char_othercase(common, c);
8986 read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);
8987
8988 SLJIT_ASSERT(!is_powerof2(c ^ oc));
8989
8990 if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
8991 {
8992 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, oc);
8993 CMOV(SLJIT_EQUAL, TMP1, SLJIT_IMM, c);
8994 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8995 }
8996 else
8997 {
8998 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
8999 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
9000 JUMPHERE(jump[0]);
9001 }
9002 return cc + length;
9003
9004 case OP_NOT:
9005 case OP_NOTI:
9006 if (check_str_ptr)
9007 detect_partial_match(common, backtracks);
9008
9009 length = 1;
9010 #ifdef SUPPORT_UNICODE
9011 if (common->utf)
9012 {
9013 #if PCRE2_CODE_UNIT_WIDTH == 8
9014 c = *cc;
9015 if (c < 128 && !common->invalid_utf)
9016 {
9017 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
9018 if (type == OP_NOT || !char_has_othercase(common, cc))
9019 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9020 else
9021 {
9022 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
9023 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
9024 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
9025 }
9026 /* Skip the variable-length character. */
9027 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9028 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
9029 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
9030 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
9031 JUMPHERE(jump[0]);
9032 return cc + 1;
9033 }
9034 else
9035 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
9036 {
9037 GETCHARLEN(c, cc, length);
9038 }
9039 }
9040 else
9041 #endif /* SUPPORT_UNICODE */
9042 c = *cc;
9043
9044 if (type == OP_NOT || !char_has_othercase(common, cc))
9045 {
9046 read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
9047 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9048 }
9049 else
9050 {
9051 oc = char_othercase(common, c);
9052 read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
9053 bit = c ^ oc;
9054 if (is_powerof2(bit))
9055 {
9056 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
9057 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
9058 }
9059 else
9060 {
9061 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9062 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
9063 }
9064 }
9065 return cc + length;
9066
9067 case OP_CLASS:
9068 case OP_NCLASS:
9069 if (check_str_ptr)
9070 detect_partial_match(common, backtracks);
9071
9072 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
9073 bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
9074 if (type == OP_NCLASS)
9075 read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
9076 else
9077 read_char(common, 0, bit, NULL, 0);
9078 #else
9079 if (type == OP_NCLASS)
9080 read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
9081 else
9082 read_char(common, 0, 255, NULL, 0);
9083 #endif
9084
9085 if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
9086 return cc + 32 / sizeof(PCRE2_UCHAR);
9087
9088 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
9089 jump[0] = NULL;
9090 if (common->utf)
9091 {
9092 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
9093 if (type == OP_CLASS)
9094 {
9095 add_jump(compiler, backtracks, jump[0]);
9096 jump[0] = NULL;
9097 }
9098 }
9099 #elif PCRE2_CODE_UNIT_WIDTH != 8
9100 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
9101 if (type == OP_CLASS)
9102 {
9103 add_jump(compiler, backtracks, jump[0]);
9104 jump[0] = NULL;
9105 }
9106 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
9107
9108 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
9109 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
9110 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
9111 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
9112 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
9113 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
9114
9115 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
9116 if (jump[0] != NULL)
9117 JUMPHERE(jump[0]);
9118 #endif
9119 return cc + 32 / sizeof(PCRE2_UCHAR);
9120
9121 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
9122 case OP_XCLASS:
9123 if (check_str_ptr)
9124 detect_partial_match(common, backtracks);
9125 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
9126 return cc + GET(cc, 0) - 1;
9127 #endif
9128 }
9129 SLJIT_UNREACHABLE();
9130 return cc;
9131 }
9132
compile_charn_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,jump_list ** backtracks)9133 static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
9134 {
9135 /* This function consumes at least one input character. */
9136 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
9137 DEFINE_COMPILER;
9138 PCRE2_SPTR ccbegin = cc;
9139 compare_context context;
9140 int size;
9141
9142 context.length = 0;
9143 do
9144 {
9145 if (cc >= ccend)
9146 break;
9147
9148 if (*cc == OP_CHAR)
9149 {
9150 size = 1;
9151 #ifdef SUPPORT_UNICODE
9152 if (common->utf && HAS_EXTRALEN(cc[1]))
9153 size += GET_EXTRALEN(cc[1]);
9154 #endif
9155 }
9156 else if (*cc == OP_CHARI)
9157 {
9158 size = 1;
9159 #ifdef SUPPORT_UNICODE
9160 if (common->utf)
9161 {
9162 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
9163 size = 0;
9164 else if (HAS_EXTRALEN(cc[1]))
9165 size += GET_EXTRALEN(cc[1]);
9166 }
9167 else
9168 #endif
9169 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
9170 size = 0;
9171 }
9172 else
9173 size = 0;
9174
9175 cc += 1 + size;
9176 context.length += IN_UCHARS(size);
9177 }
9178 while (size > 0 && context.length <= 128);
9179
9180 cc = ccbegin;
9181 if (context.length > 0)
9182 {
9183 /* We have a fixed-length byte sequence. */
9184 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
9185 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
9186
9187 context.sourcereg = -1;
9188 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
9189 context.ucharptr = 0;
9190 #endif
9191 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
9192 return cc;
9193 }
9194
9195 /* A non-fixed length character will be checked if length == 0. */
9196 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
9197 }
9198
9199 /* Forward definitions. */
9200 static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
9201 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
9202
/* Creates a new backtrack record and pushes it onto the parent's list.

The record is obtained with sljit_alloc_memory(), zero filled, given ccstart
as its cc member and linked in front of parent->top. If the compiler reports
an error after the allocation, the enclosing function returns "error".

The macro relies on the local names "compiler", "backtrack" and "parent" being
in scope where it is used. Note that "size" is evaluated twice. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
9215
/* Variant of the backtrack-record push for functions returning void.

Obtains a record with sljit_alloc_memory(), zero fills it, stores ccstart in
its cc member and links it in front of parent->top. If the compiler reports an
error after the allocation, the enclosing function returns without a value.

The macro relies on the local names "compiler", "backtrack" and "parent" being
in scope where it is used. Note that "size" is evaluated twice. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
9228
/* Views the local "backtrack" pointer as a pointer to the given record type. */
#define BACKTRACK_AS(record_type) ((record_type *)backtrack)
9230
/* Emits code that selects the capture group used by a duplicate-name back
reference (OP_DNREF / OP_DNREFI).

The candidate groups are taken from consecutive name table entries. For each
candidate the address of its OVECTOR slot is loaded into TMP2; the search ends
at the first candidate whose slot differs from the value of OVECTOR(1), or at
the last candidate. When backtracks is not NULL and common->unset_backref is
zero, a last candidate whose slot still equals OVECTOR(1) jumps to backtracks. */
static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
/* The OVECTOR offset goes to TMP2. */
DEFINE_COMPILER;
int remaining = GET2(cc, 1 + IMM2_SIZE);
PCRE2_SPTR entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
unsigned int offset;
jump_list *found = NULL;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

/* TMP1 holds the value every candidate slot is compared against. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* Every candidate except the last one. */
for (remaining--; remaining > 0; remaining--)
  {
  offset = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
  add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
  entry += common->name_entry_size;
  }

/* The last candidate is selected unconditionally; only the optional
backtrack test is emitted for it. */
offset = GET2(entry, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
if (backtracks != NULL && !common->unset_backref)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));

set_jumps(found, LABEL());
}
9260
/* Emits the matching path of one back reference.

cc points to the reference opcode. For OP_REF / OP_REFI the start and end of
the referenced text are read from the OVECTOR slots; for any other opcode the
caller must have left the address of the selected offset pair in TMP2.
Failure jumps are appended to backtracks.

withchecks - also emit the comparison against OVECTOR(1) (numbered references
             only, and only when common->unset_backref is zero) and the test
             for an empty referenced text
emptyfail  - together with withchecks: an empty referenced text jumps to
             backtracks instead of continuing on the matching path */
static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
int offset = 0;
struct sljit_jump *done = NULL;
struct sljit_jump *subject_end;
struct sljit_jump *full_match;
#if defined SUPPORT_UNICODE
struct sljit_label *next_char;
struct sljit_label *next_set_item;
jump_list *mismatch = NULL;
int ref_ptr_reg = COUNT_MATCH;
int ref_end_reg = ARGUMENTS;
int ref_char_reg = STACK_LIMIT;
#endif /* SUPPORT_UNICODE */

/* TMP1 = start of the referenced text. */
if (!ref)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
else
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->unset_backref)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }

#if defined SUPPORT_UNICODE
if (common->utf && *cc == OP_REFI)
  {
  SLJIT_ASSERT(common->iref_ptr != 0);

  /* TMP2 = end of the referenced text. */
  if (ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  if (withchecks && emptyfail)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));

  /* Three registers are borrowed for the loop; their current values are
  saved in the local area at iref_ptr and reloaded on every exit path. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, ref_ptr_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), ref_end_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, ref_char_reg, 0);

  OP1(SLJIT_MOV, ref_ptr_reg, 0, TMP1, 0);
  OP1(SLJIT_MOV, ref_end_reg, 0, TMP2, 0);

  next_char = LABEL();
  /* The whole referenced text has been consumed: success. */
  done = CMP(SLJIT_GREATER_EQUAL, ref_ptr_reg, 0, ref_end_reg, 0);
  /* The subject ran out before the referenced text did. */
  subject_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

  /* Read original character. It must be a valid UTF character.
  STR_PTR is parked in TMP3 while read_char() works on the referenced text. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, ref_ptr_reg, 0);

  read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);

  OP1(SLJIT_MOV, ref_ptr_reg, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  OP1(SLJIT_MOV, ref_char_reg, 0, TMP1, 0);

  /* Read second character (from the subject). */
  read_char(common, 0, READ_CHAR_MAX, &mismatch, READ_CHAR_UPDATE_STR_PTR);

  /* Identical characters: continue with the next pair. */
  CMPTO(SLJIT_EQUAL, TMP1, 0, ref_char_reg, 0, next_char);

  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);

  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));

  /* TMP2 = TMP2 * 12 (presumably sizeof(ucd_record)) + address of the
  record table. */
  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));

  /* Subject character + other_case delta equals the referenced character:
  continue with the next pair. */
  OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, ref_char_reg, 0, next_char);

  /* A zero caseset value means there is nothing more to try. */
  add_jump(compiler, &mismatch, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));

  /* Walk the caseless set: an equal item is a match, a smaller item means
  keep scanning, anything else falls through to the mismatch code. */
  next_set_item = LABEL();
  OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
  OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, TMP1, 0, ref_char_reg, 0);
  JUMPTO(SLJIT_EQUAL, next_char);
  JUMPTO(SLJIT_LESS, next_set_item);

  /* Mismatch exit. In complete mode running out of subject is handled
  the same way. */
  set_jumps(mismatch, LABEL());
  if (common->mode == PCRE2_JIT_COMPLETE)
    JUMPHERE(subject_end);

  OP1(SLJIT_MOV, ref_ptr_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, ref_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, ref_char_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    /* Partial matching exit: restore the registers, let check_partial()
    do its work, then backtrack. */
    JUMPHERE(subject_end);
    OP1(SLJIT_MOV, ref_ptr_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
    OP1(SLJIT_MOV, ref_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
    OP1(SLJIT_MOV, ref_char_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);

    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    }

  /* Success exit. */
  JUMPHERE(done);
  OP1(SLJIT_MOV, ref_ptr_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, ref_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, ref_char_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  return;
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* TMP2 = end - start of the referenced text; the zero flag is set when
  the referenced text is empty. */
  if (ref)
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  if (withchecks)
    done = JUMP(SLJIT_ZERO);

  /* Advance STR_PTR over the expected text before comparing. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  subject_end = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, subject_end);

  /* The compare routine is expected to leave zero in TMP2 on equality. */
  add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    full_match = JUMP(SLJIT_JUMP);
    JUMPHERE(subject_end);
    /* TMP2 -= STR_END - STR_PTR */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    /* Nothing left in the subject to compare: skip the compare call. */
    subject_end = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(subject_end);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(full_match);
    }
  }

/* Resolve the empty-text jump: either a failure or a trivial success. */
if (done != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, done);
  else
    JUMPHERE(done);
  }
}
9425
compile_ref_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9426 static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9427 {
9428 DEFINE_COMPILER;
9429 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9430 backtrack_common *backtrack;
9431 PCRE2_UCHAR type;
9432 int offset = 0;
9433 struct sljit_label *label;
9434 struct sljit_jump *zerolength;
9435 struct sljit_jump *jump = NULL;
9436 PCRE2_SPTR ccbegin = cc;
9437 int min = 0, max = 0;
9438 BOOL minimize;
9439
9440 PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
9441
9442 if (ref)
9443 offset = GET2(cc, 1) << 1;
9444 else
9445 cc += IMM2_SIZE;
9446 type = cc[1 + IMM2_SIZE];
9447
9448 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
9449 minimize = (type & 0x1) != 0;
9450 switch(type)
9451 {
9452 case OP_CRSTAR:
9453 case OP_CRMINSTAR:
9454 min = 0;
9455 max = 0;
9456 cc += 1 + IMM2_SIZE + 1;
9457 break;
9458 case OP_CRPLUS:
9459 case OP_CRMINPLUS:
9460 min = 1;
9461 max = 0;
9462 cc += 1 + IMM2_SIZE + 1;
9463 break;
9464 case OP_CRQUERY:
9465 case OP_CRMINQUERY:
9466 min = 0;
9467 max = 1;
9468 cc += 1 + IMM2_SIZE + 1;
9469 break;
9470 case OP_CRRANGE:
9471 case OP_CRMINRANGE:
9472 min = GET2(cc, 1 + IMM2_SIZE + 1);
9473 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
9474 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
9475 break;
9476 default:
9477 SLJIT_UNREACHABLE();
9478 break;
9479 }
9480
9481 if (!minimize)
9482 {
9483 if (min == 0)
9484 {
9485 allocate_stack(common, 2);
9486 if (ref)
9487 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9488 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9489 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9490 /* Temporary release of STR_PTR. */
9491 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9492 /* Handles both invalid and empty cases. Since the minimum repeat,
9493 is zero the invalid case is basically the same as an empty case. */
9494 if (ref)
9495 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9496 else
9497 {
9498 compile_dnref_search(common, ccbegin, NULL);
9499 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9500 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9501 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9502 }
9503 /* Restore if not zero length. */
9504 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9505 }
9506 else
9507 {
9508 allocate_stack(common, 1);
9509 if (ref)
9510 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9511 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9512 if (ref)
9513 {
9514 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9515 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9516 }
9517 else
9518 {
9519 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
9520 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9521 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9522 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9523 }
9524 }
9525
9526 if (min > 1 || max > 1)
9527 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
9528
9529 label = LABEL();
9530 if (!ref)
9531 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
9532 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
9533
9534 if (min > 1 || max > 1)
9535 {
9536 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
9537 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9538 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
9539 if (min > 1)
9540 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
9541 if (max > 1)
9542 {
9543 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
9544 allocate_stack(common, 1);
9545 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9546 JUMPTO(SLJIT_JUMP, label);
9547 JUMPHERE(jump);
9548 }
9549 }
9550
9551 if (max == 0)
9552 {
9553 /* Includes min > 1 case as well. */
9554 allocate_stack(common, 1);
9555 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9556 JUMPTO(SLJIT_JUMP, label);
9557 }
9558
9559 JUMPHERE(zerolength);
9560 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9561
9562 count_match(common);
9563 return cc;
9564 }
9565
9566 allocate_stack(common, ref ? 2 : 3);
9567 if (ref)
9568 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9569 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9570 if (type != OP_CRMINSTAR)
9571 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9572
9573 if (min == 0)
9574 {
9575 /* Handles both invalid and empty cases. Since the minimum repeat,
9576 is zero the invalid case is basically the same as an empty case. */
9577 if (ref)
9578 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9579 else
9580 {
9581 compile_dnref_search(common, ccbegin, NULL);
9582 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9583 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9584 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9585 }
9586 /* Length is non-zero, we can match real repeats. */
9587 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9588 jump = JUMP(SLJIT_JUMP);
9589 }
9590 else
9591 {
9592 if (ref)
9593 {
9594 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9595 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9596 }
9597 else
9598 {
9599 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
9600 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9601 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9602 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9603 }
9604 }
9605
9606 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9607 if (max > 0)
9608 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
9609
9610 if (!ref)
9611 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9612 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
9613 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9614
9615 if (min > 1)
9616 {
9617 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9618 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9619 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9620 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
9621 }
9622 else if (max > 0)
9623 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
9624
9625 if (jump != NULL)
9626 JUMPHERE(jump);
9627 JUMPHERE(zerolength);
9628
9629 count_match(common);
9630 return cc;
9631 }
9632
compile_recurse_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9633 static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9634 {
9635 DEFINE_COMPILER;
9636 backtrack_common *backtrack;
9637 recurse_entry *entry = common->entries;
9638 recurse_entry *prev = NULL;
9639 sljit_sw start = GET(cc, 1);
9640 PCRE2_SPTR start_cc;
9641 BOOL needs_control_head;
9642
9643 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
9644
9645 /* Inlining simple patterns. */
9646 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
9647 {
9648 start_cc = common->start + start;
9649 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
9650 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
9651 return cc + 1 + LINK_SIZE;
9652 }
9653
9654 while (entry != NULL)
9655 {
9656 if (entry->start == start)
9657 break;
9658 prev = entry;
9659 entry = entry->next;
9660 }
9661
9662 if (entry == NULL)
9663 {
9664 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
9665 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9666 return NULL;
9667 entry->next = NULL;
9668 entry->entry_label = NULL;
9669 entry->backtrack_label = NULL;
9670 entry->entry_calls = NULL;
9671 entry->backtrack_calls = NULL;
9672 entry->start = start;
9673
9674 if (prev != NULL)
9675 prev->next = entry;
9676 else
9677 common->entries = entry;
9678 }
9679
9680 BACKTRACK_AS(recurse_backtrack)->entry = entry;
9681
9682 if (entry->entry_label == NULL)
9683 add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));
9684 else
9685 JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
9686 /* Leave if the match is failed. */
9687 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
9688 BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
9689 return cc + 1 + LINK_SIZE;
9690 }
9691
do_callout(struct jit_arguments * arguments,pcre2_callout_block * callout_block,PCRE2_SPTR * jit_ovector)9692 static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
9693 {
9694 PCRE2_SPTR begin;
9695 PCRE2_SIZE *ovector;
9696 sljit_u32 oveccount, capture_top;
9697
9698 if (arguments->callout == NULL)
9699 return 0;
9700
9701 SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);
9702
9703 begin = arguments->begin;
9704 ovector = (PCRE2_SIZE*)(callout_block + 1);
9705 oveccount = callout_block->capture_top;
9706
9707 SLJIT_ASSERT(oveccount >= 1);
9708
9709 callout_block->version = 2;
9710 callout_block->callout_flags = 0;
9711
9712 /* Offsets in subject. */
9713 callout_block->subject_length = arguments->end - arguments->begin;
9714 callout_block->start_match = jit_ovector[0] - begin;
9715 callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;
9716 callout_block->subject = begin;
9717
9718 /* Convert and copy the JIT offset vector to the ovector array. */
9719 callout_block->capture_top = 1;
9720 callout_block->offset_vector = ovector;
9721
9722 ovector[0] = PCRE2_UNSET;
9723 ovector[1] = PCRE2_UNSET;
9724 ovector += 2;
9725 jit_ovector += 2;
9726 capture_top = 1;
9727
9728 /* Convert pointers to sizes. */
9729 while (--oveccount != 0)
9730 {
9731 capture_top++;
9732
9733 ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin);
9734 ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin);
9735
9736 if (ovector[0] != PCRE2_UNSET)
9737 callout_block->capture_top = capture_top;
9738
9739 ovector += 2;
9740 jit_ovector += 2;
9741 }
9742
9743 return (arguments->callout)(callout_block, arguments->callout_data);
9744 }
9745
/* Offset of the given member inside pcre2_callout_block. */
#define CALLOUT_ARG_OFFSET(member) SLJIT_OFFSETOF(pcre2_callout_block, member)
9748
compile_callout_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9749 static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9750 {
9751 DEFINE_COMPILER;
9752 backtrack_common *backtrack;
9753 sljit_s32 mov_opcode;
9754 unsigned int callout_length = (*cc == OP_CALLOUT)
9755 ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
9756 sljit_sw value1;
9757 sljit_sw value2;
9758 sljit_sw value3;
9759 sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * sizeof(sljit_sw);
9760
9761 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9762
9763 callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);
9764
9765 allocate_stack(common, callout_arg_size);
9766
9767 SLJIT_ASSERT(common->capture_last_ptr != 0);
9768 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
9769 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9770 value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
9771 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
9772 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
9773 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);
9774
9775 /* These pointer sized fields temporarly stores internal variables. */
9776 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
9777
9778 if (common->mark_ptr != 0)
9779 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
9780 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
9781 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
9782 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
9783
9784 if (*cc == OP_CALLOUT)
9785 {
9786 value1 = 0;
9787 value2 = 0;
9788 value3 = 0;
9789 }
9790 else
9791 {
9792 value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
9793 value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
9794 value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
9795 }
9796
9797 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
9798 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
9799 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
9800 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
9801
9802 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
9803
9804 /* Needed to save important temporary registers. */
9805 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
9806 /* SLJIT_R0 = arguments */
9807 OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
9808 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
9809 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout));
9810 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
9811 free_stack(common, callout_arg_size);
9812
9813 /* Check return value. */
9814 OP2U(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
9815 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
9816 if (common->abort_label == NULL)
9817 add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
9818 else
9819 JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label);
9820 return cc + callout_length;
9821 }
9822
9823 #undef CALLOUT_ARG_SIZE
9824 #undef CALLOUT_ARG_OFFSET
9825
assert_needs_str_ptr_saving(PCRE2_SPTR cc)9826 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
9827 {
9828 while (TRUE)
9829 {
9830 switch (*cc)
9831 {
9832 case OP_CALLOUT_STR:
9833 cc += GET(cc, 1 + 2*LINK_SIZE);
9834 break;
9835
9836 case OP_NOT_WORD_BOUNDARY:
9837 case OP_WORD_BOUNDARY:
9838 case OP_CIRC:
9839 case OP_CIRCM:
9840 case OP_DOLL:
9841 case OP_DOLLM:
9842 case OP_CALLOUT:
9843 case OP_ALT:
9844 cc += PRIV(OP_lengths)[*cc];
9845 break;
9846
9847 case OP_KET:
9848 return FALSE;
9849
9850 default:
9851 return TRUE;
9852 }
9853 }
9854 }
9855
/* Emits the matching path of an assertion group.

cc points to an opcode in the range OP_ASSERT .. OP_ASSERTBACK_NOT, which may
be preceded by OP_BRAZERO or OP_BRAMINZERO (only when 'conditional' is FALSE).
Every alternative of the assertion is compiled in turn: first its matching
path, then its backtracking path.

Arguments:
  common       compiler state; the quit/accept lists and labels, then_trap and
                 the positive assertion fields are saved on entry and restored
                 before every return
  cc           points to the assertion (or to the preceding BRAZERO/BRAMINZERO)
  backtrack    assertion backtrack data; framesize and private_data_ptr are
                 stored into it, matchingpath is set for OP_BRAZERO and used as
                 a jump target for OP_BRAMINZERO
  conditional  when TRUE, failure jumps are collected in backtrack->condfailed
                 instead of backtrack->common.topbacktracks

Returns:       the position 1 + LINK_SIZE code units after the end of the last
                 alternative, or NULL when the sljit compiler reports an error
*/
compile_assert_matchingpath(compiler_common * common,PCRE2_SPTR cc,assert_backtrack * backtrack,BOOL conditional)9856 static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
9857 {
9858 DEFINE_COMPILER;
9859 int framesize;
9860 int extrasize;
9861 BOOL local_quit_available = FALSE;
9862 BOOL needs_control_head;
9863 int private_data_ptr;
9864 backtrack_common altbacktrack;
9865 PCRE2_SPTR ccbegin;
9866 PCRE2_UCHAR opcode;
9867 PCRE2_UCHAR bra = OP_BRA;
9868 jump_list *tmp = NULL;
9869 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
9870 jump_list **found;
9871 /* Saving previous accept variables. */
9872 BOOL save_local_quit_available = common->local_quit_available;
9873 BOOL save_in_positive_assertion = common->in_positive_assertion;
9874 then_trap_backtrack *save_then_trap = common->then_trap;
9875 struct sljit_label *save_quit_label = common->quit_label;
9876 struct sljit_label *save_accept_label = common->accept_label;
9877 jump_list *save_quit = common->quit;
9878 jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
9879 jump_list *save_accept = common->accept;
9880 struct sljit_jump *jump;
9881 struct sljit_jump *brajump = NULL;
9882
9883 /* Assert captures then. */
9884 common->then_trap = NULL;
9885
9886 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
9887 {
9888 SLJIT_ASSERT(!conditional);
9889 bra = *cc;
9890 cc++;
9891 }
9892 private_data_ptr = PRIVATE_DATA(cc);
9893 SLJIT_ASSERT(private_data_ptr != 0);
9894 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
9895 backtrack->framesize = framesize;
9896 backtrack->private_data_ptr = private_data_ptr;
9897 opcode = *cc;
9898 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* When an alternative of a positive assertion matches, the jump is collected
in the local list 'tmp', which is bound to the "Assert is successful" label
below. For a negative assertion a matching alternative jumps directly to
'target'. */
9899 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* ccbegin is the start of the current alternative, cc is its end. */
9900 ccbegin = cc;
9901 cc += GET(cc, 1);
9902
9903 if (bra == OP_BRAMINZERO)
9904 {
9905 /* This is a braminzero backtrack path. */
9906 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9907 free_stack(common, 1);
9908 brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9909 }
9910
/* Without a frame only 'extrasize' words are allocated: one for STR_PTR
(omitted for a plain OP_BRA assertion when assert_needs_str_ptr_saving()
returns FALSE) and one more when the control head must be saved. */
9911 if (framesize < 0)
9912 {
9913 extrasize = 1;
9914 if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
9915 extrasize = 0;
9916
9917 if (needs_control_head)
9918 extrasize++;
9919
9920 if (framesize == no_frame)
9921 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
9922
9923 if (extrasize > 0)
9924 allocate_stack(common, extrasize);
9925
9926 if (needs_control_head)
9927 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9928
9929 if (extrasize > 0)
9930 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9931
9932 if (needs_control_head)
9933 {
9934 SLJIT_ASSERT(extrasize == 2);
9935 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9936 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9937 }
9938 }
9939 else
9940 {
/* With a frame, the extra words hold STR_PTR at STACK(0), the previous
value of private_data_ptr (in the last extra slot) and, when needed, the
control head at STACK(1). private_data_ptr itself is set to the stack
position before the allocation. */
9941 extrasize = needs_control_head ? 3 : 2;
9942 allocate_stack(common, framesize + extrasize);
9943
9944 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9945 OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
9946 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
9947 if (needs_control_head)
9948 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9949 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9950
9951 if (needs_control_head)
9952 {
9953 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
9954 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
9955 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9956 }
9957 else
9958 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9959
9960 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
9961 }
9962
9963 memset(&altbacktrack, 0, sizeof(backtrack_common));
9964 if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
9965 {
9966 /* Control verbs cannot escape from these asserts. */
9967 local_quit_available = TRUE;
9968 common->local_quit_available = TRUE;
9969 common->quit_label = NULL;
9970 common->quit = NULL;
9971 }
9972
9973 common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
9974 common->positive_assertion_quit = NULL;
9975
/* One iteration per alternative of the assertion. */
9976 while (1)
9977 {
9978 common->accept_label = NULL;
9979 common->accept = NULL;
9980 altbacktrack.top = NULL;
9981 altbacktrack.topbacktracks = NULL;
9982
/* Second and later alternatives restart from the saved STR_PTR. */
9983 if (*ccbegin == OP_ALT && extrasize > 0)
9984 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9985
9986 altbacktrack.cc = ccbegin;
9987 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
9988 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9989 {
/* Restore the saved compiler state before reporting the error. */
9990 if (local_quit_available)
9991 {
9992 common->local_quit_available = save_local_quit_available;
9993 common->quit_label = save_quit_label;
9994 common->quit = save_quit;
9995 }
9996 common->in_positive_assertion = save_in_positive_assertion;
9997 common->then_trap = save_then_trap;
9998 common->accept_label = save_accept_label;
9999 common->positive_assertion_quit = save_positive_assertion_quit;
10000 common->accept = save_accept;
10001 return NULL;
10002 }
/* The code below is reached when the alternative has matched; accept jumps
collected while compiling the alternative are bound to this point as well. */
10003 common->accept_label = LABEL();
10004 if (common->accept != NULL)
10005 set_jumps(common->accept, common->accept_label);
10006
10007 /* Reset stack. */
10008 if (framesize < 0)
10009 {
10010 if (framesize == no_frame)
10011 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10012 else if (extrasize > 0)
10013 free_stack(common, extrasize);
10014
10015 if (needs_control_head)
10016 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10017 }
10018 else
10019 {
10020 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
10021 {
10022 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
10023 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
10024 if (needs_control_head)
10025 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10026 }
10027 else
10028 {
10029 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10030 if (needs_control_head)
10031 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
10032 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10033 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
10034 }
10035 }
10036
10037 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
10038 {
10039 /* We know that STR_PTR was stored on the top of the stack. */
10040 if (conditional)
10041 {
10042 if (extrasize > 0)
10043 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
10044 }
10045 else if (bra == OP_BRAZERO)
10046 {
10047 if (framesize < 0)
10048 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
10049 else
10050 {
10051 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
10052 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
10053 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10054 }
10055 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
10056 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10057 }
10058 else if (framesize >= 0)
10059 {
10060 /* For OP_BRA and OP_BRAMINZERO. */
10061 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
10062 }
10063 }
/* The alternative matched: leave through 'found' (see its initialisation). */
10064 add_jump(compiler, found, JUMP(SLJIT_JUMP));
10065
10066 compile_backtrackingpath(common, altbacktrack.top);
10067 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10068 {
/* Restore the saved compiler state before reporting the error. */
10069 if (local_quit_available)
10070 {
10071 common->local_quit_available = save_local_quit_available;
10072 common->quit_label = save_quit_label;
10073 common->quit = save_quit;
10074 }
10075 common->in_positive_assertion = save_in_positive_assertion;
10076 common->then_trap = save_then_trap;
10077 common->accept_label = save_accept_label;
10078 common->positive_assertion_quit = save_positive_assertion_quit;
10079 common->accept = save_accept;
10080 return NULL;
10081 }
/* Failures of this alternative continue here, i.e. with the next alternative
or with the "none of them matched" code after the loop. */
10082 set_jumps(altbacktrack.topbacktracks, LABEL());
10083
10084 if (*cc != OP_ALT)
10085 break;
10086
10087 ccbegin = cc;
10088 cc += GET(cc, 1);
10089 }
10090
10091 if (local_quit_available)
10092 {
10093 SLJIT_ASSERT(common->positive_assertion_quit == NULL);
10094 /* Makes the check less complicated below. */
10095 common->positive_assertion_quit = common->quit;
10096 }
10097
10098 /* None of them matched. */
10099 if (common->positive_assertion_quit != NULL)
10100 {
/* Jumps collected in positive_assertion_quit land here and reset the stack
first; the normal fall-through path skips this fix-up. */
10101 jump = JUMP(SLJIT_JUMP);
10102 set_jumps(common->positive_assertion_quit, LABEL());
10103 SLJIT_ASSERT(framesize != no_stack);
10104 if (framesize < 0)
10105 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
10106 else
10107 {
10108 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10109 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10110 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
10111 }
10112 JUMPHERE(jump);
10113 }
10114
10115 if (needs_control_head)
10116 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
10117
10118 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
10119 {
10120 /* Assert is failed. */
10121 if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
10122 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10123
10124 if (framesize < 0)
10125 {
10126 /* The topmost item should be 0. */
10127 if (bra == OP_BRAZERO)
10128 {
10129 if (extrasize == 2)
10130 free_stack(common, 1);
10131 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10132 }
10133 else if (extrasize > 0)
10134 free_stack(common, extrasize);
10135 }
10136 else
10137 {
10138 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
10139 /* The topmost item should be 0. */
10140 if (bra == OP_BRAZERO)
10141 {
10142 free_stack(common, framesize + extrasize - 1);
10143 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10144 }
10145 else
10146 free_stack(common, framesize + extrasize);
10147 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10148 }
/* For OP_BRAZERO this jump is bound to backtrack->matchingpath below;
otherwise it is added to the failure list 'target'. */
10149 jump = JUMP(SLJIT_JUMP);
10150 if (bra != OP_BRAZERO)
10151 add_jump(compiler, target, jump);
10152
10153 /* Assert is successful. */
10154 set_jumps(tmp, LABEL());
10155 if (framesize < 0)
10156 {
10157 /* We know that STR_PTR was stored on the top of the stack. */
10158 if (extrasize > 0)
10159 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
10160
10161 /* Keep the STR_PTR on the top of the stack. */
10162 if (bra == OP_BRAZERO)
10163 {
10164 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
10165 if (extrasize == 2)
10166 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10167 }
10168 else if (bra == OP_BRAMINZERO)
10169 {
10170 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
10171 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10172 }
10173 }
10174 else
10175 {
10176 if (bra == OP_BRA)
10177 {
10178 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
10179 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
10180 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
10181 }
10182 else
10183 {
10184 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
10185 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
10186 if (extrasize == 2)
10187 {
10188 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10189 if (bra == OP_BRAMINZERO)
10190 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10191 }
10192 else
10193 {
10194 SLJIT_ASSERT(extrasize == 3);
10195 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10196 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
10197 }
10198 }
10199 }
10200
10201 if (bra == OP_BRAZERO)
10202 {
10203 backtrack->matchingpath = LABEL();
10204 SET_LABEL(jump, backtrack->matchingpath);
10205 }
10206 else if (bra == OP_BRAMINZERO)
10207 {
10208 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
10209 JUMPHERE(brajump);
10210 if (framesize >= 0)
10211 {
10212 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10213 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10214 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
10215 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
10216 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10217 }
10218 set_jumps(backtrack->common.topbacktracks, LABEL());
10219 }
10220 }
10221 else
10222 {
10223 /* AssertNot is successful. */
10224 if (framesize < 0)
10225 {
10226 if (extrasize > 0)
10227 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10228
10229 if (bra != OP_BRA)
10230 {
10231 if (extrasize == 2)
10232 free_stack(common, 1);
10233 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10234 }
10235 else if (extrasize > 0)
10236 free_stack(common, extrasize);
10237 }
10238 else
10239 {
10240 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10241 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
10242 /* The topmost item should be 0. */
10243 if (bra != OP_BRA)
10244 {
10245 free_stack(common, framesize + extrasize - 1);
10246 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10247 }
10248 else
10249 free_stack(common, framesize + extrasize);
10250 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10251 }
10252
10253 if (bra == OP_BRAZERO)
10254 backtrack->matchingpath = LABEL();
10255 else if (bra == OP_BRAMINZERO)
10256 {
10257 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
10258 JUMPHERE(brajump);
10259 }
10260
10261 if (bra != OP_BRA)
10262 {
10263 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
10264 set_jumps(backtrack->common.topbacktracks, LABEL());
10265 backtrack->common.topbacktracks = NULL;
10266 }
10267 }
10268
/* Normal exit: restore the compiler state saved on entry. */
10269 if (local_quit_available)
10270 {
10271 common->local_quit_available = save_local_quit_available;
10272 common->quit_label = save_quit_label;
10273 common->quit = save_quit;
10274 }
10275 common->in_positive_assertion = save_in_positive_assertion;
10276 common->then_trap = save_then_trap;
10277 common->accept_label = save_accept_label;
10278 common->positive_assertion_quit = save_positive_assertion_quit;
10279 common->accept = save_accept;
10280 return cc + 1 + LINK_SIZE;
10281 }
10282
match_once_common(compiler_common * common,PCRE2_UCHAR ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)10283 static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
10284 {
10285 DEFINE_COMPILER;
10286 int stacksize;
10287
10288 if (framesize < 0)
10289 {
10290 if (framesize == no_frame)
10291 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10292 else
10293 {
10294 stacksize = needs_control_head ? 1 : 0;
10295 if (ket != OP_KET || has_alternatives)
10296 stacksize++;
10297
10298 if (stacksize > 0)
10299 free_stack(common, stacksize);
10300 }
10301
10302 if (needs_control_head)
10303 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
10304
10305 /* TMP2 which is set here used by OP_KETRMAX below. */
10306 if (ket == OP_KETRMAX)
10307 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10308 else if (ket == OP_KETRMIN)
10309 {
10310 /* Move the STR_PTR to the private_data_ptr. */
10311 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10312 }
10313 }
10314 else
10315 {
10316 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
10317 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
10318 if (needs_control_head)
10319 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10320
10321 if (ket == OP_KETRMAX)
10322 {
10323 /* TMP2 which is set here used by OP_KETRMAX below. */
10324 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10325 }
10326 }
10327 if (needs_control_head)
10328 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10329 }
10330
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)10331 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
10332 {
10333 DEFINE_COMPILER;
10334
10335 if (common->capture_last_ptr != 0)
10336 {
10337 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
10338 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10339 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10340 stacksize++;
10341 }
10342 if (common->optimized_cbracket[offset >> 1] == 0)
10343 {
10344 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
10345 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
10346 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10347 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10348 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10349 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10350 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10351 stacksize += 2;
10352 }
10353 return stacksize;
10354 }
10355
do_script_run(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10356 static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10357 {
10358 if (PRIV(script_run)(ptr, endptr, FALSE))
10359 return endptr;
10360 return NULL;
10361 }
10362
10363 #ifdef SUPPORT_UNICODE
10364
do_script_run_utf(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10365 static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10366 {
10367 if (PRIV(script_run)(ptr, endptr, TRUE))
10368 return endptr;
10369 return NULL;
10370 }
10371
10372 #endif /* SUPPORT_UNICODE */
10373
match_script_run_common(compiler_common * common,int private_data_ptr,backtrack_common * parent)10374 static SLJIT_INLINE void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
10375 {
10376 DEFINE_COMPILER;
10377
10378 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
10379
10380 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10381 #ifdef SUPPORT_UNICODE
10382 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
10383 common->utf ? SLJIT_FUNC_ADDR(do_script_run_utf) : SLJIT_FUNC_ADDR(do_script_run));
10384 #else
10385 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_script_run));
10386 #endif
10387
10388 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
10389 add_jump(compiler, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
10390 }
10391
10392 /*
10393 Handling bracketed expressions is probably the most complex part.
10394
10395 Stack layout naming characters:
10396 S - Push the current STR_PTR
10397 0 - Push a 0 (NULL)
10398 A - Push the current STR_PTR. Needed for restoring the STR_PTR
10399 before the next alternative. Not pushed if there are no alternatives.
10400 M - Any values pushed by the current alternative. Can be empty, or anything.
10401 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
10402 L - Push the previous local (pointed by localptr) to the stack
10403 () - optional values stored on the stack
10404 ()* - optional, can be stored multiple times
10405
10406 The following list shows the regular expression templates, their PCRE byte codes
10407 and stack layout supported by pcre-sljit.
10408
10409 (?:) OP_BRA | OP_KET A M
10410 () OP_CBRA | OP_KET C M
10411 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
10412 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
10413 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
10414 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
10415 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
10416 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
10417 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
10418 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
10419 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
10420 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
10421 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
10422 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
10423 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
10424 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
10425 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
10426 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
10427 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
10428 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
10429 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
10430 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
10431
10432
10433 Stack layout naming characters:
10434 A - Push the alternative index (starting from 0) on the stack.
10435 Not pushed if there are no alternatives.
10436 M - Any values pushed by the current alternative. Can be empty, or anything.
10437
10438 The next list shows the possible content of a bracket:
10439 (|) OP_*BRA | OP_ALT ... M A
10440 (?()|) OP_*COND | OP_ALT M A
10441 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
10442 Or nothing, if trace is unnecessary
10443 */
10444
compile_bracket_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)10445 static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10446 {
10447 DEFINE_COMPILER;
10448 backtrack_common *backtrack;
10449 PCRE2_UCHAR opcode;
10450 int private_data_ptr = 0;
10451 int offset = 0;
10452 int i, stacksize;
10453 int repeat_ptr = 0, repeat_length = 0;
10454 int repeat_type = 0, repeat_count = 0;
10455 PCRE2_SPTR ccbegin;
10456 PCRE2_SPTR matchingpath;
10457 PCRE2_SPTR slot;
10458 PCRE2_UCHAR bra = OP_BRA;
10459 PCRE2_UCHAR ket;
10460 assert_backtrack *assert;
10461 BOOL has_alternatives;
10462 BOOL needs_control_head = FALSE;
10463 struct sljit_jump *jump;
10464 struct sljit_jump *skip;
10465 struct sljit_label *rmax_label = NULL;
10466 struct sljit_jump *braminzero = NULL;
10467
10468 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
10469
10470 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
10471 {
10472 bra = *cc;
10473 cc++;
10474 opcode = *cc;
10475 }
10476
10477 opcode = *cc;
10478 ccbegin = cc;
10479 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
10480 ket = *matchingpath;
10481 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
10482 {
10483 repeat_ptr = PRIVATE_DATA(matchingpath);
10484 repeat_length = PRIVATE_DATA(matchingpath + 1);
10485 repeat_type = PRIVATE_DATA(matchingpath + 2);
10486 repeat_count = PRIVATE_DATA(matchingpath + 3);
10487 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
10488 if (repeat_type == OP_UPTO)
10489 ket = OP_KETRMAX;
10490 if (repeat_type == OP_MINUPTO)
10491 ket = OP_KETRMIN;
10492 }
10493
10494 matchingpath = ccbegin + 1 + LINK_SIZE;
10495 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
10496 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
10497 cc += GET(cc, 1);
10498
10499 has_alternatives = *cc == OP_ALT;
10500 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
10501 {
10502 SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
10503 compile_time_checks_must_be_grouped_together);
10504 has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
10505 }
10506
10507 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
10508 opcode = OP_SCOND;
10509
10510 if (opcode == OP_CBRA || opcode == OP_SCBRA)
10511 {
10512 /* Capturing brackets has a pre-allocated space. */
10513 offset = GET2(ccbegin, 1 + LINK_SIZE);
10514 if (common->optimized_cbracket[offset] == 0)
10515 {
10516 private_data_ptr = OVECTOR_PRIV(offset);
10517 offset <<= 1;
10518 }
10519 else
10520 {
10521 offset <<= 1;
10522 private_data_ptr = OVECTOR(offset);
10523 }
10524 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10525 matchingpath += IMM2_SIZE;
10526 }
10527 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10528 {
10529 /* Other brackets simply allocate the next entry. */
10530 private_data_ptr = PRIVATE_DATA(ccbegin);
10531 SLJIT_ASSERT(private_data_ptr != 0);
10532 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10533 if (opcode == OP_ONCE)
10534 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
10535 }
10536
10537 /* Instructions before the first alternative. */
10538 stacksize = 0;
10539 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10540 stacksize++;
10541 if (bra == OP_BRAZERO)
10542 stacksize++;
10543
10544 if (stacksize > 0)
10545 allocate_stack(common, stacksize);
10546
10547 stacksize = 0;
10548 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10549 {
10550 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10551 stacksize++;
10552 }
10553
10554 if (bra == OP_BRAZERO)
10555 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10556
10557 if (bra == OP_BRAMINZERO)
10558 {
10559 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
10560 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10561 if (ket != OP_KETRMIN)
10562 {
10563 free_stack(common, 1);
10564 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10565 }
10566 else if (opcode == OP_ONCE || opcode >= OP_SBRA)
10567 {
10568 jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10569 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10570 /* Nothing stored during the first run. */
10571 skip = JUMP(SLJIT_JUMP);
10572 JUMPHERE(jump);
10573 /* Checking zero-length iteration. */
10574 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10575 {
10576 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
10577 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10578 }
10579 else
10580 {
10581 /* Except when the whole stack frame must be saved. */
10582 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10583 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
10584 }
10585 JUMPHERE(skip);
10586 }
10587 else
10588 {
10589 jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10590 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10591 JUMPHERE(jump);
10592 }
10593 }
10594
10595 if (repeat_type != 0)
10596 {
10597 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
10598 if (repeat_type == OP_EXACT)
10599 rmax_label = LABEL();
10600 }
10601
10602 if (ket == OP_KETRMIN)
10603 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10604
10605 if (ket == OP_KETRMAX)
10606 {
10607 rmax_label = LABEL();
10608 if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
10609 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
10610 }
10611
10612 /* Handling capturing brackets and alternatives. */
10613 if (opcode == OP_ONCE)
10614 {
10615 stacksize = 0;
10616 if (needs_control_head)
10617 {
10618 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10619 stacksize++;
10620 }
10621
10622 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10623 {
10624 /* Neither capturing brackets nor recursions are found in the block. */
10625 if (ket == OP_KETRMIN)
10626 {
10627 stacksize += 2;
10628 if (!needs_control_head)
10629 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10630 }
10631 else
10632 {
10633 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10634 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
10635 if (ket == OP_KETRMAX || has_alternatives)
10636 stacksize++;
10637 }
10638
10639 if (stacksize > 0)
10640 allocate_stack(common, stacksize);
10641
10642 stacksize = 0;
10643 if (needs_control_head)
10644 {
10645 stacksize++;
10646 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10647 }
10648
10649 if (ket == OP_KETRMIN)
10650 {
10651 if (needs_control_head)
10652 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10653 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10654 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10655 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
10656 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10657 }
10658 else if (ket == OP_KETRMAX || has_alternatives)
10659 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10660 }
10661 else
10662 {
10663 if (ket != OP_KET || has_alternatives)
10664 stacksize++;
10665
10666 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
10667 allocate_stack(common, stacksize);
10668
10669 if (needs_control_head)
10670 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10671
10672 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10673 OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10674
10675 stacksize = needs_control_head ? 1 : 0;
10676 if (ket != OP_KET || has_alternatives)
10677 {
10678 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10679 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10680 stacksize++;
10681 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10682 }
10683 else
10684 {
10685 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10686 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10687 }
10688 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
10689 }
10690 }
10691 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
10692 {
10693 /* Saving the previous values. */
10694 if (common->optimized_cbracket[offset >> 1] != 0)
10695 {
10696 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
10697 allocate_stack(common, 2);
10698 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10699 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
10700 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10701 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
10702 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
10703 }
10704 else
10705 {
10706 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10707 allocate_stack(common, 1);
10708 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10709 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10710 }
10711 }
10712 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10713 {
10714 /* Saving the previous value. */
10715 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10716 allocate_stack(common, 1);
10717 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10718 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10719 }
10720 else if (has_alternatives)
10721 {
10722 /* Pushing the starting string pointer. */
10723 allocate_stack(common, 1);
10724 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10725 }
10726
10727 /* Generating code for the first alternative. */
10728 if (opcode == OP_COND || opcode == OP_SCOND)
10729 {
10730 if (*matchingpath == OP_CREF)
10731 {
10732 SLJIT_ASSERT(has_alternatives);
10733 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
10734 CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
10735 matchingpath += 1 + IMM2_SIZE;
10736 }
10737 else if (*matchingpath == OP_DNCREF)
10738 {
10739 SLJIT_ASSERT(has_alternatives);
10740
10741 i = GET2(matchingpath, 1 + IMM2_SIZE);
10742 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10743 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
10744 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
10745 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
10746 slot += common->name_entry_size;
10747 i--;
10748 while (i-- > 0)
10749 {
10750 OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
10751 OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
10752 slot += common->name_entry_size;
10753 }
10754 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
10755 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
10756 matchingpath += 1 + 2 * IMM2_SIZE;
10757 }
10758 else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
10759 {
10760 /* Never has other case. */
10761 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
10762 SLJIT_ASSERT(!has_alternatives);
10763
10764 if (*matchingpath == OP_TRUE)
10765 {
10766 stacksize = 1;
10767 matchingpath++;
10768 }
10769 else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
10770 stacksize = 0;
10771 else if (*matchingpath == OP_RREF)
10772 {
10773 stacksize = GET2(matchingpath, 1);
10774 if (common->currententry == NULL)
10775 stacksize = 0;
10776 else if (stacksize == RREF_ANY)
10777 stacksize = 1;
10778 else if (common->currententry->start == 0)
10779 stacksize = stacksize == 0;
10780 else
10781 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10782
10783 if (stacksize != 0)
10784 matchingpath += 1 + IMM2_SIZE;
10785 }
10786 else
10787 {
10788 if (common->currententry == NULL || common->currententry->start == 0)
10789 stacksize = 0;
10790 else
10791 {
10792 stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
10793 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10794 i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10795 while (stacksize > 0)
10796 {
10797 if ((int)GET2(slot, 0) == i)
10798 break;
10799 slot += common->name_entry_size;
10800 stacksize--;
10801 }
10802 }
10803
10804 if (stacksize != 0)
10805 matchingpath += 1 + 2 * IMM2_SIZE;
10806 }
10807
10808 /* The stacksize == 0 is a common "else" case. */
10809 if (stacksize == 0)
10810 {
10811 if (*cc == OP_ALT)
10812 {
10813 matchingpath = cc + 1 + LINK_SIZE;
10814 cc += GET(cc, 1);
10815 }
10816 else
10817 matchingpath = cc;
10818 }
10819 }
10820 else
10821 {
10822 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
10823 /* Similar code as PUSH_BACKTRACK macro. */
10824 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
10825 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10826 return NULL;
10827 memset(assert, 0, sizeof(assert_backtrack));
10828 assert->common.cc = matchingpath;
10829 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
10830 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
10831 }
10832 }
10833
10834 compile_matchingpath(common, matchingpath, cc, backtrack);
10835 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10836 return NULL;
10837
10838 if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
10839 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10840
10841 if (opcode == OP_ONCE)
10842 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
10843
10844 if (opcode == OP_SCRIPT_RUN)
10845 match_script_run_common(common, private_data_ptr, backtrack);
10846
10847 stacksize = 0;
10848 if (repeat_type == OP_MINUPTO)
10849 {
10850 /* We need to preserve the counter. TMP2 will be used below. */
10851 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10852 stacksize++;
10853 }
10854 if (ket != OP_KET || bra != OP_BRA)
10855 stacksize++;
10856 if (offset != 0)
10857 {
10858 if (common->capture_last_ptr != 0)
10859 stacksize++;
10860 if (common->optimized_cbracket[offset >> 1] == 0)
10861 stacksize += 2;
10862 }
10863 if (has_alternatives && opcode != OP_ONCE)
10864 stacksize++;
10865
10866 if (stacksize > 0)
10867 allocate_stack(common, stacksize);
10868
10869 stacksize = 0;
10870 if (repeat_type == OP_MINUPTO)
10871 {
10872 /* TMP2 was set above. */
10873 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
10874 stacksize++;
10875 }
10876
10877 if (ket != OP_KET || bra != OP_BRA)
10878 {
10879 if (ket != OP_KET)
10880 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10881 else
10882 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10883 stacksize++;
10884 }
10885
10886 if (offset != 0)
10887 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
10888
10889 /* Skip and count the other alternatives. */
10890 i = 1;
10891 while (*cc == OP_ALT)
10892 {
10893 cc += GET(cc, 1);
10894 i++;
10895 }
10896
10897 if (has_alternatives)
10898 {
10899 if (opcode != OP_ONCE)
10900 {
10901 if (i <= 3)
10902 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10903 else
10904 BACKTRACK_AS(bracket_backtrack)->u.matching_put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
10905 }
10906 if (ket != OP_KETRMAX)
10907 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10908 }
10909
10910 /* Must be after the matchingpath label. */
10911 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
10912 {
10913 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
10914 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10915 }
10916
10917 if (ket == OP_KETRMAX)
10918 {
10919 if (repeat_type != 0)
10920 {
10921 if (has_alternatives)
10922 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10923 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10924 JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10925 /* Drop STR_PTR for greedy plus quantifier. */
10926 if (opcode != OP_ONCE)
10927 free_stack(common, 1);
10928 }
10929 else if (opcode < OP_BRA || opcode >= OP_SBRA)
10930 {
10931 if (has_alternatives)
10932 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10933
10934 /* Checking zero-length iteration. */
10935 if (opcode != OP_ONCE)
10936 {
10937 /* This case includes opcodes such as OP_SCRIPT_RUN. */
10938 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
10939 /* Drop STR_PTR for greedy plus quantifier. */
10940 if (bra != OP_BRAZERO)
10941 free_stack(common, 1);
10942 }
10943 else
10944 /* TMP2 must contain the starting STR_PTR. */
10945 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
10946 }
10947 else
10948 JUMPTO(SLJIT_JUMP, rmax_label);
10949 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10950 }
10951
10952 if (repeat_type == OP_EXACT)
10953 {
10954 count_match(common);
10955 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10956 JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10957 }
10958 else if (repeat_type == OP_UPTO)
10959 {
10960 /* We need to preserve the counter. */
10961 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10962 allocate_stack(common, 1);
10963 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10964 }
10965
10966 if (bra == OP_BRAZERO)
10967 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
10968
10969 if (bra == OP_BRAMINZERO)
10970 {
10971 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
10972 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
10973 if (braminzero != NULL)
10974 {
10975 JUMPHERE(braminzero);
10976 /* We need to release the end pointer to perform the
10977 backtrack for the zero-length iteration. When
10978 framesize is < 0, OP_ONCE will do the release itself. */
10979 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
10980 {
10981 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10982 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10983 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
10984 }
10985 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
10986 free_stack(common, 1);
10987 }
10988 /* Continue to the normal backtrack. */
10989 }
10990
10991 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
10992 count_match(common);
10993
10994 cc += 1 + LINK_SIZE;
10995
10996 if (opcode == OP_ONCE)
10997 {
10998 /* We temporarily encode the needs_control_head in the lowest bit.
10999 Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
11000 the same value for small signed numbers (including negative numbers). */
11001 BACKTRACK_AS(bracket_backtrack)->u.framesize = (int)((unsigned)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
11002 }
11003 return cc + repeat_length;
11004 }
11005
/* Emits the matching path of a bracket that begins with OP_BRAPOS, OP_SBRAPOS,
OP_CBRAPOS or OP_SCBRAPOS (optionally preceded by OP_BRAPOSZERO) and is closed
by OP_KETRPOS.

The generated code is a loop: each alternative is compiled in turn, and when
one matches, the current position is recorded and control jumps back to the
`loop` label. When an alternative fails, STR_PTR is reloaded from the recorded
position and the next alternative (or the code after the bracket) follows.

Arguments:
  common      compiler state; capture_last_ptr, control_head_ptr and
              optimized_cbracket are read here
  cc          points at the bracket opcode, or at OP_BRAPOSZERO preceding it
  parent      not referenced directly in this body; presumably consumed by
              PUSH_BACKTRACK - confirm against the macro

Returns:      pointer to the code after OP_KETRPOS (cc + 1 + LINK_SIZE), or
              NULL when sljit reports a compiler error
*/
compile_bracketpos_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11006 static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11007 {
11008 DEFINE_COMPILER;
11009 backtrack_common *backtrack;
11010 PCRE2_UCHAR opcode;
11011 int private_data_ptr;
11012 int cbraprivptr = 0;
11013 BOOL needs_control_head;
11014 int framesize;
11015 int stacksize;
11016 int offset = 0;
11017 BOOL zero = FALSE;
11018 PCRE2_SPTR ccbegin = NULL;
11019 int stack; /* Also contains the offset of control head. */
11020 struct sljit_label *loop = NULL;
11021 struct jump_list *emptymatch = NULL;
11022
/* NOTE(review): `backtrack` is never assigned explicitly in this function, so
PUSH_BACKTRACK is presumably what allocates and sets it - confirm. */
11023 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
/* An OP_BRAPOSZERO prefix is remembered in `zero`. When `zero` is FALSE an
extra "nothing matched yet" flag slot is reserved below and tested at the end. */
11024 if (*cc == OP_BRAPOSZERO)
11025 {
11026 zero = TRUE;
11027 cc++;
11028 }
11029
11030 opcode = *cc;
11031 private_data_ptr = PRIVATE_DATA(cc);
11032 SLJIT_ASSERT(private_data_ptr != 0);
11033 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
/* Locate the first alternative (ccbegin). For the capturing forms `offset`
is first the number read after the link field, then doubled so that it indexes
the OVECTOR(offset) / OVECTOR(offset + 1) pair; it stays 0 otherwise. */
11034 switch(opcode)
11035 {
11036 case OP_BRAPOS:
11037 case OP_SBRAPOS:
11038 ccbegin = cc + 1 + LINK_SIZE;
11039 break;
11040
11041 case OP_CBRAPOS:
11042 case OP_SCBRAPOS:
11043 offset = GET2(cc, 1 + LINK_SIZE);
11044 /* This case cannot be optimized in the same way as
11045 normal capturing brackets. */
11046 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
11047 cbraprivptr = OVECTOR_PRIV(offset);
11048 offset <<= 1;
11049 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
11050 break;
11051
11052 default:
11053 SLJIT_UNREACHABLE();
11054 break;
11055 }
11056
11057 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
11058 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
/* Negative framesize: init_frame is not called, only the saved values are
pushed. Slot layout written by this branch:
  capturing:      STACK(0) = OVECTOR(offset), STACK(1) = OVECTOR(offset + 1),
                  STACK(2) = capture_last value (only if capture_last_ptr != 0)
  non-capturing:  STACK(0) = STR_PTR
  then the control head (only if needs_control_head),
  then the flag initialised to 1 (only if !zero), which is the last slot. */
11059 if (framesize < 0)
11060 {
11061 if (offset != 0)
11062 {
11063 stacksize = 2;
11064 if (common->capture_last_ptr != 0)
11065 stacksize++;
11066 }
11067 else
11068 stacksize = 1;
11069
11070 if (needs_control_head)
11071 stacksize++;
11072 if (!zero)
11073 stacksize++;
11074
11075 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
11076 allocate_stack(common, stacksize);
/* With no_frame the STACK_TOP value is kept in the private data slot; it is
loaded back into STACK_TOP after each matched alternative in the loop below. */
11077 if (framesize == no_frame)
11078 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
11079
11080 stack = 0;
11081 if (offset != 0)
11082 {
11083 stack = 2;
/* TMP1 and TMP2 are reused: loads of the next values are interleaved with the
stores of the previous ones, so the statement order here matters. */
11084 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
11085 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
11086 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
11087 if (common->capture_last_ptr != 0)
11088 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
11089 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
11090 if (needs_control_head)
11091 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11092 if (common->capture_last_ptr != 0)
11093 {
11094 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
11095 stack = 3;
11096 }
11097 }
11098 else
11099 {
11100 if (needs_control_head)
11101 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11102 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11103 stack = 1;
11104 }
11105
/* `stack` is bumped only to address the flag slot behind the control head,
then stepped back so that it ends up as the index of the control head slot. */
11106 if (needs_control_head)
11107 stack++;
11108 if (!zero)
11109 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
11110 if (needs_control_head)
11111 {
11112 stack--;
11113 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
11114 }
11115 }
/* framesize >= 0: the frame is initialised by init_frame as well. Slot layout
written by this branch, in increasing index order:
  the flag initialised to 1 (only if !zero), at STACK(0),
  the control head (only if needs_control_head),
  STR_PTR (only for the non-capturing forms),
  the previous content of the private data slot. */
11116 else
11117 {
11118 stacksize = framesize + 1;
11119 if (!zero)
11120 stacksize++;
11121 if (needs_control_head)
11122 stacksize++;
11123 if (offset == 0)
11124 stacksize++;
11125 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
11126
11127 allocate_stack(common, stacksize);
11128 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11129 if (needs_control_head)
11130 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
/* The private data slot now holds STACK_TOP plus the size of the whole
allocation; the loop below subtracts the same amount to recover STACK_TOP. */
11131 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11132
11133 stack = 0;
11134 if (!zero)
11135 {
11136 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
11137 stack = 1;
11138 }
11139 if (needs_control_head)
11140 {
11141 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
11142 stack++;
11143 }
11144 if (offset == 0)
11145 {
11146 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
11147 stack++;
11148 }
11149 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
11150 init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
/* Step `stack` back over the private data slot and, if present, the STR_PTR
slot: it then indexes the control head slot. The value is only used below
when needs_control_head is set. */
11151 stack -= 1 + (offset == 0);
11152 }
11153
/* Capturing forms keep the start position of the current iteration in
cbraprivptr. */
11154 if (offset != 0)
11155 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11156
11157 loop = LABEL();
/* One pass per alternative. The pass emits, in this order: the alternative's
matching path, the "matched" bookkeeping that ends with a jump back to `loop`,
the alternative's backtracking path, and the restore of STR_PTR. */
11158 while (*cc != OP_KETRPOS)
11159 {
11160 backtrack->top = NULL;
11161 backtrack->topbacktracks = NULL;
/* Advance cc by the link value, so that [ccbegin, cc) is compiled as the
current alternative. */
11162 cc += GET(cc, 1);
11163
11164 compile_matchingpath(common, ccbegin, cc, backtrack);
11165 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11166 return NULL;
11167
11168 if (framesize < 0)
11169 {
11170 if (framesize == no_frame)
11171 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11172
11173 if (offset != 0)
11174 {
/* TMP1 = start of this iteration. The capture pair becomes (TMP1, STR_PTR)
and cbraprivptr is advanced to STR_PTR for the next iteration. */
11175 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11176 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11177 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11178 if (common->capture_last_ptr != 0)
11179 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
11180 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11181 }
11182 else
11183 {
/* STACK(0) holds the position saved before this iteration; OP_SBRAPOS needs
it in TMP1 for the empty match test below before it is overwritten. */
11184 if (opcode == OP_SBRAPOS)
11185 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11186 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11187 }
11188
11189 /* Even if the match is empty, we need to reset the control head. */
11190 if (needs_control_head)
11191 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
11192
/* For the OP_S* forms an iteration that did not move STR_PTR leaves the loop
through `emptymatch`. */
11193 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
11194 add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
11195
/* At least one iteration has matched: clear the flag in the last slot. */
11196 if (!zero)
11197 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
11198 }
11199 else
11200 {
11201 if (offset != 0)
11202 {
/* Recover STACK_TOP from the private data slot (it was stored with the
allocation size added), then update the capture as in the frameless case. */
11203 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11204 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11205 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11206 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11207 if (common->capture_last_ptr != 0)
11208 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
11209 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11210 }
11211 else
11212 {
/* The saved position is addressed relative to TMP2 (the private data slot
value) at STACK(-framesize - 2); presumably this is the STR_PTR slot written
during the setup above - confirm against the STACK() macro. */
11213 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11214 OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11215 if (opcode == OP_SBRAPOS)
11216 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
11217 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
11218 }
11219
11220 /* Even if the match is empty, we need to reset the control head. */
11221 if (needs_control_head)
11222 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
11223
11224 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
11225 add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
11226
11227 if (!zero)
11228 {
/* NOTE(review): this is the else branch of the `framesize < 0` test above, so
the condition below is always false here and only the STACK(0) store is ever
emitted. */
11229 if (framesize < 0)
11230 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
11231 else
11232 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
11233 }
11234 }
11235
/* The alternative matched: try another iteration. */
11236 JUMPTO(SLJIT_JUMP, loop);
11237 flush_stubs(common);
11238
/* Failure side of this alternative: its backtracking path is emitted here and
the pending topbacktracks jumps are bound to the label that follows it. */
11239 compile_backtrackingpath(common, backtrack->top);
11240 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11241 return NULL;
11242 set_jumps(backtrack->topbacktracks, LABEL());
11243
/* Reload STR_PTR from the position recorded at the start of the iteration. */
11244 if (framesize < 0)
11245 {
11246 if (offset != 0)
11247 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11248 else
11249 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11250 }
11251 else
11252 {
11253 if (offset != 0)
11254 {
/* TMP2 is loaded only after the last alternative; it is needed by the flag
test that follows the loop. */
11255 /* Last alternative. */
11256 if (*cc == OP_KETRPOS)
11257 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11258 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11259 }
11260 else
11261 {
11262 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11263 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
11264 }
11265 }
11266
11267 if (*cc == OP_KETRPOS)
11268 break;
/* cc is at the next alternative's opcode; its body starts after the link. */
11269 ccbegin = cc + 1 + LINK_SIZE;
11270 }
11271
11272 /* We don't have to restore the control head in case of a failed match. */
11273
/* Without OP_BRAPOSZERO the bracket has to match at least once: if the flag
is still non-zero, no iteration succeeded and a backtrack is requested through
topbacktracks. */
11274 backtrack->topbacktracks = NULL;
11275 if (!zero)
11276 {
11277 if (framesize < 0)
11278 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
11279 else /* TMP2 is set to [private_data_ptr] above. */
11280 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
11281 }
11282
/* The empty match exits of the OP_S* forms join the normal exit here. */
11283 /* None of them matched. */
11284 set_jumps(emptymatch, LABEL());
11285 count_match(common);
11286 return cc + 1 + LINK_SIZE;
11287 }
11288
get_iterator_parameters(compiler_common * common,PCRE2_SPTR cc,PCRE2_UCHAR * opcode,PCRE2_UCHAR * type,sljit_u32 * max,sljit_u32 * exact,PCRE2_SPTR * end)11289 static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
11290 {
11291 int class_len;
11292
11293 *opcode = *cc;
11294 *exact = 0;
11295
11296 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
11297 {
11298 cc++;
11299 *type = OP_CHAR;
11300 }
11301 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
11302 {
11303 cc++;
11304 *type = OP_CHARI;
11305 *opcode -= OP_STARI - OP_STAR;
11306 }
11307 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
11308 {
11309 cc++;
11310 *type = OP_NOT;
11311 *opcode -= OP_NOTSTAR - OP_STAR;
11312 }
11313 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
11314 {
11315 cc++;
11316 *type = OP_NOTI;
11317 *opcode -= OP_NOTSTARI - OP_STAR;
11318 }
11319 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
11320 {
11321 cc++;
11322 *opcode -= OP_TYPESTAR - OP_STAR;
11323 *type = OP_END;
11324 }
11325 else
11326 {
11327 SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
11328 *type = *opcode;
11329 cc++;
11330 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
11331 *opcode = cc[class_len - 1];
11332
11333 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
11334 {
11335 *opcode -= OP_CRSTAR - OP_STAR;
11336 *end = cc + class_len;
11337
11338 if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
11339 {
11340 *exact = 1;
11341 *opcode -= OP_PLUS - OP_STAR;
11342 }
11343 }
11344 else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
11345 {
11346 *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
11347 *end = cc + class_len;
11348
11349 if (*opcode == OP_POSPLUS)
11350 {
11351 *exact = 1;
11352 *opcode = OP_POSSTAR;
11353 }
11354 }
11355 else
11356 {
11357 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
11358 *max = GET2(cc, (class_len + IMM2_SIZE));
11359 *exact = GET2(cc, class_len);
11360
11361 if (*max == 0)
11362 {
11363 if (*opcode == OP_CRPOSRANGE)
11364 *opcode = OP_POSSTAR;
11365 else
11366 *opcode -= OP_CRRANGE - OP_STAR;
11367 }
11368 else
11369 {
11370 *max -= *exact;
11371 if (*max == 0)
11372 *opcode = OP_EXACT;
11373 else if (*max == 1)
11374 {
11375 if (*opcode == OP_CRPOSRANGE)
11376 *opcode = OP_POSQUERY;
11377 else
11378 *opcode -= OP_CRRANGE - OP_QUERY;
11379 }
11380 else
11381 {
11382 if (*opcode == OP_CRPOSRANGE)
11383 *opcode = OP_POSUPTO;
11384 else
11385 *opcode -= OP_CRRANGE - OP_UPTO;
11386 }
11387 }
11388 *end = cc + class_len + 2 * IMM2_SIZE;
11389 }
11390 return cc;
11391 }
11392
11393 switch(*opcode)
11394 {
11395 case OP_EXACT:
11396 *exact = GET2(cc, 0);
11397 cc += IMM2_SIZE;
11398 break;
11399
11400 case OP_PLUS:
11401 case OP_MINPLUS:
11402 *exact = 1;
11403 *opcode -= OP_PLUS - OP_STAR;
11404 break;
11405
11406 case OP_POSPLUS:
11407 *exact = 1;
11408 *opcode = OP_POSSTAR;
11409 break;
11410
11411 case OP_UPTO:
11412 case OP_MINUPTO:
11413 case OP_POSUPTO:
11414 *max = GET2(cc, 0);
11415 cc += IMM2_SIZE;
11416 break;
11417 }
11418
11419 if (*type == OP_END)
11420 {
11421 *type = *cc;
11422 *end = next_opcode(common, cc);
11423 cc++;
11424 return cc;
11425 }
11426
11427 *end = cc + 1;
11428 #ifdef SUPPORT_UNICODE
11429 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
11430 #endif
11431 return cc;
11432 }
11433
compile_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11434 static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11435 {
11436 DEFINE_COMPILER;
11437 backtrack_common *backtrack;
11438 PCRE2_UCHAR opcode;
11439 PCRE2_UCHAR type;
11440 sljit_u32 max = 0, exact;
11441 sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
11442 sljit_s32 early_fail_type;
11443 BOOL charpos_enabled;
11444 PCRE2_UCHAR charpos_char;
11445 unsigned int charpos_othercasebit;
11446 PCRE2_SPTR end;
11447 jump_list *no_match = NULL;
11448 jump_list *no_char1_match = NULL;
11449 struct sljit_jump *jump = NULL;
11450 struct sljit_label *label;
11451 int private_data_ptr = PRIVATE_DATA(cc);
11452 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
11453 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
11454 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
11455 int tmp_base, tmp_offset;
11456 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11457 BOOL use_tmp;
11458 #endif
11459
11460 PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
11461
11462 early_fail_type = (early_fail_ptr & 0x7);
11463 early_fail_ptr >>= 3;
11464
11465 /* During recursion, these optimizations are disabled. */
11466 if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL)
11467 {
11468 early_fail_ptr = 0;
11469 early_fail_type = type_skip;
11470 }
11471
11472 SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
11473 || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));
11474
11475 if (early_fail_type == type_fail)
11476 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));
11477
11478 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
11479
11480 if (type != OP_EXTUNI)
11481 {
11482 tmp_base = TMP3;
11483 tmp_offset = 0;
11484 }
11485 else
11486 {
11487 tmp_base = SLJIT_MEM1(SLJIT_SP);
11488 tmp_offset = POSSESSIVE0;
11489 }
11490
11491 /* Handle fixed part first. */
11492 if (exact > 1)
11493 {
11494 SLJIT_ASSERT(early_fail_ptr == 0);
11495
11496 if (common->mode == PCRE2_JIT_COMPLETE
11497 #ifdef SUPPORT_UNICODE
11498 && !common->utf
11499 #endif
11500 && type != OP_ANYNL && type != OP_EXTUNI)
11501 {
11502 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
11503 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
11504 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11505 label = LABEL();
11506 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11507 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11508 JUMPTO(SLJIT_NOT_ZERO, label);
11509 }
11510 else
11511 {
11512 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11513 label = LABEL();
11514 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11515 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11516 JUMPTO(SLJIT_NOT_ZERO, label);
11517 }
11518 }
11519 else if (exact == 1)
11520 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11521
11522 if (early_fail_type == type_fail_range)
11523 {
11524 /* Range end first, followed by range start. */
11525 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
11526 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw));
11527 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
11528 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
11529 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
11530
11531 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11532 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw), STR_PTR, 0);
11533 }
11534
11535 switch(opcode)
11536 {
11537 case OP_STAR:
11538 case OP_UPTO:
11539 SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR);
11540
11541 if (type == OP_ANYNL || type == OP_EXTUNI)
11542 {
11543 SLJIT_ASSERT(private_data_ptr == 0);
11544 SLJIT_ASSERT(early_fail_ptr == 0);
11545
11546 allocate_stack(common, 2);
11547 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11548 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
11549
11550 if (opcode == OP_UPTO)
11551 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
11552
11553 label = LABEL();
11554 compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11555 if (opcode == OP_UPTO)
11556 {
11557 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
11558 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11559 jump = JUMP(SLJIT_ZERO);
11560 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
11561 }
11562
11563 /* We cannot use TMP3 because of allocate_stack. */
11564 allocate_stack(common, 1);
11565 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11566 JUMPTO(SLJIT_JUMP, label);
11567 if (jump != NULL)
11568 JUMPHERE(jump);
11569 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11570 break;
11571 }
11572 #ifdef SUPPORT_UNICODE
11573 else if (type == OP_ALLANY && !common->invalid_utf)
11574 #else
11575 else if (type == OP_ALLANY)
11576 #endif
11577 {
11578 if (opcode == OP_STAR)
11579 {
11580 if (private_data_ptr == 0)
11581 allocate_stack(common, 2);
11582
11583 OP1(SLJIT_MOV, base, offset0, STR_END, 0);
11584 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11585
11586 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11587 process_partial_match(common);
11588
11589 if (early_fail_ptr != 0)
11590 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11591 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11592 break;
11593 }
11594 #ifdef SUPPORT_UNICODE
11595 else if (!common->utf)
11596 #else
11597 else
11598 #endif
11599 {
11600 if (private_data_ptr == 0)
11601 allocate_stack(common, 2);
11602
11603 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11604 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11605
11606 if (common->mode == PCRE2_JIT_COMPLETE)
11607 {
11608 OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
11609 CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11610 }
11611 else
11612 {
11613 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11614 process_partial_match(common);
11615 JUMPHERE(jump);
11616 }
11617
11618 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11619
11620 if (early_fail_ptr != 0)
11621 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11622 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11623 break;
11624 }
11625 }
11626
11627 charpos_enabled = FALSE;
11628 charpos_char = 0;
11629 charpos_othercasebit = 0;
11630
11631 if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
11632 {
11633 #ifdef SUPPORT_UNICODE
11634 charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
11635 #else
11636 charpos_enabled = TRUE;
11637 #endif
11638 if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
11639 {
11640 charpos_othercasebit = char_get_othercase_bit(common, end + 1);
11641 if (charpos_othercasebit == 0)
11642 charpos_enabled = FALSE;
11643 }
11644
11645 if (charpos_enabled)
11646 {
11647 charpos_char = end[1];
11648 /* Consume the OP_CHAR opcode. */
11649 end += 2;
11650 #if PCRE2_CODE_UNIT_WIDTH == 8
11651 SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
11652 #elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
11653 SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
11654 if ((charpos_othercasebit & 0x100) != 0)
11655 charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
11656 #endif
11657 if (charpos_othercasebit != 0)
11658 charpos_char |= charpos_othercasebit;
11659
11660 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
11661 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
11662 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
11663 }
11664 }
11665
11666 if (charpos_enabled)
11667 {
11668 if (opcode == OP_UPTO)
11669 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
11670
11671 /* Search the first instance of charpos_char. */
11672 jump = JUMP(SLJIT_JUMP);
11673 label = LABEL();
11674 if (opcode == OP_UPTO)
11675 {
11676 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11677 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
11678 }
11679 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11680 if (early_fail_ptr != 0)
11681 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11682 JUMPHERE(jump);
11683
11684 detect_partial_match(common, &backtrack->topbacktracks);
11685 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11686 if (charpos_othercasebit != 0)
11687 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11688 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11689
11690 if (private_data_ptr == 0)
11691 allocate_stack(common, 2);
11692 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11693 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11694
11695 if (opcode == OP_UPTO)
11696 {
11697 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11698 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11699 }
11700
11701 /* Search the last instance of charpos_char. */
11702 label = LABEL();
11703 compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11704 if (early_fail_ptr != 0)
11705 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11706 detect_partial_match(common, &no_match);
11707 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11708 if (charpos_othercasebit != 0)
11709 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11710
11711 if (opcode == OP_STAR)
11712 {
11713 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11714 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11715 JUMPTO(SLJIT_JUMP, label);
11716 }
11717 else
11718 {
11719 jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
11720 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11721 JUMPHERE(jump);
11722 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11723 JUMPTO(SLJIT_NOT_ZERO, label);
11724 }
11725
11726 set_jumps(no_match, LABEL());
11727 OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1));
11728 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11729 }
11730 else
11731 {
11732 if (private_data_ptr == 0)
11733 allocate_stack(common, 2);
11734
11735 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11736 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11737 use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR);
11738 SLJIT_ASSERT(!use_tmp || tmp_base == TMP3);
11739
11740 if (common->utf)
11741 OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11742 #endif
11743 if (opcode == OP_UPTO)
11744 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11745
11746 detect_partial_match(common, &no_match);
11747 label = LABEL();
11748 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11749 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11750 if (common->utf)
11751 OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11752 #endif
11753
11754 if (opcode == OP_UPTO)
11755 {
11756 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11757 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11758 }
11759
11760 detect_partial_match_to(common, label);
11761 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11762
11763 set_jumps(no_char1_match, LABEL());
11764 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11765 if (common->utf)
11766 {
11767 set_jumps(no_match, LABEL());
11768 if (use_tmp)
11769 {
11770 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
11771 OP1(SLJIT_MOV, base, offset0, TMP3, 0);
11772 }
11773 else
11774 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11775 }
11776 else
11777 #endif
11778 {
11779 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11780 set_jumps(no_match, LABEL());
11781 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11782 }
11783
11784 if (early_fail_ptr != 0)
11785 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11786 }
11787
11788 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11789 break;
11790
11791 case OP_MINSTAR:
11792 if (private_data_ptr == 0)
11793 allocate_stack(common, 1);
11794 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11795 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11796 if (early_fail_ptr != 0)
11797 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11798 break;
11799
11800 case OP_MINUPTO:
11801 SLJIT_ASSERT(early_fail_ptr == 0);
11802 if (private_data_ptr == 0)
11803 allocate_stack(common, 2);
11804 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11805 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
11806 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11807 break;
11808
11809 case OP_QUERY:
11810 case OP_MINQUERY:
11811 SLJIT_ASSERT(early_fail_ptr == 0);
11812 if (private_data_ptr == 0)
11813 allocate_stack(common, 1);
11814 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11815 if (opcode == OP_QUERY)
11816 compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11817 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11818 break;
11819
11820 case OP_EXACT:
11821 break;
11822
11823 case OP_POSSTAR:
11824 #if defined SUPPORT_UNICODE
11825 if (type == OP_ALLANY && !common->invalid_utf)
11826 #else
11827 if (type == OP_ALLANY)
11828 #endif
11829 {
11830 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11831 process_partial_match(common);
11832 if (early_fail_ptr != 0)
11833 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11834 break;
11835 }
11836
11837 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11838 if (common->utf)
11839 {
11840 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11841 detect_partial_match(common, &no_match);
11842 label = LABEL();
11843 compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11844 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11845 detect_partial_match_to(common, label);
11846
11847 set_jumps(no_match, LABEL());
11848 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11849 if (early_fail_ptr != 0)
11850 {
11851 if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3)
11852 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0);
11853 else
11854 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11855 }
11856 break;
11857 }
11858 #endif
11859
11860 detect_partial_match(common, &no_match);
11861 label = LABEL();
11862 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11863 detect_partial_match_to(common, label);
11864 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11865
11866 set_jumps(no_char1_match, LABEL());
11867 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11868 set_jumps(no_match, LABEL());
11869 if (early_fail_ptr != 0)
11870 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11871 break;
11872
11873 case OP_POSUPTO:
11874 SLJIT_ASSERT(early_fail_ptr == 0);
11875 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11876 if (common->utf)
11877 {
11878 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11879 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11880
11881 detect_partial_match(common, &no_match);
11882 label = LABEL();
11883 compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11884 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11885 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11886 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11887 detect_partial_match_to(common, label);
11888
11889 set_jumps(no_match, LABEL());
11890 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
11891 break;
11892 }
11893 #endif
11894
11895 if (type == OP_ALLANY)
11896 {
11897 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11898
11899 if (common->mode == PCRE2_JIT_COMPLETE)
11900 {
11901 OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
11902 CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11903 }
11904 else
11905 {
11906 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11907 process_partial_match(common);
11908 JUMPHERE(jump);
11909 }
11910 break;
11911 }
11912
11913 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11914
11915 detect_partial_match(common, &no_match);
11916 label = LABEL();
11917 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11918 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11919 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11920 detect_partial_match_to(common, label);
11921 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11922
11923 set_jumps(no_char1_match, LABEL());
11924 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11925 set_jumps(no_match, LABEL());
11926 break;
11927
11928 case OP_POSQUERY:
11929 SLJIT_ASSERT(early_fail_ptr == 0);
11930 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11931 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11932 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11933 set_jumps(no_match, LABEL());
11934 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11935 break;
11936
11937 default:
11938 SLJIT_UNREACHABLE();
11939 break;
11940 }
11941
11942 count_match(common);
11943 return end;
11944 }
11945
compile_fail_accept_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11946 static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11947 {
11948 DEFINE_COMPILER;
11949 backtrack_common *backtrack;
11950
11951 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
11952
11953 if (*cc == OP_FAIL)
11954 {
11955 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11956 return cc + 1;
11957 }
11958
11959 if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
11960 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
11961
11962 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
11963 {
11964 /* No need to check notempty conditions. */
11965 if (common->accept_label == NULL)
11966 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
11967 else
11968 JUMPTO(SLJIT_JUMP, common->accept_label);
11969 return cc + 1;
11970 }
11971
11972 if (common->accept_label == NULL)
11973 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
11974 else
11975 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
11976
11977 if (HAS_VIRTUAL_REGISTERS)
11978 {
11979 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11980 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
11981 }
11982 else
11983 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options));
11984
11985 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
11986 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO));
11987 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
11988 if (common->accept_label == NULL)
11989 add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
11990 else
11991 JUMPTO(SLJIT_ZERO, common->accept_label);
11992
11993 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
11994 if (common->accept_label == NULL)
11995 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
11996 else
11997 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
11998 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11999 return cc + 1;
12000 }
12001
compile_close_matchingpath(compiler_common * common,PCRE2_SPTR cc)12002 static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
12003 {
12004 DEFINE_COMPILER;
12005 int offset = GET2(cc, 1);
12006 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
12007
12008 /* Data will be discarded anyway... */
12009 if (common->currententry != NULL)
12010 return cc + 1 + IMM2_SIZE;
12011
12012 if (!optimized_cbracket)
12013 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
12014 offset <<= 1;
12015 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
12016 if (!optimized_cbracket)
12017 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12018 return cc + 1 + IMM2_SIZE;
12019 }
12020
compile_control_verb_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)12021 static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
12022 {
12023 DEFINE_COMPILER;
12024 backtrack_common *backtrack;
12025 PCRE2_UCHAR opcode = *cc;
12026 PCRE2_SPTR ccend = cc + 1;
12027
12028 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
12029 opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
12030 ccend += 2 + cc[1];
12031
12032 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
12033
12034 if (opcode == OP_SKIP)
12035 {
12036 allocate_stack(common, 1);
12037 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
12038 return ccend;
12039 }
12040
12041 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
12042 {
12043 if (HAS_VIRTUAL_REGISTERS)
12044 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12045 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
12046 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
12047 OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
12048 }
12049
12050 return ccend;
12051 }
12052
/* One-element opcode array holding OP_THEN_TRAP. The then-trap backtrack
entries created by the matching-path compilers in this file set their
common.cc field to point at it. */
static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
12054
compile_then_trap_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)12055 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
12056 {
12057 DEFINE_COMPILER;
12058 backtrack_common *backtrack;
12059 BOOL needs_control_head;
12060 int size;
12061
12062 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
12063 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
12064 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
12065 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
12066 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
12067
12068 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
12069 size = 3 + (size < 0 ? 0 : size);
12070
12071 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12072 allocate_stack(common, size);
12073 if (size > 3)
12074 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
12075 else
12076 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
12077 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
12078 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
12079 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
12080
12081 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
12082 if (size >= 0)
12083 init_frame(common, cc, ccend, size - 1, 0);
12084 }
12085
/* Jump list that receives the failure jumps of items which do not push a
backtrack entry of their own. This is deliberately an expression and not a
cached pointer, so that parent->top is read again at every use. */
#define MATCHINGPATH_BACKTRACKS \
  (parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks)

/* Compiles the matching path of the opcodes in the range [cc, ccend) by
dispatching every opcode to its dedicated compiler function.

Arguments:
  common    the JIT compiler state
  cc        first opcode to compile
  ccend     end of the range; must point to OP_END or to an opcode between
            OP_ALT and OP_KETRPOS
  parent    the backtrack structure that collects the backtrack entries

The function returns early when a helper returns NULL or when one of the
PUSH_BACKTRACK_NOVALUE invocations bails out.
*/

static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
then_trap_backtrack *save_then_trap = NULL;
BOOL has_then_trap = FALSE;
PCRE2_UCHAR repeat_opcode;

SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));

if (common->has_then && common->then_offsets[cc - common->start] != 0)
  {
  SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
  save_then_trap = common->then_trap;
  has_then_trap = TRUE;
  /* Tail item on backtrack. */
  compile_then_trap_matchingpath(common, cc, ccend, parent);
  }

while (cc < ccend)
  {
  switch(*cc)
    {
    /* Assertions handled by compile_simple_assertion_matchingpath(). */
    case OP_SOD:
    case OP_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_REVERSE:
    cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, MATCHINGPATH_BACKTRACKS);
    break;

    /* Single character types handled by compile_char1_matchingpath(). */
    case OP_NOT_DIGIT:
    case OP_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
    case OP_ANYBYTE:
    case OP_NOTPROP:
    case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE:
    case OP_HSPACE:
    case OP_NOT_VSPACE:
    case OP_VSPACE:
    case OP_EXTUNI:
    case OP_NOT:
    case OP_NOTI:
    cc = compile_char1_matchingpath(common, *cc, cc + 1, MATCHINGPATH_BACKTRACKS, TRUE);
    break;

    case OP_SET_SOM:
    /* The previous OVECTOR(0) value is saved on the stack and replaced by
    the current subject position. */
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    cc++;
    break;

    case OP_CHAR:
    case OP_CHARI:
    /* The multi-character compiler is only used in PCRE2_JIT_COMPLETE mode. */
    if (common->mode != PCRE2_JIT_COMPLETE)
      cc = compile_char1_matchingpath(common, *cc, cc + 1, MATCHINGPATH_BACKTRACKS, TRUE);
    else
      cc = compile_charn_matchingpath(common, cc, ccend, MATCHINGPATH_BACKTRACKS);
    break;

    /* Repeated single items. */
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    cc = compile_iterator_matchingpath(common, cc, parent);
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* The opcode behind the 32-byte class bitmap tells whether the class is
    repeated. */
    repeat_opcode = cc[1 + (32 / sizeof(PCRE2_UCHAR))];
    if (repeat_opcode >= OP_CRSTAR && repeat_opcode <= OP_CRPOSRANGE)
      cc = compile_iterator_matchingpath(common, cc, parent);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, MATCHINGPATH_BACKTRACKS, TRUE);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    case OP_XCLASS:
    /* GET(cc, 1) gives the offset of the opcode that follows the class. */
    repeat_opcode = *(cc + GET(cc, 1));
    if (repeat_opcode >= OP_CRSTAR && repeat_opcode <= OP_CRPOSRANGE)
      cc = compile_iterator_matchingpath(common, cc, parent);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, MATCHINGPATH_BACKTRACKS, TRUE);
    break;
#endif

    case OP_REF:
    case OP_REFI:
    repeat_opcode = cc[1 + IMM2_SIZE];
    if (repeat_opcode >= OP_CRSTAR && repeat_opcode <= OP_CRPOSRANGE)
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    else
      {
      compile_ref_matchingpath(common, cc, MATCHINGPATH_BACKTRACKS, TRUE, FALSE);
      cc += 1 + IMM2_SIZE;
      }
    break;

    case OP_DNREF:
    case OP_DNREFI:
    repeat_opcode = cc[1 + 2 * IMM2_SIZE];
    if (repeat_opcode >= OP_CRSTAR && repeat_opcode <= OP_CRPOSRANGE)
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    else
      {
      compile_dnref_search(common, cc, MATCHINGPATH_BACKTRACKS);
      compile_ref_matchingpath(common, cc, MATCHINGPATH_BACKTRACKS, TRUE, FALSE);
      cc += 1 + 2 * IMM2_SIZE;
      }
    break;

    case OP_RECURSE:
    cc = compile_recurse_matchingpath(common, cc, parent);
    break;

    case OP_CALLOUT:
    case OP_CALLOUT_STR:
    cc = compile_callout_matchingpath(common, cc, parent);
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
    cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
    break;

    case OP_BRAMINZERO:
    PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
    cc = bracketend(cc + 1);
    /* cc is now past the bracket; inspect the opcode that closed it. */
    if (*(cc - 1 - LINK_SIZE) == OP_KETRMIN)
      {
      allocate_stack(common, 2);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
      }
    else
      {
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
    count_match(common);
    break;

    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    cc = compile_bracket_matchingpath(common, cc, parent);
    break;

    case OP_BRAZERO:
    /* Opcodes up to OP_ASSERTBACK_NOT after OP_BRAZERO go to the assertion
    compiler, everything else to the bracket compiler. */
    if (cc[1] <= OP_ASSERTBACK_NOT)
      {
      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
      }
    else
      cc = compile_bracket_matchingpath(common, cc, parent);
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    cc = compile_bracketpos_matchingpath(common, cc, parent);
    break;

    case OP_MARK:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    SLJIT_ASSERT(common->mark_ptr != 0);
    /* The previous mark is saved on the stack; the new one (cc + 2) is
    written to the local mark slot and to the arguments. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
    allocate_stack(common, common->has_skip_arg ? 5 : 1);
    if (HAS_VIRTUAL_REGISTERS)
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
    if (common->has_skip_arg)
      {
      /* Link a type_mark record into the control head chain. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    case OP_THEN:
    case OP_THEN_ARG:
    case OP_COMMIT:
    case OP_COMMIT_ARG:
    cc = compile_control_verb_matchingpath(common, cc, parent);
    break;

    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    cc = compile_fail_accept_matchingpath(common, cc, parent);
    break;

    case OP_CLOSE:
    cc = compile_close_matchingpath(common, cc);
    break;

    case OP_SKIPZERO:
    /* Nothing is emitted; the bracket that follows is stepped over. */
    cc = bracketend(cc + 1);
    break;

    default:
    SLJIT_UNREACHABLE();
    return;
    }

  /* A NULL result from a helper stops the compilation of this range. */
  if (cc == NULL)
    return;
  }

if (has_then_trap)
  {
  /* Head item on backtrack. */
  PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
  BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
  BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
  common->then_trap = save_then_trap;
  }
SLJIT_ASSERT(cc == ccend);
}

#undef MATCHINGPATH_BACKTRACKS
12401
/* The backtrack-construction helper macros used by the matching-path
compilers above are dropped here. */
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS

/* Calls compile_backtrackingpath() for "current" and then returns from the
enclosing function (without a value) if the sljit compiler reports an error.
Relies on "common" and "compiler" being in scope where it is used. */
#define COMPILE_BACKTRACKINGPATH(current) \
  do \
    { \
    compile_backtrackingpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Casts the variable named "current" to a pointer to the given backtrack
structure type. */
#define CURRENT_AS(type) ((type *)current)
12416
/* Generates the backtracking path of a single character iterator. The opcode
reported by get_iterator_parameters() selects the emitted sequence. The saved
values are read either from the top of the backtrack stack (when the iterator
has no private data) or from the private data slots of the iterator. */
static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
char_iterator_backtrack *iterator = (char_iterator_backtrack *)current;
PCRE2_SPTR cc = current->cc;
PCRE2_UCHAR opcode;
PCRE2_UCHAR type;
sljit_u32 max = 0, exact;
struct sljit_label *retry = NULL;
struct sljit_jump *exhausted = NULL;
jump_list *failed = NULL;
PCRE2_SPTR end;
int private_data_ptr = PRIVATE_DATA(cc);
BOOL on_stack = (private_data_ptr == 0);
int base, offset0, offset1;

/* Select where the two saved words of the iterator live. */
if (on_stack)
  {
  base = SLJIT_MEM1(STACK_TOP);
  offset0 = STACK(0);
  offset1 = STACK(1);
  }
else
  {
  base = SLJIT_MEM1(SLJIT_SP);
  offset0 = private_data_ptr;
  offset1 = private_data_ptr + (int)sizeof(sljit_sw);
  }

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);

switch(opcode)
  {
  case OP_STAR:
  case OP_UPTO:
  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    /* These types keep one saved STR_PTR per stack entry; a zero value
    ends the retries. */
    SLJIT_ASSERT(private_data_ptr == 0);
    set_jumps(iterator->u.backtracks, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iterator->matchingpath);
    break;
    }

  if (!iterator->u.charpos.enabled)
    {
    /* Step back by one character unless the limit stored in the second
    slot has been reached. */
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    exhausted = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
    move_back(common, NULL, TRUE);
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, iterator->matchingpath);
    }
  else
    {
    /* Step back until the code unit before STR_PTR equals the recorded
    character (after OR-ing the other case bit, if there is one). */
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    OP1(SLJIT_MOV, TMP2, 0, base, offset1);
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

    exhausted = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
    retry = LABEL();
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    if (iterator->u.charpos.othercasebit != 0)
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, iterator->u.charpos.othercasebit);
    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, iterator->u.charpos.chr, iterator->matchingpath);
    move_back(common, NULL, TRUE);
    CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, retry);
    }

  JUMPHERE(exhausted);
  if (on_stack)
    free_stack(common, 2);
  break;

  case OP_MINSTAR:
  /* Try to match one more character from the saved position. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  compile_char1_matchingpath(common, type, cc, &failed, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, iterator->matchingpath);
  set_jumps(failed, LABEL());
  if (on_stack)
    free_stack(common, 1);
  break;

  case OP_MINUPTO:
  /* The second slot is a counter; it is decremented first and reaching
  zero is treated as a failure. */
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  add_jump(compiler, &failed, JUMP(SLJIT_ZERO));

  OP1(SLJIT_MOV, base, offset1, TMP1, 0);
  compile_char1_matchingpath(common, type, cc, &failed, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, iterator->matchingpath);

  set_jumps(failed, LABEL());
  if (on_stack)
    free_stack(common, 2);
  break;

  case OP_QUERY:
  /* The saved slot is cleared after it is read, so a second backtrack
  finds zero and gives up. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iterator->matchingpath);
  exhausted = JUMP(SLJIT_JUMP);
  set_jumps(iterator->u.backtracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, iterator->matchingpath);
  JUMPHERE(exhausted);
  if (on_stack)
    free_stack(common, 1);
  break;

  case OP_MINQUERY:
  /* A zero saved value means the optional character was already tried. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  exhausted = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  compile_char1_matchingpath(common, type, cc, &failed, TRUE);
  JUMPTO(SLJIT_JUMP, iterator->matchingpath);
  set_jumps(failed, LABEL());
  JUMPHERE(exhausted);
  if (on_stack)
    free_stack(common, 1);
  break;

  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSQUERY:
  case OP_POSUPTO:
  /* Nothing is emitted for these besides the common label below. */
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

set_jumps(current->topbacktracks, LABEL());
}
12544
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)12545 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12546 {
12547 DEFINE_COMPILER;
12548 PCRE2_SPTR cc = current->cc;
12549 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
12550 PCRE2_UCHAR type;
12551
12552 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
12553
12554 if ((type & 0x1) == 0)
12555 {
12556 /* Maximize case. */
12557 set_jumps(current->topbacktracks, LABEL());
12558 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12559 free_stack(common, 1);
12560 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12561 return;
12562 }
12563
12564 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12565 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12566 set_jumps(current->topbacktracks, LABEL());
12567 free_stack(common, ref ? 2 : 3);
12568 }
12569
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)12570 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12571 {
12572 DEFINE_COMPILER;
12573 recurse_entry *entry;
12574
12575 if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
12576 {
12577 entry = CURRENT_AS(recurse_backtrack)->entry;
12578 if (entry->backtrack_label == NULL)
12579 add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
12580 else
12581 JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
12582 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
12583 }
12584 else
12585 compile_backtrackingpath(common, current->top);
12586
12587 set_jumps(current->topbacktracks, LABEL());
12588 }
12589
/* Generates the backtracking path of an assertion, which may be preceded by
OP_BRAZERO (OP_BRAMINZERO is never passed here). With OP_BRAZERO the top
stack slot holds a saved STR_PTR; a non-zero value resumes the matching path
after the slot has been cleared. */
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
assert_backtrack *backtrack = (assert_backtrack *)current;
PCRE2_SPTR cc = current->cc;
BOOL zero_allowed;
struct sljit_jump *skipped = NULL;

SLJIT_ASSERT(*cc != OP_BRAMINZERO);
zero_allowed = (*cc == OP_BRAZERO);
if (zero_allowed)
  {
  cc++;
  SLJIT_ASSERT(current->topbacktracks == NULL);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

if (backtrack->framesize < 0)
  {
  /* No frame was saved, so there is nothing to revert. */
  set_jumps(current->topbacktracks, LABEL());
  if (!zero_allowed)
    return;

  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, backtrack->matchingpath);
  free_stack(common, 1);
  return;
  }

if (zero_allowed)
  {
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, backtrack->matchingpath);
    free_stack(common, 1);
    return;
    }
  free_stack(common, 1);
  skipped = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
  {
  /* Revert the saved frame and restore the previous private data value. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), backtrack->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (backtrack->framesize - 1) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), backtrack->private_data_ptr, TMP1, 0);
  }

set_jumps(current->topbacktracks, LABEL());

if (zero_allowed)
  {
  /* We know there is enough place on the stack. */
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
  JUMPHERE(skipped);
  }
}
12658
/* Generates the backtracking path of a bracket. The same routine covers
plain, capturing (OP_CBRA / OP_SCBRA), atomic (OP_ONCE) and conditional
(OP_COND / OP_SCOND) brackets, an optional OP_BRAZERO / OP_BRAMINZERO prefix,
repeated brackets (OP_KETRMAX / OP_KETRMIN or a counted repeat described by
the private data of the closing OP_KET), and brackets with alternatives.

The emitted code first pops the values saved for the bracket, then runs the
backtracking path of the items nested under current->top. When the bracket
has alternatives, the matching path and the backtracking path of each
remaining alternative are generated in turn. Finally the state kept for the
bracket is restored and the repeat, if any, is continued or abandoned.

The function returns early (leaving the generated code incomplete) when the
sljit compiler reports an error. */
static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
int opcode, stacksize, alt_count, alt_max;
int offset = 0;
int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
PCRE2_SPTR cc = current->cc;
PCRE2_SPTR ccbegin;
PCRE2_SPTR ccprev;
PCRE2_UCHAR bra = OP_BRA;
PCRE2_UCHAR ket;
assert_backtrack *assert;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
struct sljit_jump *brazero = NULL;
struct sljit_jump *next_alt = NULL;
struct sljit_jump *once = NULL;
struct sljit_jump *cond = NULL;
struct sljit_label *rmin_label = NULL;
struct sljit_label *exact_label = NULL;
struct sljit_put_label *put_label = NULL;

/* Skip the optional zero-repeat prefix and remember which one it was. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  }

opcode = *cc;
/* ccbegin temporarily points to the closing ket of the bracket. */
ccbegin = bracketend(cc) - 1 - LINK_SIZE;
ket = *ccbegin;
if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
  {
  /* A plain OP_KET with private data describes a counted repeat: the slots
  hold the location of the counter, the repeat type and the repeat count. */
  repeat_ptr = PRIVATE_DATA(ccbegin);
  repeat_type = PRIVATE_DATA(ccbegin + 2);
  repeat_count = PRIVATE_DATA(ccbegin + 3);
  SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
  /* Bounded repeats share the code of the unbounded ket forms below. */
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }
/* From here ccbegin is the opening opcode and cc is advanced by the link
value stored after it. */
ccbegin = cc;
cc += GET(cc, 1);
has_alternatives = *cc == OP_ALT;
/* For conditional brackets the decision depends on the condition instead:
an assertion condition or a recorded condfailed jump list. */
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
/* offset is twice the 2-byte value stored after the link field
(presumably the capture group number), used to index OVECTOR. */
if (opcode == OP_CBRA || opcode == OP_SCBRA)
  offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
/* A repeated OP_COND is treated as OP_SCOND. */
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;

alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;

/* Decoding the needs_control_head in framesize. */
if (opcode == OP_ONCE)
  {
  needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
  CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
  }

/* Restore the repeat counter of a bounded repeat from the stack. */
if (ket != OP_KET && repeat_type != 0)
  {
  /* TMP1 is used in OP_KETRMIN below. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  if (repeat_type == OP_UPTO)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  }

if (ket == OP_KETRMAX)
  {
  if (bra == OP_BRAZERO)
    {
    /* A zero value on the stack skips everything up to the brazero target
    near the end of this function. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
    }
  }
else if (ket == OP_KETRMIN)
  {
  if (bra != OP_BRAMINZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    if (repeat_type != 0)
      {
      /* TMP1 was set a few lines above. */
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else if (opcode >= OP_SBRA || opcode == OP_ONCE)
      {
      /* Checking zero-length iteration. */
      if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
        }
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else
      JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
    }
  /* The OP_KETRMIN code at the end of this function jumps back here. */
  rmin_label = LABEL();
  if (repeat_type != 0)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  }
else if (repeat_type == OP_EXACT)
  {
  /* The counter is restarted from 1; the OP_EXACT code at the end of this
  function loops back to exact_label. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  exact_label = LABEL();
  }

/* Restore the values saved on the stack for a capturing bracket. */
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    {
    SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
    free_stack(common, 3);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
    }
  else if (common->optimized_cbracket[offset >> 1] == 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  }

if (SLJIT_UNLIKELY(opcode == OP_ONCE))
  {
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
    }
  /* Jumps over the code generated below, to the OP_ONCE clean-up part. */
  once = JUMP(SLJIT_JUMP);
  }
else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  if (has_alternatives)
    {
    /* Always exactly one alternative. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);

    alt_max = 2;
    next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
    }
  }
else if (has_alternatives)
  {
  /* The popped value selects the backtracking code of the alternative that
  matched: with more than three alternatives it is used as the target of an
  indirect jump, otherwise it is compared against small constants. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);

  if (alt_max > 3)
    {
    sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);

    SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_put_label);
    sljit_set_put_label(CURRENT_AS(bracket_backtrack)->u.matching_put_label, LABEL());
    sljit_emit_op0(compiler, SLJIT_ENDBR);
    }
  else
    next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  }

/* Backtracking path of the items recorded under current->top. */
COMPILE_BACKTRACKINGPATH(current->top);
if (current->topbacktracks)
  set_jumps(current->topbacktracks, LABEL());

if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  /* Conditional block always has at most one alternative. */
  if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
    {
    SLJIT_ASSERT(has_alternatives);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
      {
      /* Revert the frame saved by the positive assertion condition. */
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
      }
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
    }
  else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
    {
    SLJIT_ASSERT(has_alternatives);
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
    }
  else
    SLJIT_ASSERT(!has_alternatives);
  }

if (has_alternatives)
  {
  alt_count = 1;
  /* Each iteration generates the matching path of the next alternative
  (when an OP_ALT follows), the code executed after it matched, and then
  the backtracking path of that alternative. */
  do
    {
    current->top = NULL;
    current->topbacktracks = NULL;
    current->nextbacktracks = NULL;
    /* Conditional blocks always have an additional alternative, even if it is empty. */
    if (*cc == OP_ALT)
      {
      ccprev = cc + 1 + LINK_SIZE;
      cc += GET(cc, 1);
      if (opcode != OP_COND && opcode != OP_SCOND)
        {
        /* Reload the subject position saved for the bracket. */
        if (opcode != OP_ONCE)
          {
          if (private_data_ptr != 0)
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
          else
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
          }
        else
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
        }
      compile_matchingpath(common, ccprev, cc, current);
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
        return;

      if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

      if (opcode == OP_SCRIPT_RUN)
        match_script_run_common(common, private_data_ptr, current);
      }

    /* Instructions after the current alternative is successfully matched. */
    /* There is a similar code in compile_bracket_matchingpath. */
    if (opcode == OP_ONCE)
      match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

    /* First pass: count the stack slots needed for the saved values. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* We need to preserve the counter. TMP2 will be used below. */
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
      stacksize++;
      }
    if (ket != OP_KET || bra != OP_BRA)
      stacksize++;
    if (offset != 0)
      {
      if (common->capture_last_ptr != 0)
        stacksize++;
      if (common->optimized_cbracket[offset >> 1] == 0)
        stacksize += 2;
      }
    if (opcode != OP_ONCE)
      stacksize++;

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    /* Second pass: stacksize is now the index of the next slot to fill. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* TMP2 was set above. */
      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
      stacksize++;
      }

    if (ket != OP_KET || bra != OP_BRA)
      {
      if (ket != OP_KET)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      else
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
      stacksize++;
      }

    if (offset != 0)
      stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

    /* Record which alternative matched: a small index, or (for more than
    three alternatives) the address of its backtracking code. */
    if (opcode != OP_ONCE)
      {
      if (alt_max <= 3)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
      else
        put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
      }

    if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
      {
      /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
      SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      }

    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);

    /* The backtracking code of this alternative starts here. */
    if (opcode != OP_ONCE)
      {
      if (alt_max <= 3)
        {
        JUMPHERE(next_alt);
        alt_count++;
        if (alt_count < alt_max)
          {
          SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
          next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
          }
        }
      else
        {
        sljit_set_put_label(put_label, LABEL());
        sljit_emit_op0(compiler, SLJIT_ENDBR);
        }
      }

    COMPILE_BACKTRACKINGPATH(current->top);
    if (current->topbacktracks)
      set_jumps(current->topbacktracks, LABEL());
    SLJIT_ASSERT(!current->nextbacktracks);
    }
  while (*cc == OP_ALT);

  if (cond != NULL)
    {
    SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
      {
      /* Revert the frame saved by the negative assertion condition. */
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
      }
    JUMPHERE(cond);
    }

  /* Free the STR_PTR. */
  if (private_data_ptr == 0)
    free_stack(common, 1);
  }

/* Restore the state that was saved when the bracket was entered. */
if (offset != 0)
  {
  /* Using both tmp register is better for instruction scheduling. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  }
else if (opcode == OP_ONCE)
  {
  cc = ccbegin + GET(ccbegin, 1);
  stacksize = needs_control_head ? 1 : 0;

  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    /* Reset head and drop saved frame. */
    stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
    }
  else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
    {
    /* The STR_PTR must be released. */
    stacksize++;
    }

  if (stacksize > 0)
    free_stack(common, stacksize);

  /* Target of the jump emitted for OP_ONCE before the nested backtracking
  path was generated. */
  JUMPHERE(once);
  /* Restore previous private_data_ptr */
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
  else if (ket == OP_KETRMIN)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    /* See the comment below. */
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }

/* Continue or abandon the repeat. */
if (repeat_type == OP_EXACT)
  {
  /* Increment the counter and loop while it does not exceed repeat_count. */
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
  }
else if (ket == OP_KETRMAX)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (bra != OP_BRAZERO)
    free_stack(common, 1);

  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
  if (bra == OP_BRAZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
    JUMPHERE(brazero);
    free_stack(common, 1);
    }
  }
else if (ket == OP_KETRMIN)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  /* OP_ONCE removes everything in case of a backtrack, so we don't
  need to explicitly release the STR_PTR. The extra release would
  affect badly the free_stack(2) above. */
  if (opcode != OP_ONCE)
    free_stack(common, 1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
  if (opcode == OP_ONCE)
    free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
  else if (bra == OP_BRAMINZERO)
    free_stack(common, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
  JUMPHERE(brazero);
  }
}
13125
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)13126 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13127 {
13128 DEFINE_COMPILER;
13129 int offset;
13130 struct sljit_jump *jump;
13131
13132 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
13133 {
13134 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
13135 {
13136 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
13137 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13138 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13139 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13140 if (common->capture_last_ptr != 0)
13141 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
13142 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13143 if (common->capture_last_ptr != 0)
13144 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
13145 }
13146 set_jumps(current->topbacktracks, LABEL());
13147 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
13148 return;
13149 }
13150
13151 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
13152 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13153 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
13154
13155 if (current->topbacktracks)
13156 {
13157 jump = JUMP(SLJIT_JUMP);
13158 set_jumps(current->topbacktracks, LABEL());
13159 /* Drop the stack frame. */
13160 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
13161 JUMPHERE(jump);
13162 }
13163 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
13164 }
13165
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)13166 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13167 {
13168 assert_backtrack backtrack;
13169
13170 current->top = NULL;
13171 current->topbacktracks = NULL;
13172 current->nextbacktracks = NULL;
13173 if (current->cc[1] > OP_ASSERTBACK_NOT)
13174 {
13175 /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
13176 compile_bracket_matchingpath(common, current->cc, current);
13177 compile_bracket_backtrackingpath(common, current->top);
13178 }
13179 else
13180 {
13181 memset(&backtrack, 0, sizeof(backtrack));
13182 backtrack.common.cc = current->cc;
13183 backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
13184 /* Manual call of compile_assert_matchingpath. */
13185 compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
13186 }
13187 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
13188 }
13189
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)13190 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13191 {
13192 DEFINE_COMPILER;
13193 PCRE2_UCHAR opcode = *current->cc;
13194 struct sljit_label *loop;
13195 struct sljit_jump *jump;
13196
13197 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
13198 {
13199 if (common->then_trap != NULL)
13200 {
13201 SLJIT_ASSERT(common->control_head_ptr != 0);
13202
13203 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13204 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
13205 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
13206 jump = JUMP(SLJIT_JUMP);
13207
13208 loop = LABEL();
13209 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13210 JUMPHERE(jump);
13211 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
13212 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
13213 add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
13214 return;
13215 }
13216 else if (!common->local_quit_available && common->in_positive_assertion)
13217 {
13218 add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
13219 return;
13220 }
13221 }
13222
13223 if (common->local_quit_available)
13224 {
13225 /* Abort match with a fail. */
13226 if (common->quit_label == NULL)
13227 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
13228 else
13229 JUMPTO(SLJIT_JUMP, common->quit_label);
13230 return;
13231 }
13232
13233 if (opcode == OP_SKIP_ARG)
13234 {
13235 SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13236 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13237 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
13238 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_search_mark));
13239
13240 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
13241 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
13242 return;
13243 }
13244
13245 if (opcode == OP_SKIP)
13246 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13247 else
13248 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
13249 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
13250 }
13251
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)13252 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13253 {
13254 DEFINE_COMPILER;
13255 struct sljit_jump *jump;
13256 int size;
13257
13258 if (CURRENT_AS(then_trap_backtrack)->then_trap)
13259 {
13260 common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
13261 return;
13262 }
13263
13264 size = CURRENT_AS(then_trap_backtrack)->framesize;
13265 size = 3 + (size < 0 ? 0 : size);
13266
13267 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
13268 free_stack(common, size);
13269 jump = JUMP(SLJIT_JUMP);
13270
13271 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
13272 /* STACK_TOP is set by THEN. */
13273 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
13274 {
13275 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13276 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
13277 }
13278 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13279 free_stack(common, 3);
13280
13281 JUMPHERE(jump);
13282 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
13283 }
13284
/* Emits the backtracking code for a chain of backtrack records.

The chain is walked from current through the prev links. For every record
the pending nextbacktracks jumps (if any) are bound to a fresh label, then
the generator that belongs to the opcode at record->cc is invoked. The value
of common->then_trap seen on entry is restored before returning, because the
then trap generator may replace it while the chain is processed. */
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *entry_then_trap = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->nextbacktracks != NULL)
    set_jumps(current->nextbacktracks, LABEL());

  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Restore the previous OVECTOR(0) value from the stack. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Single item iterators. */
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    /* Back reference iterators. */
    case OP_REF:
    case OP_REFI:
    case OP_DNREF:
    case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode following BRAZERO selects the generator. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    if (common->has_skip_arg)
      {
      /* Five stack words are in use: the saved mark is in slot 4 and
      the saved control head is in slot 0. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      /* Only the saved mark was pushed. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN:
    case OP_THEN_ARG:
    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT:
    case OP_COMMIT_ARG:
    /* Without a local quit target the no-match result is loaded here. */
    if (!common->local_quit_available)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
    /* Jump to the quit label directly when it already exists, otherwise
    queue the jump so it can be bound later. */
    if (common->quit_label != NULL)
      JUMPTO(SLJIT_JUMP, common->quit_label);
    else
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    break;

    case OP_CALLOUT:
    case OP_CALLOUT_STR:
    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    /* No code of their own: only bind the pending jumps. */
    set_jumps(current->topbacktracks, LABEL());
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  }

common->then_trap = entry_then_trap;
}
13475
compile_recurse(compiler_common * common)13476 static SLJIT_INLINE void compile_recurse(compiler_common *common)
13477 {
13478 DEFINE_COMPILER;
13479 PCRE2_SPTR cc = common->start + common->currententry->start;
13480 PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
13481 PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
13482 uint32_t recurse_flags = 0;
13483 int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &recurse_flags);
13484 int alt_count, alt_max, local_size;
13485 backtrack_common altbacktrack;
13486 jump_list *match = NULL;
13487 struct sljit_jump *next_alt = NULL;
13488 struct sljit_jump *accept_exit = NULL;
13489 struct sljit_label *quit;
13490 struct sljit_put_label *put_label = NULL;
13491
13492 /* Recurse captures then. */
13493 common->then_trap = NULL;
13494
13495 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
13496
13497 alt_max = no_alternatives(cc);
13498 alt_count = 0;
13499
13500 /* Matching path. */
13501 SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
13502 common->currententry->entry_label = LABEL();
13503 set_jumps(common->currententry->entry_calls, common->currententry->entry_label);
13504
13505 sljit_emit_fast_enter(compiler, TMP2, 0);
13506 count_match(common);
13507
13508 local_size = (alt_max > 1) ? 2 : 1;
13509
13510 /* (Reversed) stack layout:
13511 [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */
13512
13513 allocate_stack(common, private_data_size + local_size);
13514 /* Save return address. */
13515 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);
13516
13517 copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, recurse_flags);
13518
13519 /* This variable is saved and restored all time when we enter or exit from a recursive context. */
13520 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
13521
13522 if (recurse_flags & recurse_flag_control_head_found)
13523 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13524
13525 if (alt_max > 1)
13526 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
13527
13528 memset(&altbacktrack, 0, sizeof(backtrack_common));
13529 common->quit_label = NULL;
13530 common->accept_label = NULL;
13531 common->quit = NULL;
13532 common->accept = NULL;
13533 altbacktrack.cc = ccbegin;
13534 cc += GET(cc, 1);
13535 while (1)
13536 {
13537 altbacktrack.top = NULL;
13538 altbacktrack.topbacktracks = NULL;
13539
13540 if (altbacktrack.cc != ccbegin)
13541 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13542
13543 compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
13544 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13545 return;
13546
13547 allocate_stack(common, (alt_max > 1 || (recurse_flags & recurse_flag_accept_found)) ? 2 : 1);
13548 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13549
13550 if (alt_max > 1 || (recurse_flags & recurse_flag_accept_found))
13551 {
13552 if (alt_max > 3)
13553 put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
13554 else
13555 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
13556 }
13557
13558 add_jump(compiler, &match, JUMP(SLJIT_JUMP));
13559
13560 if (alt_count == 0)
13561 {
13562 /* Backtracking path entry. */
13563 SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
13564 common->currententry->backtrack_label = LABEL();
13565 set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);
13566
13567 sljit_emit_fast_enter(compiler, TMP1, 0);
13568
13569 if (recurse_flags & recurse_flag_accept_found)
13570 accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
13571
13572 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13573 /* Save return address. */
13574 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);
13575
13576 copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);
13577
13578 if (alt_max > 1)
13579 {
13580 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13581 free_stack(common, 2);
13582
13583 if (alt_max > 3)
13584 {
13585 sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
13586 sljit_set_put_label(put_label, LABEL());
13587 sljit_emit_op0(compiler, SLJIT_ENDBR);
13588 }
13589 else
13590 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13591 }
13592 else
13593 free_stack(common, (recurse_flags & recurse_flag_accept_found) ? 2 : 1);
13594 }
13595 else if (alt_max > 3)
13596 {
13597 sljit_set_put_label(put_label, LABEL());
13598 sljit_emit_op0(compiler, SLJIT_ENDBR);
13599 }
13600 else
13601 {
13602 JUMPHERE(next_alt);
13603 if (alt_count + 1 < alt_max)
13604 {
13605 SLJIT_ASSERT(alt_count == 1 && alt_max == 3);
13606 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
13607 }
13608 }
13609
13610 alt_count++;
13611
13612 compile_backtrackingpath(common, altbacktrack.top);
13613 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13614 return;
13615 set_jumps(altbacktrack.topbacktracks, LABEL());
13616
13617 if (*cc != OP_ALT)
13618 break;
13619
13620 altbacktrack.cc = cc + 1 + LINK_SIZE;
13621 cc += GET(cc, 1);
13622 }
13623
13624 /* No alternative is matched. */
13625
13626 quit = LABEL();
13627
13628 copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, recurse_flags);
13629
13630 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
13631 free_stack(common, private_data_size + local_size);
13632 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
13633 OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13634
13635 if (common->quit != NULL)
13636 {
13637 SLJIT_ASSERT(recurse_flags & recurse_flag_quit_found);
13638
13639 set_jumps(common->quit, LABEL());
13640 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13641 copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, recurse_flags);
13642 JUMPTO(SLJIT_JUMP, quit);
13643 }
13644
13645 if (recurse_flags & recurse_flag_accept_found)
13646 {
13647 JUMPHERE(accept_exit);
13648 free_stack(common, 2);
13649
13650 /* Save return address. */
13651 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);
13652
13653 copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, recurse_flags);
13654
13655 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
13656 free_stack(common, private_data_size + local_size);
13657 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
13658 OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13659 }
13660
13661 if (common->accept != NULL)
13662 {
13663 SLJIT_ASSERT(recurse_flags & recurse_flag_accept_found);
13664
13665 set_jumps(common->accept, LABEL());
13666
13667 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13668 OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
13669
13670 allocate_stack(common, 2);
13671 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
13672 }
13673
13674 set_jumps(match, LABEL());
13675
13676 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
13677
13678 copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);
13679
13680 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
13681 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
13682 OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13683 }
13684
/* Helper macros of the backtracking path generators; they are not needed
after this point. */
#undef COMPILE_BACKTRACKINGPATH
#undef CURRENT_AS

/* Bits of the jit_compile() mode argument which configure the compilation.
jit_compile() clears them from the mode before storing it. */
#define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS (PCRE2_JIT_INVALID_UTF)
13690
jit_compile(pcre2_code * code,sljit_u32 mode)13691 static int jit_compile(pcre2_code *code, sljit_u32 mode)
13692 {
13693 pcre2_real_code *re = (pcre2_real_code *)code;
13694 struct sljit_compiler *compiler;
13695 backtrack_common rootbacktrack;
13696 compiler_common common_data;
13697 compiler_common *common = &common_data;
13698 const sljit_u8 *tables = re->tables;
13699 void *allocator_data = &re->memctl;
13700 int private_data_size;
13701 PCRE2_SPTR ccend;
13702 executable_functions *functions;
13703 void *executable_func;
13704 sljit_uw executable_size;
13705 sljit_uw total_length;
13706 struct sljit_label *mainloop_label = NULL;
13707 struct sljit_label *continue_match_label;
13708 struct sljit_label *empty_match_found_label = NULL;
13709 struct sljit_label *empty_match_backtrack_label = NULL;
13710 struct sljit_label *reset_match_label;
13711 struct sljit_label *quit_label;
13712 struct sljit_jump *jump;
13713 struct sljit_jump *minlength_check_failed = NULL;
13714 struct sljit_jump *empty_match = NULL;
13715 struct sljit_jump *end_anchor_failed = NULL;
13716 jump_list *reqcu_not_found = NULL;
13717
13718 SLJIT_ASSERT(tables);
13719
13720 #if HAS_VIRTUAL_REGISTERS == 1
13721 SLJIT_ASSERT(sljit_get_register_index(TMP3) < 0 && sljit_get_register_index(ARGUMENTS) < 0 && sljit_get_register_index(RETURN_ADDR) < 0);
13722 #elif HAS_VIRTUAL_REGISTERS == 0
13723 SLJIT_ASSERT(sljit_get_register_index(TMP3) >= 0 && sljit_get_register_index(ARGUMENTS) >= 0 && sljit_get_register_index(RETURN_ADDR) >= 0);
13724 #else
13725 #error "Invalid value for HAS_VIRTUAL_REGISTERS"
13726 #endif
13727
13728 memset(&rootbacktrack, 0, sizeof(backtrack_common));
13729 memset(common, 0, sizeof(compiler_common));
13730 common->re = re;
13731 common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
13732 rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
13733
13734 #ifdef SUPPORT_UNICODE
13735 common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
13736 #endif /* SUPPORT_UNICODE */
13737 mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;
13738
13739 common->start = rootbacktrack.cc;
13740 common->read_only_data_head = NULL;
13741 common->fcc = tables + fcc_offset;
13742 common->lcc = (sljit_sw)(tables + lcc_offset);
13743 common->mode = mode;
13744 common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
13745 common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
13746 common->nltype = NLTYPE_FIXED;
13747 switch(re->newline_convention)
13748 {
13749 case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
13750 case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
13751 case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
13752 case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
13753 case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
13754 case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
13755 default: return PCRE2_ERROR_INTERNAL;
13756 }
13757 common->nlmax = READ_CHAR_MAX;
13758 common->nlmin = 0;
13759 if (re->bsr_convention == PCRE2_BSR_UNICODE)
13760 common->bsr_nltype = NLTYPE_ANY;
13761 else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
13762 common->bsr_nltype = NLTYPE_ANYCRLF;
13763 else
13764 {
13765 #ifdef BSR_ANYCRLF
13766 common->bsr_nltype = NLTYPE_ANYCRLF;
13767 #else
13768 common->bsr_nltype = NLTYPE_ANY;
13769 #endif
13770 }
13771 common->bsr_nlmax = READ_CHAR_MAX;
13772 common->bsr_nlmin = 0;
13773 common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
13774 common->ctypes = (sljit_sw)(tables + ctypes_offset);
13775 common->name_count = re->name_count;
13776 common->name_entry_size = re->name_entry_size;
13777 common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
13778 common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
13779 #ifdef SUPPORT_UNICODE
13780 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
13781 common->utf = (re->overall_options & PCRE2_UTF) != 0;
13782 common->ucp = (re->overall_options & PCRE2_UCP) != 0;
13783 if (common->utf)
13784 {
13785 if (common->nltype == NLTYPE_ANY)
13786 common->nlmax = 0x2029;
13787 else if (common->nltype == NLTYPE_ANYCRLF)
13788 common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13789 else
13790 {
13791 /* We only care about the first newline character. */
13792 common->nlmax = common->newline & 0xff;
13793 }
13794
13795 if (common->nltype == NLTYPE_FIXED)
13796 common->nlmin = common->newline & 0xff;
13797 else
13798 common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13799
13800 if (common->bsr_nltype == NLTYPE_ANY)
13801 common->bsr_nlmax = 0x2029;
13802 else
13803 common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13804 common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13805 }
13806 else
13807 common->invalid_utf = FALSE;
13808 #endif /* SUPPORT_UNICODE */
13809 ccend = bracketend(common->start);
13810
13811 /* Calculate the local space size on the stack. */
13812 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
13813 common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
13814 if (!common->optimized_cbracket)
13815 return PCRE2_ERROR_NOMEMORY;
13816 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
13817 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13818 #else
13819 memset(common->optimized_cbracket, 1, re->top_bracket + 1);
13820 #endif
13821
13822 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
13823 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
13824 common->capture_last_ptr = common->ovector_start;
13825 common->ovector_start += sizeof(sljit_sw);
13826 #endif
13827 if (!check_opcode_types(common, common->start, ccend))
13828 {
13829 SLJIT_FREE(common->optimized_cbracket, allocator_data);
13830 return PCRE2_ERROR_NOMEMORY;
13831 }
13832
13833 /* Checking flags and updating ovector_start. */
13834 if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13835 {
13836 common->req_char_ptr = common->ovector_start;
13837 common->ovector_start += sizeof(sljit_sw);
13838 }
13839 if (mode != PCRE2_JIT_COMPLETE)
13840 {
13841 common->start_used_ptr = common->ovector_start;
13842 common->ovector_start += sizeof(sljit_sw);
13843 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13844 {
13845 common->hit_start = common->ovector_start;
13846 common->ovector_start += sizeof(sljit_sw);
13847 }
13848 }
13849 if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
13850 {
13851 common->match_end_ptr = common->ovector_start;
13852 common->ovector_start += sizeof(sljit_sw);
13853 }
13854 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
13855 common->control_head_ptr = 1;
13856 #endif
13857 if (common->control_head_ptr != 0)
13858 {
13859 common->control_head_ptr = common->ovector_start;
13860 common->ovector_start += sizeof(sljit_sw);
13861 }
13862 if (common->has_set_som)
13863 {
13864 /* Saving the real start pointer is necessary. */
13865 common->start_ptr = common->ovector_start;
13866 common->ovector_start += sizeof(sljit_sw);
13867 }
13868
13869 /* Aligning ovector to even number of sljit words. */
13870 if ((common->ovector_start & sizeof(sljit_sw)) != 0)
13871 common->ovector_start += sizeof(sljit_sw);
13872
13873 if (common->start_ptr == 0)
13874 common->start_ptr = OVECTOR(0);
13875
13876 /* Capturing brackets cannot be optimized if callouts are allowed. */
13877 if (common->capture_last_ptr != 0)
13878 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13879
13880 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
13881 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
13882
13883 total_length = ccend - common->start;
13884 common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
13885 if (!common->private_data_ptrs)
13886 {
13887 SLJIT_FREE(common->optimized_cbracket, allocator_data);
13888 return PCRE2_ERROR_NOMEMORY;
13889 }
13890 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
13891
13892 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
13893
13894 if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
13895 detect_early_fail(common, common->start, &private_data_size, 0, 0, TRUE);
13896
13897 set_private_data_ptrs(common, &private_data_size, ccend);
13898
13899 SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
13900
13901 if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
13902 {
13903 SLJIT_FREE(common->private_data_ptrs, allocator_data);
13904 SLJIT_FREE(common->optimized_cbracket, allocator_data);
13905 return PCRE2_ERROR_NOMEMORY;
13906 }
13907
13908 if (common->has_then)
13909 {
13910 common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
13911 memset(common->then_offsets, 0, total_length);
13912 set_then_offsets(common, common->start, NULL);
13913 }
13914
13915 compiler = sljit_create_compiler(allocator_data, NULL);
13916 if (!compiler)
13917 {
13918 SLJIT_FREE(common->optimized_cbracket, allocator_data);
13919 SLJIT_FREE(common->private_data_ptrs, allocator_data);
13920 return PCRE2_ERROR_NOMEMORY;
13921 }
13922 common->compiler = compiler;
13923
13924 /* Main pcre2_jit_exec entry. */
13925 SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0);
13926 sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5, 5, 0, 0, private_data_size);
13927
13928 /* Register init. */
13929 reset_ovector(common, (re->top_bracket + 1) * 2);
13930 if (common->req_char_ptr != 0)
13931 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
13932
13933 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
13934 OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
13935 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13936 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
13937 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
13938 OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
13939 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
13940 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
13941 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
13942 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
13943
13944 if (common->early_fail_start_ptr < common->early_fail_end_ptr)
13945 reset_early_fail(common);
13946
13947 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13948 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13949 if (common->mark_ptr != 0)
13950 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
13951 if (common->control_head_ptr != 0)
13952 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13953
13954 /* Main part of the matching */
13955 if ((re->overall_options & PCRE2_ANCHORED) == 0)
13956 {
13957 mainloop_label = mainloop_entry(common);
13958 continue_match_label = LABEL();
13959 /* Forward search if possible. */
13960 if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13961 {
13962 if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
13963 ;
13964 else if ((re->flags & PCRE2_FIRSTSET) != 0)
13965 fast_forward_first_char(common);
13966 else if ((re->flags & PCRE2_STARTLINE) != 0)
13967 fast_forward_newline(common);
13968 else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
13969 fast_forward_start_bits(common);
13970 }
13971 }
13972 else
13973 continue_match_label = LABEL();
13974
13975 if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13976 {
13977 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13978 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
13979 minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
13980 }
13981 if (common->req_char_ptr != 0)
13982 reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
13983
13984 /* Store the current STR_PTR in OVECTOR(0). */
13985 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
13986 /* Copy the limit of allowed recursions. */
13987 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
13988 if (common->capture_last_ptr != 0)
13989 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
13990 if (common->fast_forward_bc_ptr != NULL)
13991 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);
13992
13993 if (common->start_ptr != OVECTOR(0))
13994 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
13995
13996 /* Copy the beginning of the string. */
13997 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13998 {
13999 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
14000 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
14001 JUMPHERE(jump);
14002 }
14003 else if (mode == PCRE2_JIT_PARTIAL_HARD)
14004 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
14005
14006 compile_matchingpath(common, common->start, ccend, &rootbacktrack);
14007 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14008 {
14009 sljit_free_compiler(compiler);
14010 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14011 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14012 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14013 return PCRE2_ERROR_NOMEMORY;
14014 }
14015
14016 if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
14017 end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
14018
14019 if (common->might_be_empty)
14020 {
14021 empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
14022 empty_match_found_label = LABEL();
14023 }
14024
14025 common->accept_label = LABEL();
14026 if (common->accept != NULL)
14027 set_jumps(common->accept, common->accept_label);
14028
14029 /* This means we have a match. Update the ovector. */
14030 copy_ovector(common, re->top_bracket + 1);
14031 common->quit_label = common->abort_label = LABEL();
14032 if (common->quit != NULL)
14033 set_jumps(common->quit, common->quit_label);
14034 if (common->abort != NULL)
14035 set_jumps(common->abort, common->abort_label);
14036 if (minlength_check_failed != NULL)
14037 SET_LABEL(minlength_check_failed, common->abort_label);
14038
14039 sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
14040 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
14041
14042 if (common->failed_match != NULL)
14043 {
14044 SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
14045 set_jumps(common->failed_match, LABEL());
14046 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14047 JUMPTO(SLJIT_JUMP, common->abort_label);
14048 }
14049
14050 if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
14051 JUMPHERE(end_anchor_failed);
14052
14053 if (mode != PCRE2_JIT_COMPLETE)
14054 {
14055 common->partialmatchlabel = LABEL();
14056 set_jumps(common->partialmatch, common->partialmatchlabel);
14057 return_with_partial_match(common, common->quit_label);
14058 }
14059
14060 if (common->might_be_empty)
14061 empty_match_backtrack_label = LABEL();
14062 compile_backtrackingpath(common, rootbacktrack.top);
14063 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14064 {
14065 sljit_free_compiler(compiler);
14066 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14067 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14068 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14069 return PCRE2_ERROR_NOMEMORY;
14070 }
14071
14072 SLJIT_ASSERT(rootbacktrack.prev == NULL);
14073 reset_match_label = LABEL();
14074
14075 if (mode == PCRE2_JIT_PARTIAL_SOFT)
14076 {
14077 /* Update hit_start only in the first time. */
14078 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
14079 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
14080 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
14081 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
14082 JUMPHERE(jump);
14083 }
14084
14085 /* Check we have remaining characters. */
14086 if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
14087 {
14088 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
14089 }
14090
14091 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
14092 (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);
14093
14094 if ((re->overall_options & PCRE2_ANCHORED) == 0)
14095 {
14096 if (common->ff_newline_shortcut != NULL)
14097 {
14098 /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
14099 if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
14100 {
14101 if (common->match_end_ptr != 0)
14102 {
14103 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
14104 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
14105 CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
14106 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
14107 }
14108 else
14109 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
14110 }
14111 }
14112 else
14113 CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
14114 }
14115
14116 /* No more remaining characters. */
14117 if (reqcu_not_found != NULL)
14118 set_jumps(reqcu_not_found, LABEL());
14119
14120 if (mode == PCRE2_JIT_PARTIAL_SOFT)
14121 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
14122
14123 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14124 JUMPTO(SLJIT_JUMP, common->quit_label);
14125
14126 flush_stubs(common);
14127
14128 if (common->might_be_empty)
14129 {
14130 JUMPHERE(empty_match);
14131 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
14132 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
14133 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
14134 JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
14135 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
14136 JUMPTO(SLJIT_ZERO, empty_match_found_label);
14137 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
14138 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
14139 JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
14140 }
14141
14142 common->fast_forward_bc_ptr = NULL;
14143 common->early_fail_start_ptr = 0;
14144 common->early_fail_end_ptr = 0;
14145 common->currententry = common->entries;
14146 common->local_quit_available = TRUE;
14147 quit_label = common->quit_label;
14148 if (common->currententry != NULL)
14149 {
14150 /* A free bit for each private data. */
14151 common->recurse_bitset_size = ((private_data_size / (int)sizeof(sljit_sw)) + 7) >> 3;
14152 SLJIT_ASSERT(common->recurse_bitset_size > 0);
14153 common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);;
14154
14155 if (common->recurse_bitset != NULL)
14156 {
14157 do
14158 {
14159 /* Might add new entries. */
14160 compile_recurse(common);
14161 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14162 break;
14163 flush_stubs(common);
14164 common->currententry = common->currententry->next;
14165 }
14166 while (common->currententry != NULL);
14167
14168 SLJIT_FREE(common->recurse_bitset, allocator_data);
14169 }
14170
14171 if (common->currententry != NULL)
14172 {
14173 /* The common->recurse_bitset has been freed. */
14174 SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL);
14175
14176 sljit_free_compiler(compiler);
14177 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14178 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14179 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14180 return PCRE2_ERROR_NOMEMORY;
14181 }
14182 }
14183 common->local_quit_available = FALSE;
14184 common->quit_label = quit_label;
14185
14186 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
14187 /* This is a (really) rare case. */
14188 set_jumps(common->stackalloc, LABEL());
14189 /* RETURN_ADDR is not a saved register. */
14190 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14191
14192 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
14193
14194 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
14195 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
14196 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
14197 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
14198 OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
14199
14200 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(sljit_stack_resize));
14201
14202 jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
14203 OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
14204 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
14205 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14206 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
14207 OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
14208
14209 /* Allocation failed. */
14210 JUMPHERE(jump);
14211 /* We break the return address cache here, but this is a really rare case. */
14212 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
14213 JUMPTO(SLJIT_JUMP, common->quit_label);
14214
14215 /* Call limit reached. */
14216 set_jumps(common->calllimit, LABEL());
14217 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
14218 JUMPTO(SLJIT_JUMP, common->quit_label);
14219
14220 if (common->revertframes != NULL)
14221 {
14222 set_jumps(common->revertframes, LABEL());
14223 do_revertframes(common);
14224 }
14225 if (common->wordboundary != NULL)
14226 {
14227 set_jumps(common->wordboundary, LABEL());
14228 check_wordboundary(common);
14229 }
14230 if (common->anynewline != NULL)
14231 {
14232 set_jumps(common->anynewline, LABEL());
14233 check_anynewline(common);
14234 }
14235 if (common->hspace != NULL)
14236 {
14237 set_jumps(common->hspace, LABEL());
14238 check_hspace(common);
14239 }
14240 if (common->vspace != NULL)
14241 {
14242 set_jumps(common->vspace, LABEL());
14243 check_vspace(common);
14244 }
14245 if (common->casefulcmp != NULL)
14246 {
14247 set_jumps(common->casefulcmp, LABEL());
14248 do_casefulcmp(common);
14249 }
14250 if (common->caselesscmp != NULL)
14251 {
14252 set_jumps(common->caselesscmp, LABEL());
14253 do_caselesscmp(common);
14254 }
14255 if (common->reset_match != NULL)
14256 {
14257 set_jumps(common->reset_match, LABEL());
14258 do_reset_match(common, (re->top_bracket + 1) * 2);
14259 CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
14260 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
14261 JUMPTO(SLJIT_JUMP, reset_match_label);
14262 }
14263 #ifdef SUPPORT_UNICODE
14264 #if PCRE2_CODE_UNIT_WIDTH == 8
14265 if (common->utfreadchar != NULL)
14266 {
14267 set_jumps(common->utfreadchar, LABEL());
14268 do_utfreadchar(common);
14269 }
14270 if (common->utfreadtype8 != NULL)
14271 {
14272 set_jumps(common->utfreadtype8, LABEL());
14273 do_utfreadtype8(common);
14274 }
14275 if (common->utfpeakcharback != NULL)
14276 {
14277 set_jumps(common->utfpeakcharback, LABEL());
14278 do_utfpeakcharback(common);
14279 }
14280 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
14281 #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
14282 if (common->utfreadchar_invalid != NULL)
14283 {
14284 set_jumps(common->utfreadchar_invalid, LABEL());
14285 do_utfreadchar_invalid(common);
14286 }
14287 if (common->utfreadnewline_invalid != NULL)
14288 {
14289 set_jumps(common->utfreadnewline_invalid, LABEL());
14290 do_utfreadnewline_invalid(common);
14291 }
14292 if (common->utfmoveback_invalid)
14293 {
14294 set_jumps(common->utfmoveback_invalid, LABEL());
14295 do_utfmoveback_invalid(common);
14296 }
14297 if (common->utfpeakcharback_invalid)
14298 {
14299 set_jumps(common->utfpeakcharback_invalid, LABEL());
14300 do_utfpeakcharback_invalid(common);
14301 }
14302 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
14303 if (common->getucd != NULL)
14304 {
14305 set_jumps(common->getucd, LABEL());
14306 do_getucd(common);
14307 }
14308 if (common->getucdtype != NULL)
14309 {
14310 set_jumps(common->getucdtype, LABEL());
14311 do_getucdtype(common);
14312 }
14313 #endif /* SUPPORT_UNICODE */
14314
14315 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14316 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14317
14318 executable_func = sljit_generate_code(compiler);
14319 executable_size = sljit_get_generated_code_size(compiler);
14320 sljit_free_compiler(compiler);
14321
14322 if (executable_func == NULL)
14323 {
14324 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14325 return PCRE2_ERROR_NOMEMORY;
14326 }
14327
14328 /* Reuse the function descriptor if possible. */
14329 if (re->executable_jit != NULL)
14330 functions = (executable_functions *)re->executable_jit;
14331 else
14332 {
14333 functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
14334 if (functions == NULL)
14335 {
14336 /* This case is highly unlikely since we just recently
14337 freed a lot of memory. Not impossible though. */
14338 sljit_free_code(executable_func, NULL);
14339 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14340 return PCRE2_ERROR_NOMEMORY;
14341 }
14342 memset(functions, 0, sizeof(executable_functions));
14343 functions->top_bracket = re->top_bracket + 1;
14344 functions->limit_match = re->limit_match;
14345 re->executable_jit = functions;
14346 }
14347
14348 /* Turn mode into an index. */
14349 if (mode == PCRE2_JIT_COMPLETE)
14350 mode = 0;
14351 else
14352 mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
14353
14354 SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
14355 functions->executable_funcs[mode] = executable_func;
14356 functions->read_only_data_heads[mode] = common->read_only_data_head;
14357 functions->executable_sizes[mode] = executable_size;
14358 return 0;
14359 }
14360
14361 #endif
14362
14363 /*************************************************
14364 * JIT compile a Regular Expression *
14365 *************************************************/
14366
/* This function uses JIT to convert a previously-compiled pattern into machine
14368 code.
14369
14370 Arguments:
14371 code a compiled pattern
14372 options JIT option bits
14373
14374 Returns: 0: success or (*NOJIT) was used
14375 <0: an error code
14376 */
14377
14378 #define PUBLIC_JIT_COMPILE_OPTIONS \
14379 (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_INVALID_UTF)
14380
14381 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_compile(pcre2_code * code,uint32_t options)14382 pcre2_jit_compile(pcre2_code *code, uint32_t options)
14383 {
14384 pcre2_real_code *re = (pcre2_real_code *)code;
14385 #ifdef SUPPORT_JIT
14386 executable_functions *functions;
14387 static int executable_allocator_is_working = 0;
14388 #endif
14389
14390 if (code == NULL)
14391 return PCRE2_ERROR_NULL;
14392
14393 if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14394 return PCRE2_ERROR_JIT_BADOPTION;
14395
14396 /* Support for invalid UTF was first introduced in JIT, with the option
14397 PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
14398 compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
14399 preferred feature, with the earlier option deprecated. However, for backward
14400 compatibility, if the earlier option is set, it forces the new option so that
14401 if JIT matching falls back to the interpreter, there is still support for
14402 invalid UTF. However, if this function has already been successfully called
14403 without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
14404 non-invalid-supporting JIT code was compiled), give an error.
14405
14406 If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
14407 actions are needed:
14408
14409 1. Remove the definition from pcre2.h.in and from the list in
14410 PUBLIC_JIT_COMPILE_OPTIONS above.
14411
14412 2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
14413
14414 3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
14415
14416 4. Delete the following short block of code. The setting of "re" and
14417 "functions" can be moved into the JIT-only block below, but if that is
14418 done, (void)re and (void)functions will be needed in the non-JIT case, to
14419 avoid compiler warnings.
14420 */
14421
14422 #ifdef SUPPORT_JIT
14423 functions = (executable_functions *)re->executable_jit;
14424 #endif
14425
14426 if ((options & PCRE2_JIT_INVALID_UTF) != 0)
14427 {
14428 if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
14429 {
14430 #ifdef SUPPORT_JIT
14431 if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
14432 #endif
14433 re->overall_options |= PCRE2_MATCH_INVALID_UTF;
14434 }
14435 }
14436
14437 /* The above tests are run with and without JIT support. This means that
14438 PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
14439 interpreter support) even in the absence of JIT. But now, if there is no JIT
14440 support, give an error return. */
14441
14442 #ifndef SUPPORT_JIT
14443 return PCRE2_ERROR_JIT_BADOPTION;
14444 #else /* SUPPORT_JIT */
14445
14446 /* There is JIT support. Do the necessary. */
14447
14448 if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14449
14450 if (executable_allocator_is_working == 0)
14451 {
14452 /* Checks whether the executable allocator is working. This check
14453 might run multiple times in multi-threaded environments, but the
14454 result should not be affected by it. */
14455 void *ptr = SLJIT_MALLOC_EXEC(32, NULL);
14456
14457 executable_allocator_is_working = -1;
14458
14459 if (ptr != NULL)
14460 {
14461 SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL);
14462 executable_allocator_is_working = 1;
14463 }
14464 }
14465
14466 if (executable_allocator_is_working < 0)
14467 return PCRE2_ERROR_NOMEMORY;
14468
14469 if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
14470 options |= PCRE2_JIT_INVALID_UTF;
14471
14472 if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14473 || functions->executable_funcs[0] == NULL)) {
14474 uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14475 int result = jit_compile(code, options & ~excluded_options);
14476 if (result != 0)
14477 return result;
14478 }
14479
14480 if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14481 || functions->executable_funcs[1] == NULL)) {
14482 uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14483 int result = jit_compile(code, options & ~excluded_options);
14484 if (result != 0)
14485 return result;
14486 }
14487
14488 if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14489 || functions->executable_funcs[2] == NULL)) {
14490 uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14491 int result = jit_compile(code, options & ~excluded_options);
14492 if (result != 0)
14493 return result;
14494 }
14495
14496 return 0;
14497
14498 #endif /* SUPPORT_JIT */
14499 }
14500
14501 /* JIT compiler uses an all-in-one approach. This improves security,
14502 since the code generator functions are not exported. */
14503
14504 #define INCLUDED_FROM_PCRE2_JIT_COMPILE
14505
14506 #include "pcre2_jit_match.c"
14507 #include "pcre2_jit_misc.c"
14508
14509 /* End of pcre2_jit_compile.c */
14510