1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9                     This module by Zoltan Herczeg
10      Original API code Copyright (c) 1997-2012 University of Cambridge
11           New API code Copyright (c) 2016-2024 University of Cambridge
12 
13 -----------------------------------------------------------------------------
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
16 
17     * Redistributions of source code must retain the above copyright notice,
18       this list of conditions and the following disclaimer.
19 
20     * Redistributions in binary form must reproduce the above copyright
21       notice, this list of conditions and the following disclaimer in the
22       documentation and/or other materials provided with the distribution.
23 
24     * Neither the name of the University of Cambridge nor the names of its
25       contributors may be used to endorse or promote products derived from
26       this software without specific prior written permission.
27 
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 POSSIBILITY OF SUCH DAMAGE.
39 -----------------------------------------------------------------------------
40 */
41 
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45 
46 #if defined(__has_feature)
47 #if __has_feature(memory_sanitizer)
48 #include <sanitizer/msan_interface.h>
49 #endif /* __has_feature(memory_sanitizer) */
50 #endif /* defined(__has_feature) */
51 
52 #include "pcre2_internal.h"
53 
54 #ifdef SUPPORT_JIT
55 
56 /* All-in-one: Since we use the JIT compiler only from here,
57 we just include it. This way we don't need to touch the build
58 system files. */
59 
60 #define SLJIT_CONFIG_AUTO 1
61 #define SLJIT_CONFIG_STATIC 1
62 #define SLJIT_VERBOSE 0
63 
64 #ifdef PCRE2_DEBUG
65 #define SLJIT_DEBUG 1
66 #else
67 #define SLJIT_DEBUG 0
68 #endif
69 
70 #define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
71 #define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
72 
pcre2_jit_malloc(size_t size,void * allocator_data)73 static void * pcre2_jit_malloc(size_t size, void *allocator_data)
74 {
75 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
76 return allocator->malloc(size, allocator->memory_data);
77 }
78 
pcre2_jit_free(void * ptr,void * allocator_data)79 static void pcre2_jit_free(void *ptr, void *allocator_data)
80 {
81 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
82 allocator->free(ptr, allocator->memory_data);
83 }
84 
85 #include "sljit/sljitLir.c"
86 
87 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
88 #error Unsupported architecture
89 #endif
90 
91 /* Defines for debugging purposes. */
92 
93 /* 1 - Use unoptimized capturing brackets.
94    2 - Enable capture_last_ptr (includes option 1). */
95 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
96 
97 /* 1 - Always have a control head. */
98 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
99 
100 /* Allocate memory for the regex stack on the real machine stack.
101 Fast, but limited size. */
102 #define MACHINE_STACK_SIZE 32768
103 
104 /* Growth rate for stack allocated by the OS. Should be a multiple
105 of the page size. */
106 #define STACK_GROWTH_RATE 8192
107 
108 /* Enable to check that the allocation could destroy temporaries. */
109 #if defined SLJIT_DEBUG && SLJIT_DEBUG
110 #define DESTROY_REGISTERS 1
111 #endif
112 
113 /*
114 Short summary about the backtracking mechanism employed by the jit code generator:
115 
116 The code generator follows the recursive nature of the PERL compatible regular
117 expressions. The basic blocks of regular expressions are condition checkers
118 which execute different commands depending on the result of the condition check.
119 The relationship between the operators can be horizontal (concatenation) and
120 vertical (sub-expression) (See struct backtrack_common for more details).
121 
122   'ab' - 'a' and 'b' regexps are concatenated
123   'a+' - 'a' is the sub-expression of the '+' operator
124 
125 The condition checkers are boolean (true/false) checkers. Machine code is generated
126 for the checker itself and for the actions depending on the result of the checker.
127 The 'true' case is called the matching path (expected path), and the other is called
128 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
129 branches on the matching path.
130 
131  Greedy star operator (*) :
132    Matching path: match happens.
133    Backtrack path: match failed.
134  Non-greedy star operator (*?) :
135    Matching path: no need to perform a match.
136    Backtrack path: match is required.
137 
138 The following example shows the code generated for a capturing bracket
139 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
140 we have the following regular expression:
141 
142    A(B|C)D
143 
144 The generated code will be the following:
145 
146  A matching path
147  '(' matching path (pushing arguments to the stack)
148  B matching path
149  ')' matching path (pushing arguments to the stack)
150  D matching path
151  return with successful match
152 
153  D backtrack path
154  ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
155  B backtrack path
156  C expected path
157  jump to D matching path
158  C backtrack path
159  A backtrack path
160 
161  Notice that the order of the backtrack code paths is the opposite of the fast
162  code paths. In this way the topmost value on the stack always belongs
163  to the current backtrack code path. The backtrack path must check
164  whether there is a next alternative. If so, it needs to jump back to
165  the matching path eventually. Otherwise it needs to clear out its own stack
166  frame and continue the execution on the backtrack code paths.
167 */
168 
169 /*
170 Saved stack frames:
171 
172 Atomic blocks and asserts require reloading the values of private data
173 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
174 are not necessarily known at compile time, thus we need a dynamic restore
175 mechanism.
176 
177 The stack frames are stored in a chain list, and have the following format:
178 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
179 
180 Thus we can restore the private data to a particular point in the stack.
181 */
182 
/* Argument block for the generated code: jit_function (declared further down
in this file) receives a pointer to one of these as its only parameter. */
183 typedef struct jit_arguments {
184   /* Pointers first. */
185   struct sljit_stack *stack;
186   PCRE2_SPTR str;
187   PCRE2_SPTR begin;
188   PCRE2_SPTR end;
189   pcre2_match_data *match_data;
190   PCRE2_SPTR startchar_ptr;
191   PCRE2_UCHAR *mark_ptr;
192   int (*callout)(pcre2_callout_block *, void *);
193   void *callout_data;
194   /* Everything else after: one machine word, then the 32 bit members. */
195   sljit_uw offset_limit;
196   sljit_u32 limit_match;
197   sljit_u32 oveccount;
198   sljit_u32 options;
199 } jit_arguments;
200 
201 #define JIT_NUMBER_OF_COMPILE_MODES 3
202 
/* Bookkeeping for compiled code. Each array has JIT_NUMBER_OF_COMPILE_MODES
entries (presumably one slot per JIT compile mode -- confirm against the code
that fills them in). */
203 typedef struct executable_functions {
204   void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
205   void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
206   sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
207   sljit_u32 top_bracket;
208   sljit_u32 limit_match;
209 } executable_functions;
210 
/* Node of a singly linked list of sljit jumps. */
211 typedef struct jump_list {
212   struct sljit_jump *jump;
213   struct jump_list *next;
214 } jump_list;
215 
/* Node of a singly linked list; each node pairs a jump (start) with a
label (quit). */
216 typedef struct stub_list {
217   struct sljit_jump *start;
218   struct sljit_label *quit;
219   struct stub_list *next;
220 } stub_list;
221 
/* Negative sentinel values. NOTE(review): presumably used in place of a
real (non-negative) frame size -- confirm against the framesize users. */
222 enum frame_types {
223   no_frame = -1,
224   no_stack = -2
225 };
226 
/* Type tags for the items of the control verb chain (see the comment on
control_head_ptr in compiler_common). */
227 enum control_types {
228   type_mark = 0,
229   type_then_trap = 1
230 };
231 
/* NOTE(review): presumably the kinds of early fail optimization that belong
to early_fail_start_ptr / early_fail_end_ptr in compiler_common -- confirm
against the code that uses these values. */
232 enum  early_fail_types {
233   type_skip = 0,
234   type_fail = 1,
235   type_fail_range = 2
236 };
237 
238 typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
239 
240 /* The following structure is the key data type for the recursive
241 code generator. It is allocated by compile_matchingpath, and contains
242 the arguments for compile_backtrackingpath. Must be the first member
243 of its descendants. */
244 typedef struct backtrack_common {
245   /* Backtracking path of an opcode, which falls back
246      to our opcode, if it cannot resume matching. */
247   struct backtrack_common *prev;
248   /* Backtracks for opcodes without a backtracking path.
249      These opcodes are between 'prev' and the current
250      opcode, and they never resume the match. */
251   jump_list *simple_backtracks;
252   /* Internal backtracking list for block constructs
253      which contain other opcodes, such as brackets,
254      asserts, conditionals, etc. */
255   struct backtrack_common *top;
256   /* Backtracks used internally by the opcode. For component
257      opcodes, this list is also used by those opcodes without
258      a backtracking path which follow the 'top' backtrack. */
259   jump_list *own_backtracks;
260   /* Opcode pointer. */
261   PCRE2_SPTR cc;
262 } backtrack_common;
263 
/* Backtrack record for assertions. 'common' comes first, as required for
every descendant of backtrack_common. */
264 typedef struct assert_backtrack {
265   backtrack_common common;
266   jump_list *condfailed;
267   /* Less than 0 if a frame is not needed. */
268   int framesize;
269   /* Points to our private memory word on the stack. */
270   int private_data_ptr;
271   /* For iterators. */
272   struct sljit_label *matchingpath;
273 } assert_backtrack;
274 
/* Backtrack record for brackets. 'common' comes first, as required for
every descendant of backtrack_common. */
275 typedef struct bracket_backtrack {
276   backtrack_common common;
277   /* Where to continue if an alternative is successfully matched. */
278   struct sljit_label *alternative_matchingpath;
279   /* For rmin and rmax iterators. */
280   struct sljit_label *recursive_matchingpath;
281   /* For greedy ? operator. */
282   struct sljit_label *zero_matchingpath;
283   /* Contains the branches of a failed condition. The members share
     storage, so only the one matching the bracket kind is meaningful. */
284   union {
285     /* Both for OP_COND, OP_SCOND. */
286     jump_list *condfailed;
287     assert_backtrack *assert;
288     /* For OP_ONCE. Less than 0 if not needed. */
289     int framesize;
290     /* For brackets with >3 alternatives. */
291     struct sljit_jump *matching_mov_addr;
292   } u;
293   /* Points to our private memory word on the stack. */
294   int private_data_ptr;
295 } bracket_backtrack;
296 
/* Backtrack record whose name suggests the possessive (POS) bracket opcodes
-- confirm against its users. 'common' comes first, as required for every
descendant of backtrack_common. */
297 typedef struct bracketpos_backtrack {
298   backtrack_common common;
299   /* Points to our private memory word on the stack. */
300   int private_data_ptr;
301   /* Reverting stack is needed. */
302   int framesize;
303   /* Allocated stack size. */
304   int stacksize;
305 } bracketpos_backtrack;
306 
/* Backtrack record carrying a single matching path label; the name suggests
it belongs to OP_BRAMINZERO -- confirm against its users. */
307 typedef struct braminzero_backtrack {
308   backtrack_common common;
309   struct sljit_label *matchingpath;
310 } braminzero_backtrack;
311 
/* Backtrack record for character iterators. 'common' comes first, as
required for every descendant of backtrack_common. */
312 typedef struct char_iterator_backtrack {
313   backtrack_common common;
314   /* Next iteration. */
315   struct sljit_label *matchingpath;
  /* Either a backtrack list or the charpos data; the two share storage. */
316   union {
317     jump_list *backtracks;
318     struct {
319       unsigned int othercasebit;
320       PCRE2_UCHAR chr;
321       BOOL enabled;
322     } charpos;
323   } u;
324 } char_iterator_backtrack;
325 
/* Backtrack record carrying the label of the next iteration; the name
suggests it is used for repeated back references -- confirm against its
users. */
326 typedef struct ref_iterator_backtrack {
327   backtrack_common common;
328   /* Next iteration. */
329   struct sljit_label *matchingpath;
330 } ref_iterator_backtrack;
331 
/* Node of a singly linked list describing one recursively called function. */
332 typedef struct recurse_entry {
333   struct recurse_entry *next;
334   /* Contains the function entry label. */
335   struct sljit_label *entry_label;
336   /* NOTE(review): presumably the label used by the backtrack calls; the
     original comment repeated the entry_label text. */
337   struct sljit_label *backtrack_label;
338   /* Collects the entry calls until the function is created. */
339   jump_list *entry_calls;
340   /* Collects the backtrack calls until the function is created. */
341   jump_list *backtrack_calls;
342   /* Points to the starting opcode. */
343   sljit_sw start;
344 } recurse_entry;
345 
/* Backtrack record for recursion. 'common' comes first, as required for
every descendant of backtrack_common. */
346 typedef struct recurse_backtrack {
347   backtrack_common common;
348   /* Return to the matching path. */
349   struct sljit_label *matchingpath;
350   /* Recursive pattern. */
351   recurse_entry *entry;
352   /* Pattern is inlined. */
353   BOOL inlined_pattern;
354 } recurse_backtrack;
355 
/* Backtrack record carrying a single matching path label; the name suggests
it belongs to OP_VREVERSE -- confirm against its users. */
356 typedef struct vreverse_backtrack {
357   backtrack_common common;
358   /* Return to the matching path. */
359   struct sljit_label *matchingpath;
360 } vreverse_backtrack;
361 
362 #define OP_THEN_TRAP OP_TABLE_LENGTH
363 
/* Backtrack record for a then trap (compiler_common.then_trap points to the
current one). 'common' comes first, as required for every descendant of
backtrack_common. */
364 typedef struct then_trap_backtrack {
365   backtrack_common common;
366   /* If then_trap is not NULL, this structure contains the real
367   then_trap for the backtracking path. */
368   struct then_trap_backtrack *then_trap;
369   /* Points to the starting opcode. */
370   sljit_sw start;
371   /* Exit point for the then opcodes of this alternative. */
372   jump_list *quit;
373   /* Frame size of the current alternative. */
374   int framesize;
375 } then_trap_backtrack;
376 
377 #define MAX_N_CHARS 12
378 #define MAX_DIFF_CHARS 5
379 
/* Set of characters available at one position; the chars array can hold at
most MAX_DIFF_CHARS entries. */
380 typedef struct fast_forward_char_data {
381   /* Number of characters in the chars array, 255 for any character. */
382   sljit_u8 count;
383   /* Number of last UTF-8 characters in the chars array. */
384   sljit_u8 last_count;
385   /* Available characters in the current position. */
386   PCRE2_UCHAR chars[MAX_DIFF_CHARS];
387 } fast_forward_char_data;
388 
389 #define MAX_CLASS_RANGE_SIZE 4
390 #define MAX_CLASS_CHARS_SIZE 3
391 
/* State shared by the code generator functions. The shortcut macros defined
later in this file (DEFINE_COMPILER, OVECTOR, PRIVATE_DATA, ...) access it
through a variable named 'common'. */
392 typedef struct compiler_common {
393   /* The sljit generic compiler. */
394   struct sljit_compiler *compiler;
395   /* Compiled regular expression. */
396   pcre2_real_code *re;
397   /* First byte code. */
398   PCRE2_SPTR start;
399   /* Maps private data offset to each opcode. */
400   sljit_s32 *private_data_ptrs;
401   /* Chain list of read-only data ptrs. */
402   void *read_only_data_head;
403   /* Tells whether the capturing bracket is optimized. */
404   sljit_u8 *optimized_cbracket;
405   /* Tells whether the starting offset is a target of then. */
406   sljit_u8 *then_offsets;
407   /* Current position where a THEN must jump. */
408   then_trap_backtrack *then_trap;
409   /* Starting offset of private data for capturing brackets. */
410   sljit_s32 cbra_ptr;
411   /* Output vector starting point. Must be divisible by 2. */
412   sljit_s32 ovector_start;
413   /* Points to the starting character of the current match. */
414   sljit_s32 start_ptr;
415   /* Last known position of the requested byte. */
416   sljit_s32 req_char_ptr;
417   /* Head of the last recursion. */
418   sljit_s32 recursive_head_ptr;
419   /* First inspected character for partial matching.
420      (Needed for avoiding zero length partial matches.) */
421   sljit_s32 start_used_ptr;
422   /* Starting pointer for partial soft matches. */
423   sljit_s32 hit_start;
424   /* Pointer of the match end position. */
425   sljit_s32 match_end_ptr;
426   /* Points to the marked string. */
427   sljit_s32 mark_ptr;
428   /* Head of the recursive control verb management chain.
429      Each item must have a previous offset and type
430      (see control_types) values. See do_search_mark. */
431   sljit_s32 control_head_ptr;
432   /* Points to the last matched capture block index. */
433   sljit_s32 capture_last_ptr;
434   /* Fast forward skipping byte code pointer. */
435   PCRE2_SPTR fast_forward_bc_ptr;
436   /* Locals used by fast fail optimization. */
437   sljit_s32 early_fail_start_ptr;
438   sljit_s32 early_fail_end_ptr;
439   /* Variables used by recursive call generator. */
440   sljit_s32 recurse_bitset_size;
441   uint8_t *recurse_bitset;
442 
443   /* Flipped and lower case tables. */
444   const sljit_u8 *fcc;
445   sljit_sw lcc;
446   /* Mode can be PCRE2_JIT_COMPLETE and others. */
447   int mode;
448   /* TRUE, when empty match is accepted for partial matching. */
449   BOOL allow_empty_partial;
450   /* TRUE, when minlength is greater than 0.
     NOTE(review): the field name suggests the opposite condition (the
     pattern may match an empty string) -- confirm against the code that
     sets this member. */
451   BOOL might_be_empty;
452   /* \K is found in the pattern. */
453   BOOL has_set_som;
454   /* (*SKIP:arg) is found in the pattern. */
455   BOOL has_skip_arg;
456   /* (*THEN) is found in the pattern. */
457   BOOL has_then;
458   /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
459   BOOL has_skip_in_assert_back;
460   /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
461   BOOL local_quit_available;
462   /* Currently in a positive assertion. */
463   BOOL in_positive_assertion;
464   /* Newline control. */
465   int nltype;
466   sljit_u32 nlmax;
467   sljit_u32 nlmin;
468   int newline;
469   int bsr_nltype;
470   sljit_u32 bsr_nlmax;
471   sljit_u32 bsr_nlmin;
472   /* Dollar endonly. */
473   int endonly;
474   /* Tables. */
475   sljit_sw ctypes;
476   /* Named capturing brackets. */
477   PCRE2_SPTR name_table;
478   sljit_sw name_count;
479   sljit_sw name_entry_size;
480 
481   /* Labels and jump lists. */
482   struct sljit_label *partialmatchlabel;
483   struct sljit_label *quit_label;
484   struct sljit_label *abort_label;
485   struct sljit_label *accept_label;
486   struct sljit_label *ff_newline_shortcut;
487   stub_list *stubs;
488   recurse_entry *entries;
489   recurse_entry *currententry;
490   jump_list *partialmatch;
491   jump_list *quit;
492   jump_list *positive_assertion_quit;
493   jump_list *abort;
494   jump_list *failed_match;
495   jump_list *accept;
496   jump_list *calllimit;
497   jump_list *stackalloc;
498   jump_list *revertframes;
499   jump_list *wordboundary;
500   jump_list *ucp_wordboundary;
501   jump_list *anynewline;
502   jump_list *hspace;
503   jump_list *vspace;
504   jump_list *casefulcmp;
505   jump_list *caselesscmp;
506   jump_list *reset_match;
507   /* Same as reset_match, but resets the STR_PTR as well. */
508   jump_list *restart_match;
509   BOOL unset_backref;
510   BOOL alt_circumflex;
511 #ifdef SUPPORT_UNICODE
512   BOOL utf;
513   BOOL invalid_utf;
514   BOOL ucp;
515   /* Points to saving area for iref. */
516   sljit_s32 iref_ptr;
517   jump_list *getucd;
518   jump_list *getucdtype;
519 #if PCRE2_CODE_UNIT_WIDTH == 8
520   jump_list *utfreadchar;
521   jump_list *utfreadtype8;
522   jump_list *utfpeakcharback;
523 #endif
524 #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
525   jump_list *utfreadchar_invalid;
526   jump_list *utfreadnewline_invalid;
527   jump_list *utfmoveback_invalid;
528   jump_list *utfpeakcharback_invalid;
529 #endif
530 #endif /* SUPPORT_UNICODE */
531 } compiler_common;
532 
533 /* For byte_sequence_compare. */
534 
/* Only length and sourcereg exist on every target. The remaining members
are compiled in when SLJIT_UNALIGNED is set: 'c' and 'oc' are two unions of
identical layout that expose the same storage as a 32 bit int, a 16 bit
value, or an array of code units sized for the code unit width. */
535 typedef struct compare_context {
536   int length;
537   int sourcereg;
538 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
539   int ucharptr;
540   union {
541     sljit_s32 asint;
542     sljit_u16 asushort;
543 #if PCRE2_CODE_UNIT_WIDTH == 8
544     sljit_u8 asbyte;
545     sljit_u8 asuchars[4];
546 #elif PCRE2_CODE_UNIT_WIDTH == 16
547     sljit_u16 asuchars[2];
548 #elif PCRE2_CODE_UNIT_WIDTH == 32
549     sljit_u32 asuchars[1];
550 #endif
551   } c;
552   union {
553     sljit_s32 asint;
554     sljit_u16 asushort;
555 #if PCRE2_CODE_UNIT_WIDTH == 8
556     sljit_u8 asbyte;
557     sljit_u8 asuchars[4];
558 #elif PCRE2_CODE_UNIT_WIDTH == 16
559     sljit_u16 asuchars[2];
560 #elif PCRE2_CODE_UNIT_WIDTH == 32
561     sljit_u32 asuchars[1];
562 #endif
563   } oc;
564 #endif
565 } compare_context;
566 
567 /* Undefine sljit macros. */
568 #undef CMP
569 
570 /* Used for accessing the elements of the stack. */
571 #define STACK(i)      ((i) * SSIZE_OF(sw))
572 
/* Register assignment used by the generated code. If sljit reports a
preferred shift register it must be R2 or R3 (anything else is a build
error). When it is R3, TMP2 and TMP3 swap registers, so TMP2 always maps to
the preferred shift register whenever one is reported. */
573 #ifdef SLJIT_PREF_SHIFT_REG
574 #if SLJIT_PREF_SHIFT_REG == SLJIT_R2
575 /* Nothing. */
576 #elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
577 #define SHIFT_REG_IS_R3
578 #else
579 #error "Unsupported shift register"
580 #endif
581 #endif
582 
583 #define TMP1          SLJIT_R0
584 #ifdef SHIFT_REG_IS_R3
585 #define TMP2          SLJIT_R3
586 #define TMP3          SLJIT_R2
587 #else
588 #define TMP2          SLJIT_R2
589 #define TMP3          SLJIT_R3
590 #endif
591 #define STR_PTR       SLJIT_R1
592 #define STR_END       SLJIT_S0
593 #define STACK_TOP     SLJIT_S1
594 #define STACK_LIMIT   SLJIT_S2
595 #define COUNT_MATCH   SLJIT_S3
596 #define ARGUMENTS     SLJIT_S4
597 #define RETURN_ADDR   SLJIT_R4
598 
599 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
600 #define HAS_VIRTUAL_REGISTERS 1
601 #else
602 #define HAS_VIRTUAL_REGISTERS 0
603 #endif
604 
605 /* Local space layout. */
606 /* These two locals can be used by the current opcode. */
607 #define LOCALS0          (0 * sizeof(sljit_sw))
608 #define LOCALS1          (1 * sizeof(sljit_sw))
609 /* Two local variables for possessive quantifiers (char1 cannot use them). */
610 #define POSSESSIVE0      (2 * sizeof(sljit_sw))
611 #define POSSESSIVE1      (3 * sizeof(sljit_sw))
612 /* Max limit of recursions. */
613 #define LIMIT_MATCH      (4 * sizeof(sljit_sw))
614 /* The output vector is stored on the stack, and contains pointers
615 to characters. The vector data is divided into two groups: the first
616 group contains the start / end character pointers, and the second is
617 the start pointers when the end of the capturing group has not yet reached. */
618 #define OVECTOR_START    (common->ovector_start)
619 #define OVECTOR(i)       (OVECTOR_START + (i) * SSIZE_OF(sw))
620 #define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * SSIZE_OF(sw))
621 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
622 
/* Code unit width dependent helpers. MOV_UCHAR is the sljit move opcode of
the code unit size and IN_UCHARS(x) multiplies a count by 1, 2 or 4 for the
8, 16 and 32 bit widths. UCHAR_SHIFT is defined for the 16 and 32 bit widths
only. */
623 #if PCRE2_CODE_UNIT_WIDTH == 8
624 #define MOV_UCHAR  SLJIT_MOV_U8
625 #define IN_UCHARS(x) (x)
626 #elif PCRE2_CODE_UNIT_WIDTH == 16
627 #define MOV_UCHAR  SLJIT_MOV_U16
628 #define UCHAR_SHIFT (1)
629 #define IN_UCHARS(x) ((x) * 2)
630 #elif PCRE2_CODE_UNIT_WIDTH == 32
631 #define MOV_UCHAR  SLJIT_MOV_U32
632 #define UCHAR_SHIFT (2)
633 #define IN_UCHARS(x) ((x) * 4)
634 #else
635 #error Unsupported compiling mode
636 #endif
637 
638 /* Shortcuts. DEFINE_COMPILER declares a local variable named 'compiler'
taken from common->compiler. Every other macro here except SET_LABEL
expands to an sljit call that uses that variable, so they can only be used
where it is in scope. */
639 #define DEFINE_COMPILER \
640   struct sljit_compiler *compiler = common->compiler
641 #define OP1(op, dst, dstw, src, srcw) \
642   sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
643 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
644   sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
645 #define OP2U(op, src1, src1w, src2, src2w) \
646   sljit_emit_op2u(compiler, (op), (src1), (src1w), (src2), (src2w))
647 #define OP_SRC(op, src, srcw) \
648   sljit_emit_op_src(compiler, (op), (src), (srcw))
649 #define LABEL() \
650   sljit_emit_label(compiler)
651 #define JUMP(type) \
652   sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an already existing label. */
653 #define JUMPTO(type, label) \
654   sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Emits a new label at this point and binds a previously emitted jump to it. */
655 #define JUMPHERE(jump) \
656   sljit_set_label((jump), sljit_emit_label(compiler))
657 #define SET_LABEL(jump, label) \
658   sljit_set_label((jump), (label))
/* CMP is defined here after the '#undef CMP' earlier in this file. */
659 #define CMP(type, src1, src1w, src2, src2w) \
660   sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
661 #define CMPTO(type, src1, src1w, src2, src2w, label) \
662   sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
663 #define OP_FLAGS(op, dst, dstw, type) \
664   sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
665 #define SELECT(type, dst_reg, src1, src1w, src2_reg) \
666   sljit_emit_select(compiler, (type), (dst_reg), (src1), (src1w), (src2_reg))
667 #define GET_LOCAL_BASE(dst, dstw, offset) \
668   sljit_get_local_base(compiler, (dst), (dstw), (offset))
669 
670 #define READ_CHAR_MAX 0x7fffffff
671 
672 #define INVALID_UTF_CHAR -1
673 #define UNASSIGNED_UTF_CHAR 888
674 
675 #if defined SUPPORT_UNICODE
676 #if PCRE2_CODE_UNIT_WIDTH == 8
677 
/* Reads one UTF-8 character starting at ptr with full validation (8 bit
code units). On success c receives the code point and ptr is moved past the
character. invalid_action is executed when the sequence is malformed, when a
required continuation byte would lie at or beyond 'end', when a three byte
sequence decodes below 0x800 or to a surrogate (0xd800-0xdfff), or when a
four byte sequence decodes below 0x10000 or above 0x10ffff. Notes:
 - ptr[0] is read without comparing ptr to 'end';
 - for the two range check failures ptr has already been advanced and c has
   been modified before invalid_action runs; on the other failures ptr is
   unchanged;
 - the arguments are expanded several times, so they must be free of side
   effects;
 - the body is a plain brace block, so a break or continue passed as
   invalid_action refers to the statement enclosing the macro call. */
678 #define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
679   { \
680   if (ptr[0] <= 0x7f) \
681     c = *ptr++; \
682   else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \
683     { \
684     c = ptr[1] - 0x80; \
685     \
686     if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \
687       { \
688       c |= (ptr[0] - 0xc0) << 6; \
689       ptr += 2; \
690       } \
691     else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] < 0xc0) \
692       { \
693       c = c << 6 | (ptr[2] - 0x80); \
694       \
695       if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \
696         { \
697         c |= (ptr[0] - 0xe0) << 12; \
698         ptr += 3; \
699         \
700         if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
701           { \
702           invalid_action; \
703           } \
704         } \
705       else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \
706         { \
707         c = c << 6 | (ptr[3] - 0x80); \
708         \
709         if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \
710           { \
711           c |= (ptr[0] - 0xf0) << 18; \
712           ptr += 4; \
713           \
714           if (c >= 0x110000 || c < 0x10000) \
715             { \
716             invalid_action; \
717             } \
718           } \
719         else \
720           { \
721           invalid_action; \
722           } \
723         } \
724       else \
725         { \
726         invalid_action; \
727         } \
728       } \
729     else \
730       { \
731       invalid_action; \
732       } \
733     } \
734   else \
735     { \
736     invalid_action; \
737     } \
738   }
739 
/* Reads the UTF-8 character that ends just before ptr with full validation
(8 bit code units). On success c receives the code point and ptr is moved
back to the first code unit of the character. invalid_action is executed
when the sequence is malformed, when it would have to begin before 'start',
when a three byte sequence decodes below 0x800 or to a surrogate
(0xd800-0xdfff), or when a four byte sequence decodes below 0x10000 or above
0x10ffff.

The bytes are visited from the last one to the first one, so ptr[-1] holds
the lowest six bits and every earlier byte is OR-ed in at the next higher
position (6, 12, 18). The value collected so far must never be shifted: that
is what the forward reader does, and doing it here swaps the trailing bytes
of three and four byte characters.

Notes:
 - ptr[-1] is read without comparing ptr to 'start', the caller must
   guarantee ptr > start;
 - c is modified even when invalid_action is executed, and for the two range
   check failures ptr has already been moved back;
 - the arguments are expanded several times, so they must be free of side
   effects;
 - the body is a plain brace block, so a break or continue passed as
   invalid_action refers to the statement enclosing the macro call. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c <= 0x7f) \
    ptr--; \
  else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \
    { \
    c -= 0x80; \
    \
    if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \
      { \
      c |= (ptr[-2] - 0xc0) << 6; \
      ptr -= 2; \
      } \
    else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \
      { \
      c |= (ptr[-2] - 0x80) << 6; \
      \
      if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \
        { \
        c |= (ptr[-3] - 0xe0) << 12; \
        ptr -= 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \
        { \
        c |= (ptr[-3] - 0x80) << 12; \
        \
        if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \
          { \
          c |= (ptr[-4] - 0xf0) << 18; \
          ptr -= 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
802 
803 #elif PCRE2_CODE_UNIT_WIDTH == 16
804 
/* Reads one UTF-16 character starting at ptr with validation (16 bit code
units). A code unit outside 0xd800-0xdfff is returned as is. A high surrogate
followed, before 'end', by a low surrogate is combined into a code point
above 0xffff and ptr is moved past both units. In every other case
invalid_action is executed and neither c nor ptr is changed. The body is a
plain brace block, so a break or continue passed as invalid_action refers to
the statement enclosing the macro call. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] >= 0xd800 && ptr[0] < 0xe000) \
    { \
    if (ptr[0] >= 0xdc00 || ptr + 1 >= end || ptr[1] < 0xdc00 || ptr[1] >= 0xe000) \
      { \
      invalid_action; \
      } \
    else \
      { \
      c = 0x10000 + (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)); \
      ptr += 2; \
      } \
    } \
  else \
    c = *ptr++; \
  }
819 
/* Reads the UTF-16 character that ends just before ptr with validation
(16 bit code units). c always receives ptr[-1] first. When that unit is not
a surrogate, ptr steps back by one. When it is a low surrogate preceded,
after 'start', by a high surrogate, the pair is combined into a code point
above 0xffff and ptr steps back by two. In every other case invalid_action
is executed with ptr unchanged and c still holding ptr[-1]. The body is a
plain brace block, so a break or continue passed as invalid_action refers to
the statement enclosing the macro call. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c >= 0xd800 && c < 0xe000) \
    { \
    if (c < 0xdc00 || ptr - 1 <= start || ptr[-2] < 0xd800 || ptr[-2] >= 0xdc00) \
      { \
      invalid_action; \
      } \
    else \
      { \
      c = 0x10000 + (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)); \
      ptr -= 2; \
      } \
    } \
  else \
    ptr--; \
  }
835 
836 
837 #elif PCRE2_CODE_UNIT_WIDTH == 32
838 
/* Reads one UTF-32 character at ptr with validation (32 bit code units).
Surrogates (0xd800-0xdfff) and values from 0x110000 upwards execute
invalid_action and leave c and ptr unchanged; any other value is stored in c
and ptr is advanced by one. The 'end' argument is not used by this variant.
The body is a plain brace block, so a break or continue passed as
invalid_action refers to the statement enclosing the macro call. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] >= 0x110000 || (ptr[0] >= 0xd800 && ptr[0] < 0xe000)) \
    { \
    invalid_action; \
    } \
  else \
    c = *ptr++; \
  }
848 
/* Reads the UTF-32 character just before ptr with validation (32 bit code
units). c always receives ptr[-1]. Surrogates (0xd800-0xdfff) and values
from 0x110000 upwards execute invalid_action and leave ptr unchanged;
otherwise ptr steps back by one. The 'start' argument is not used by this
variant. The body is a plain brace block, so a break or continue passed as
invalid_action refers to the statement enclosing the macro call. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (ptr[-1] >= 0x110000 || (ptr[-1] >= 0xd800 && ptr[-1] < 0xe000)) \
    { \
    invalid_action; \
    } \
  else \
    ptr--; \
  }
859 
860 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
861 #endif /* SUPPORT_UNICODE */
862 
/* Returns the address of the opcode that follows a bracketed group. cc must
point to the opening opcode of the group (an assertion, or an opcode in the
OP_ONCE ... OP_SCOND range). Each alternative is skipped through its link
value, then the closing ket and its link field are stepped over. */
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* Follow the links until one of them no longer lands on an OP_ALT. */
cc += GET(cc, 1);
while (*cc == OP_ALT)
  cc += GET(cc, 1);

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return cc + 1 + LINK_SIZE;
}
871 
/* Returns the number of alternatives of a bracketed group. cc must point to
the opening opcode of the group (an assertion, or an opcode in the
OP_ONCE ... OP_SCOND range). */
static int no_alternatives(PCRE2_SPTR cc)
{
int alternatives;

SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* The first link is always followed, so the count starts at one; every
OP_ALT reached afterwards adds one more alternative. */
cc += GET(cc, 1);
for (alternatives = 1; *cc == OP_ALT; alternatives++)
  cc += GET(cc, 1);

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return alternatives;
}
885 
find_vreverse(PCRE2_SPTR cc)886 static BOOL find_vreverse(PCRE2_SPTR cc)
887 {
888   SLJIT_ASSERT(*cc == OP_ASSERTBACK || *cc == OP_ASSERTBACK_NOT ||  *cc == OP_ASSERTBACK_NA);
889 
890   do
891     {
892     if (cc[1 + LINK_SIZE] == OP_VREVERSE)
893       return TRUE;
894     cc += GET(cc, 1);
895     }
896   while (*cc == OP_ALT);
897 
898   return FALSE;
899 }
900 
901 /* Functions which might need modification for all new supported opcodes:
902  next_opcode
903  check_opcode_types
904  set_private_data_ptrs
905  get_framesize
906  init_frame
907  get_recurse_data_length
908  copy_recurse_data
909  compile_matchingpath
910  compile_backtrackingpath
911 */
912 
/* Returns a pointer to the code unit which follows the opcode at cc.

Arguments:
  common   compiler state; only common->utf is read, and only when
           SUPPORT_UNICODE is defined
  cc       points to the current opcode

Returns: the advanced pointer, or NULL when the opcode cannot be stepped
         over (OP_ANYBYTE in UTF mode, or an opcode not listed below)
*/
static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
/* common is referenced only inside SUPPORT_UNICODE blocks. */
SLJIT_UNUSED_ARG(common);
switch(*cc)
  {
  /* Opcodes advanced by exactly their PRIV(OP_lengths) table entry. */
  case OP_SOD:
  case OP_SOM:
  case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  case OP_ANY:
  case OP_ALLANY:
  case OP_NOTPROP:
  case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE:
  case OP_HSPACE:
  case OP_NOT_VSPACE:
  case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN:
  case OP_EOD:
  case OP_CIRC:
  case OP_CIRCM:
  case OP_DOLL:
  case OP_DOLLM:
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  case OP_CRPOSSTAR:
  case OP_CRPOSPLUS:
  case OP_CRPOSQUERY:
  case OP_CRPOSRANGE:
  case OP_CLASS:
  case OP_NCLASS:
  case OP_REF:
  case OP_REFI:
  case OP_DNREF:
  case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET:
  case OP_KETRMAX:
  case OP_KETRMIN:
  case OP_KETRPOS:
  case OP_REVERSE:
  case OP_VREVERSE:
  case OP_ASSERT:
  case OP_ASSERT_NOT:
  case OP_ASSERTBACK:
  case OP_ASSERTBACK_NOT:
  case OP_ASSERT_NA:
  case OP_ASSERTBACK_NA:
  case OP_ONCE:
  case OP_SCRIPT_RUN:
  case OP_BRA:
  case OP_BRAPOS:
  case OP_CBRA:
  case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA:
  case OP_SBRAPOS:
  case OP_SCBRA:
  case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF:
  case OP_DNCREF:
  case OP_RREF:
  case OP_DNRREF:
  case OP_FALSE:
  case OP_TRUE:
  case OP_BRAZERO:
  case OP_BRAMINZERO:
  case OP_BRAPOSZERO:
  case OP_PRUNE:
  case OP_SKIP:
  case OP_THEN:
  case OP_COMMIT:
  case OP_FAIL:
  case OP_ACCEPT:
  case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  case OP_NOT_UCP_WORD_BOUNDARY:
  case OP_UCP_WORD_BOUNDARY:
  return cc + PRIV(OP_lengths)[*cc];

  /* Opcodes advanced by their table length; in UTF mode the code unit
     just before the new position is then inspected and any extra code
     units it announces are skipped as well. */
  case OP_CHAR:
  case OP_CHARI:
  case OP_NOT:
  case OP_NOTI:
  case OP_STAR:
  case OP_MINSTAR:
  case OP_PLUS:
  case OP_MINPLUS:
  case OP_QUERY:
  case OP_MINQUERY:
  case OP_UPTO:
  case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSPLUS:
  case OP_POSQUERY:
  case OP_POSUPTO:
  case OP_STARI:
  case OP_MINSTARI:
  case OP_PLUSI:
  case OP_MINPLUSI:
  case OP_QUERYI:
  case OP_MINQUERYI:
  case OP_UPTOI:
  case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI:
  case OP_POSPLUSI:
  case OP_POSQUERYI:
  case OP_POSUPTOI:
  case OP_NOTSTAR:
  case OP_NOTMINSTAR:
  case OP_NOTPLUS:
  case OP_NOTMINPLUS:
  case OP_NOTQUERY:
  case OP_NOTMINQUERY:
  case OP_NOTUPTO:
  case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR:
  case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY:
  case OP_NOTPOSUPTO:
  case OP_NOTSTARI:
  case OP_NOTMINSTARI:
  case OP_NOTPLUSI:
  case OP_NOTMINPLUSI:
  case OP_NOTQUERYI:
  case OP_NOTMINQUERYI:
  case OP_NOTUPTOI:
  case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI:
  case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI:
  case OP_NOTPOSUPTOI:
  cc += PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
  return cc;

  /* Special cases. */
  /* Type iterators are advanced by one code unit less than their table
     length, so the result points at the last code unit of the item
     (presumably the character type opcode, which the caller then
     processes as an opcode of its own). */
  case OP_TYPESTAR:
  case OP_TYPEMINSTAR:
  case OP_TYPEPLUS:
  case OP_TYPEMINPLUS:
  case OP_TYPEQUERY:
  case OP_TYPEMINQUERY:
  case OP_TYPEUPTO:
  case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR:
  case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY:
  case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[*cc] - 1;

  /* OP_ANYBYTE is reported as unsupported (NULL) when UTF is enabled. */
  case OP_ANYBYTE:
#ifdef SUPPORT_UNICODE
  if (common->utf) return NULL;
#endif
  return cc + 1;

  /* The total length is read from the opcode itself, at offset
     1 + 2*LINK_SIZE. */
  case OP_CALLOUT_STR:
  return cc + GET(cc, 1 + 2*LINK_SIZE);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  /* The total length is read from the link field after the opcode. */
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  /* Verbs with an argument: cc[1] holds a length, and two further code
     units are skipped in addition to the opcode and that argument. */
  case OP_MARK:
  case OP_COMMIT_ARG:
  case OP_PRUNE_ARG:
  case OP_SKIP_ARG:
  case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  /* Every other opcode is unexpected here. */
  default:
  SLJIT_UNREACHABLE();
  return NULL;
  }
}
1116 
check_opcode_types(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend)1117 static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
1118 {
1119 int count;
1120 PCRE2_SPTR slot;
1121 PCRE2_SPTR assert_back_end = cc - 1;
1122 PCRE2_SPTR assert_na_end = cc - 1;
1123 
1124 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
1125 while (cc < ccend)
1126   {
1127   switch(*cc)
1128     {
1129     case OP_SET_SOM:
1130     common->has_set_som = TRUE;
1131     common->might_be_empty = TRUE;
1132     cc += 1;
1133     break;
1134 
1135     case OP_REFI:
1136 #ifdef SUPPORT_UNICODE
1137     if (common->iref_ptr == 0)
1138       {
1139       common->iref_ptr = common->ovector_start;
1140       common->ovector_start += 3 * sizeof(sljit_sw);
1141       }
1142 #endif /* SUPPORT_UNICODE */
1143     /* Fall through. */
1144     case OP_REF:
1145     common->optimized_cbracket[GET2(cc, 1)] = 0;
1146     cc += 1 + IMM2_SIZE;
1147     break;
1148 
1149     case OP_ASSERT_NA:
1150     case OP_ASSERTBACK_NA:
1151     slot = bracketend(cc);
1152     if (slot > assert_na_end)
1153       assert_na_end = slot;
1154     cc += 1 + LINK_SIZE;
1155     break;
1156 
1157     case OP_CBRAPOS:
1158     case OP_SCBRAPOS:
1159     common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
1160     cc += 1 + LINK_SIZE + IMM2_SIZE;
1161     break;
1162 
1163     case OP_COND:
1164     case OP_SCOND:
1165     /* Only AUTO_CALLOUT can insert this opcode. We do
1166        not intend to support this case. */
1167     if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
1168       return FALSE;
1169     cc += 1 + LINK_SIZE;
1170     break;
1171 
1172     case OP_CREF:
1173     common->optimized_cbracket[GET2(cc, 1)] = 0;
1174     cc += 1 + IMM2_SIZE;
1175     break;
1176 
1177     case OP_DNREF:
1178     case OP_DNREFI:
1179     case OP_DNCREF:
1180     count = GET2(cc, 1 + IMM2_SIZE);
1181     slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
1182     while (count-- > 0)
1183       {
1184       common->optimized_cbracket[GET2(slot, 0)] = 0;
1185       slot += common->name_entry_size;
1186       }
1187     cc += 1 + 2 * IMM2_SIZE;
1188     break;
1189 
1190     case OP_RECURSE:
1191     /* Set its value only once. */
1192     if (common->recursive_head_ptr == 0)
1193       {
1194       common->recursive_head_ptr = common->ovector_start;
1195       common->ovector_start += sizeof(sljit_sw);
1196       }
1197     cc += 1 + LINK_SIZE;
1198     break;
1199 
1200     case OP_CALLOUT:
1201     case OP_CALLOUT_STR:
1202     if (common->capture_last_ptr == 0)
1203       {
1204       common->capture_last_ptr = common->ovector_start;
1205       common->ovector_start += sizeof(sljit_sw);
1206       }
1207     cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
1208     break;
1209 
1210     case OP_ASSERTBACK:
1211     slot = bracketend(cc);
1212     if (slot > assert_back_end)
1213       assert_back_end = slot;
1214     cc += 1 + LINK_SIZE;
1215     break;
1216 
1217     case OP_THEN_ARG:
1218     common->has_then = TRUE;
1219     common->control_head_ptr = 1;
1220     /* Fall through. */
1221 
1222     case OP_COMMIT_ARG:
1223     case OP_PRUNE_ARG:
1224     if (cc < assert_na_end)
1225       return FALSE;
1226     /* Fall through */
1227     case OP_MARK:
1228     if (common->mark_ptr == 0)
1229       {
1230       common->mark_ptr = common->ovector_start;
1231       common->ovector_start += sizeof(sljit_sw);
1232       }
1233     cc += 1 + 2 + cc[1];
1234     break;
1235 
1236     case OP_THEN:
1237     common->has_then = TRUE;
1238     common->control_head_ptr = 1;
1239     cc += 1;
1240     break;
1241 
1242     case OP_SKIP:
1243     if (cc < assert_back_end)
1244       common->has_skip_in_assert_back = TRUE;
1245     if (cc < assert_na_end)
1246       return FALSE;
1247     cc += 1;
1248     break;
1249 
1250     case OP_SKIP_ARG:
1251     common->control_head_ptr = 1;
1252     common->has_skip_arg = TRUE;
1253     if (cc < assert_back_end)
1254       common->has_skip_in_assert_back = TRUE;
1255     if (cc < assert_na_end)
1256       return FALSE;
1257     cc += 1 + 2 + cc[1];
1258     break;
1259 
1260     case OP_PRUNE:
1261     case OP_COMMIT:
1262     case OP_ASSERT_ACCEPT:
1263     if (cc < assert_na_end)
1264       return FALSE;
1265     cc++;
1266     break;
1267 
1268     default:
1269     cc = next_opcode(common, cc);
1270     if (cc == NULL)
1271       return FALSE;
1272     break;
1273     }
1274   }
1275 return TRUE;
1276 }
1277 
/* Upper limit of the early fail counter: detect_early_fail() requires its
start argument to be below this value and returns it when scanning has
to stop. */
#define EARLY_FAIL_ENHANCE_MAX (3 + 3)
1279 
/*
  Start represents the number of allowed early fail enhancements

  The 0-3 values have a special meaning:
    0 - skip is allowed for all iterators
    1 - fail is allowed for all iterators
    2 - fail is allowed for greedy iterators
    3 - only ranged early fail is allowed
  >3 - (start - 3) number of remaining ranged early fails allowed

return: the updated value of start
*/
/* Scans every alternative of the bracket at cc and marks the iterators
which can use an early fail (or skip) enhancement.

Arguments:
  common              compiler state; private_data_ptrs[], fast_forward_bc_ptr,
                      early_fail_start_ptr and early_fail_end_ptr are updated
  cc                  points to an OP_ONCE, OP_BRA or (optimized) OP_CBRA
  private_data_start  next free private data offset, advanced for every
                      marked iterator
  depth               nesting level of the bracket; brackets are not
                      entered any more once it reaches 4
  start               initial counter value of each alternative

Returns: the largest counter value reached by the alternatives, or
         EARLY_FAIL_ENHANCE_MAX when an alternative could not be scanned
         up to its OP_ALT / OP_KET or the private data area became too large
*/
static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc,
   int *private_data_start, sljit_s32 depth, int start)
{
PCRE2_SPTR begin = cc;
PCRE2_SPTR next_alt;
PCRE2_SPTR end;
PCRE2_SPTR accelerated_start;
int result = 0;
int count, prev_count;

SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);

/* A bracket with more than one alternative starts from at least 1. */
next_alt = cc + GET(cc, 1);
if (*next_alt == OP_ALT && start < 1)
  start = 1;

/* One iteration per alternative. */
do
  {
  count = start;
  /* Step over the bracket / OP_ALT header (OP_CBRA also has a group number). */
  cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);

  /* Inside the switch, "continue" moves on to the next opcode, while
     "break" leaves the switch: then either an iterator was found
     (accelerated_start is set) or the scan of this alternative ends. */
  while (TRUE)
    {
    accelerated_start = NULL;

    switch(*cc)
      {
      case OP_SOD:
      case OP_SOM:
      case OP_SET_SOM:
      case OP_NOT_WORD_BOUNDARY:
      case OP_WORD_BOUNDARY:
      case OP_EODN:
      case OP_EOD:
      case OP_CIRC:
      case OP_CIRCM:
      case OP_DOLL:
      case OP_DOLLM:
      case OP_NOT_UCP_WORD_BOUNDARY:
      case OP_UCP_WORD_BOUNDARY:
      /* Zero width assertions. */
      cc++;
      continue;

      /* Single code unit character types: raise the counter to at least 1. */
      case OP_NOT_DIGIT:
      case OP_DIGIT:
      case OP_NOT_WHITESPACE:
      case OP_WHITESPACE:
      case OP_NOT_WORDCHAR:
      case OP_WORDCHAR:
      case OP_ANY:
      case OP_ALLANY:
      case OP_ANYBYTE:
      case OP_NOT_HSPACE:
      case OP_HSPACE:
      case OP_NOT_VSPACE:
      case OP_VSPACE:
      if (count < 1)
        count = 1;
      cc++;
      continue;

      /* These two raise the counter to at least 3. */
      case OP_ANYNL:
      case OP_EXTUNI:
      if (count < 3)
        count = 3;
      cc++;
      continue;

      case OP_NOTPROP:
      case OP_PROP:
      if (count < 1)
        count = 1;
      cc += 1 + 2;
      continue;

      case OP_CHAR:
      case OP_CHARI:
      case OP_NOT:
      case OP_NOTI:
      if (count < 1)
        count = 1;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      /* A lazy iterator turns a counter of 2 into 3. */
      case OP_TYPEMINSTAR:
      case OP_TYPEMINPLUS:
      if (count == 2)
        count = 3;
      /* Fall through */

      case OP_TYPESTAR:
      case OP_TYPEPLUS:
      case OP_TYPEPOSSTAR:
      case OP_TYPEPOSPLUS:
      /* The type or prop opcode is skipped in the next iteration. */
      cc += 1;

      /* Every type except OP_ANYNL / OP_EXTUNI is a candidate; the
         recorded position is the iterator opcode itself. */
      if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)
        {
        accelerated_start = cc - 1;
        break;
        }

      if (count < 3)
        count = 3;
      continue;

      case OP_TYPEEXACT:
      if (count < 1)
        count = 1;
      cc += 1 + IMM2_SIZE;
      continue;

      case OP_TYPEUPTO:
      case OP_TYPEMINUPTO:
      case OP_TYPEPOSUPTO:
      cc += IMM2_SIZE;
      /* Fall through */

      case OP_TYPEQUERY:
      case OP_TYPEMINQUERY:
      case OP_TYPEPOSQUERY:
      /* The type or prop opcode is skipped in the next iteration. */
      if (count < 3)
        count = 3;
      cc += 1;
      continue;

      /* A lazy iterator turns a counter of 2 into 3. */
      case OP_MINSTAR:
      case OP_MINPLUS:
      case OP_MINSTARI:
      case OP_MINPLUSI:
      case OP_NOTMINSTAR:
      case OP_NOTMINPLUS:
      case OP_NOTMINSTARI:
      case OP_NOTMINPLUSI:
      if (count == 2)
        count = 3;
      /* Fall through */

      case OP_STAR:
      case OP_PLUS:
      case OP_POSSTAR:
      case OP_POSPLUS:

      case OP_STARI:
      case OP_PLUSI:
      case OP_POSSTARI:
      case OP_POSPLUSI:

      case OP_NOTSTAR:
      case OP_NOTPLUS:
      case OP_NOTPOSSTAR:
      case OP_NOTPOSPLUS:

      case OP_NOTSTARI:
      case OP_NOTPLUSI:
      case OP_NOTPOSSTARI:
      case OP_NOTPOSPLUSI:
      /* Star and plus character iterators are candidates. */
      accelerated_start = cc;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      case OP_EXACT:
      if (count < 1)
        count = 1;
      cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      /* Iterators with a repeat count: skip the count, then handle them
         like the query iterators below. */
      case OP_UPTO:
      case OP_MINUPTO:
      case OP_POSUPTO:
      case OP_UPTOI:
      case OP_MINUPTOI:
      case OP_EXACTI:
      case OP_POSUPTOI:
      case OP_NOTUPTO:
      case OP_NOTMINUPTO:
      case OP_NOTEXACT:
      case OP_NOTPOSUPTO:
      case OP_NOTUPTOI:
      case OP_NOTMINUPTOI:
      case OP_NOTEXACTI:
      case OP_NOTPOSUPTOI:
      cc += IMM2_SIZE;
      /* Fall through */

      case OP_QUERY:
      case OP_MINQUERY:
      case OP_POSQUERY:
      case OP_QUERYI:
      case OP_MINQUERYI:
      case OP_POSQUERYI:
      case OP_NOTQUERY:
      case OP_NOTMINQUERY:
      case OP_NOTPOSQUERY:
      case OP_NOTQUERYI:
      case OP_NOTMINQUERYI:
      case OP_NOTPOSQUERYI:
      if (count < 3)
        count = 3;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      /* Character classes: step over the class data, then look at the
         repeat opcode (if any) which follows it. */
      case OP_CLASS:
      case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
      case OP_XCLASS:
      accelerated_start = cc;
      cc += ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR))));
#else
      accelerated_start = cc;
      cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
#endif

      /* In this inner switch "break" only leaves the inner switch (the
         class is a candidate), while "continue" restarts the scanning
         loop, which clears accelerated_start again. */
      switch (*cc)
        {
        case OP_CRMINSTAR:
        case OP_CRMINPLUS:
        if (count == 2)
          count = 3;
        /* Fall through */

        case OP_CRSTAR:
        case OP_CRPLUS:
        case OP_CRPOSSTAR:
        case OP_CRPOSPLUS:
        cc++;
        break;

        case OP_CRRANGE:
        case OP_CRMINRANGE:
        case OP_CRPOSRANGE:
        if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
          {
          /* Exact repeat. */
          cc += 1 + 2 * IMM2_SIZE;
          if (count < 1)
            count = 1;
          continue;
          }

        cc += 2 * IMM2_SIZE;
        /* Fall through */
        case OP_CRQUERY:
        case OP_CRMINQUERY:
        case OP_CRPOSQUERY:
        cc++;
        if (count < 3)
          count = 3;
        continue;

        default:
        /* No repeat. */
        if (count < 1)
          count = 1;
        continue;
        }
      break;

      case OP_BRA:
      case OP_CBRA:
      /* The nested scan starts from the counter value seen before the bracket. */
      prev_count = count;
      if (count < 1)
        count = 1;

      /* Nesting limit. */
      if (depth >= 4)
        break;

      if (count < 3 && cc[GET(cc, 1)] == OP_ALT)
        count = 3;

      /* Only brackets closed by a plain OP_KET are entered, and capture
         brackets only when they are still marked as optimized. */
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0))
        break;

      prev_count = detect_early_fail(common, cc, private_data_start, depth + 1, prev_count);

      if (prev_count > count)
        count = prev_count;

      /* Propagate the mark of the nested bracket to the current one. */
      if (PRIVATE_DATA(cc) != 0)
        common->private_data_ptrs[begin - common->start] = 1;

      if (count < EARLY_FAIL_ENHANCE_MAX)
        {
        cc = end;
        continue;
        }
      break;

      case OP_KET:
      SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
      /* A ket at or after the end of this alternative ends the scan. */
      if (cc >= next_alt)
        break;
      cc += 1 + LINK_SIZE;
      continue;
      }

    /* No candidate iterator: the scan of this alternative is over. */
    if (accelerated_start == NULL)
      break;

    /* Record the enhancement in the slot which follows the iterator
       opcode: the private data offset is shifted left by three bits
       and combined with the enhancement type. */
    if (count == 0)
      {
      /* Skip enhancement: one word is reserved. */
      common->fast_forward_bc_ptr = accelerated_start;
      common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
      *private_data_start += sizeof(sljit_sw);
      count = 4;
      }
    else if (count < 3)
      {
      /* Fail enhancement: one word is reserved. */
      common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;

      if (common->early_fail_start_ptr == 0)
        common->early_fail_start_ptr = *private_data_start;

      *private_data_start += sizeof(sljit_sw);
      common->early_fail_end_ptr = *private_data_start;

      if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
        return EARLY_FAIL_ENHANCE_MAX;

      count = 4;
      }
    else
      {
      /* Ranged fail enhancement: two words are reserved. */
      common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;

      if (common->early_fail_start_ptr == 0)
        common->early_fail_start_ptr = *private_data_start;

      *private_data_start += 2 * sizeof(sljit_sw);
      common->early_fail_end_ptr = *private_data_start;

      if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
        return EARLY_FAIL_ENHANCE_MAX;

      count++;
      }

    /* Cannot be part of a repeat. */
    common->private_data_ptrs[begin - common->start] = 1;

    if (count >= EARLY_FAIL_ENHANCE_MAX)
      break;
    }

  /* The scan stopped before reaching the end of the alternative. */
  if (*cc != OP_ALT && *cc != OP_KET)
    result = EARLY_FAIL_ENHANCE_MAX;
  else if (result < count)
    result = count;

  cc = next_alt;
  next_alt = cc + GET(cc, 1);
  }
while (*cc == OP_ALT);

return result;
}
1666 
/* Computes a size (0, 1 or 2) from the repeat opcode which follows a
character class.

Arguments:
  cc   points to the code unit which follows the class data

Returns: 2 for OP_CRSTAR / OP_CRPLUS, 1 for the lazy star / plus and both
         query forms, a value derived from the limits for OP_CRRANGE /
         OP_CRMINRANGE (at most 2), and 0 for anything else
*/
static int get_class_iterator_size(PCRE2_SPTR cc)
{
sljit_u32 opcode = *cc;
sljit_u32 lower;
sljit_u32 upper;
sljit_u32 span;

if (opcode == OP_CRSTAR || opcode == OP_CRPLUS)
  return 2;

if (opcode == OP_CRMINSTAR || opcode == OP_CRMINPLUS
    || opcode == OP_CRQUERY || opcode == OP_CRMINQUERY)
  return 1;

/* Only the greedy and lazy range repeats are left to handle. */
if (opcode != OP_CRRANGE && opcode != OP_CRMINRANGE)
  return 0;

lower = GET2(cc, 1);
upper = GET2(cc, 1 + IMM2_SIZE);

/* A zero upper limit is sized like the star iterators. */
if (upper == 0)
  return (opcode == OP_CRRANGE) ? 2 : 1;

/* Otherwise the result is the distance of the limits, capped at two. */
span = upper - lower;
return (span > 2) ? 2 : (int)span;
}
1698 
/* Detects a bracket which is followed by identical copies of itself and
records the repeat in the private data slots which follow its closing
OP_KET.

Three consecutive slots are written, starting at the slot right after the
OP_KET opcode: the length of the code covered by the repeat, the repeat
type (OP_EXACT, OP_UPTO or OP_MINUPTO) and the repeat count.

Arguments:
  common   compiler state; common->private_data_ptrs[] is updated
  begin    points to the opening opcode of the bracket

Returns: TRUE if the bracket is (or already was) recorded as a repeat,
         FALSE otherwise
*/
static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
{
PCRE2_SPTR end = bracketend(begin);
PCRE2_SPTR next;
PCRE2_SPTR next_end;
PCRE2_SPTR max_end;
PCRE2_UCHAR type;
sljit_sw length = end - begin;
sljit_s32 min, max, i;

/* Detect fixed iterations first. */
/* Only brackets closed by a plain OP_KET and without private data qualify. */
if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)
  return FALSE;

/* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?:AB){1,3}/
 * Skip the check of the second part. */
if (PRIVATE_DATA(end - LINK_SIZE) != 0)
  return TRUE;

/* Count the bracket and the code unit identical copies which directly
follow it. */
next = end;
min = 1;
while (1)
  {
  if (*next != *begin)
    break;
  next_end = bracketend(next);
  if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
    break;
  next = next_end;
  min++;
  }

/* Exactly two copies are not converted. */
if (min == 2)
  return FALSE;

/* Look for optional copies: each one is a zero-iteration prefix followed
by an OP_BRA which wraps a copy of the bracket (and the next optional part). */
max = 0;
max_end = next;
if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
  {
  type = *next;
  while (1)
    {
    if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
      break;
    next_end = bracketend(next + 2 + LINK_SIZE);
    if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
      break;
    next = next_end;
    max++;
    }

  /* The innermost optional copy has no wrapping OP_BRA. */
  if (next[0] == type && next[1] == *begin && max >= 1)
    {
    next_end = bracketend(next + 1);
    if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
      {
      /* Each wrapping OP_BRA must be closed by a plain OP_KET. */
      for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
        if (*next_end != OP_KET)
          break;

      if (i == max)
        {
        /* Record the bounded repeat on the OP_KET of the last fixed copy. */
        common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
        /* +2 the original and the last. */
        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
        if (min == 1)
          return TRUE;
        /* The last fixed copy now belongs to the bounded repeat: move
           max_end back to the start of that copy. */
        min--;
        max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
        }
      }
    }
  }

/* Record the remaining fixed copies as an exact repeat on the OP_KET of
the first bracket. */
if (min >= 3)
  {
  common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
  common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
  common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
  return TRUE;
  }

return FALSE;
}
1784 
/* Each macro below expands to a run of case labels, so that a switch can
handle a whole family of iterator opcodes with a single body. In
set_private_data_ptrs() the _1 character iterators get one private data
word, the _2A and _2B character iterators get two. */

/* Character iterators: lazy star / plus and all query forms. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

/* Character iterators: greedy star / plus. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

/* Character iterators: greedy and lazy upto (these carry a repeat count). */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* Character type iterators: lazy star / plus and both query forms. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Character type iterators: greedy star / plus. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* Character type iterators: greedy and lazy upto. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1836 
/* Walks the compiled pattern from common->start and assigns private data
offsets to the opcodes which need them. The offset of an opcode is stored
in common->private_data_ptrs[], indexed by the position of the opcode
relative to common->start.

Arguments:
  common              compiler state; common->private_data_ptrs[] is updated
  private_data_start  in: first free private data offset
                      out: first free offset after the assignments
  ccend               end of the scanned range (exclusive)

The scan stops early once the offset exceeds SLJIT_MAX_LOCAL_SIZE.
*/
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
PCRE2_SPTR cc = common->start;
PCRE2_SPTR alternative;
/* Iterators located before this position do not get a private slot. */
PCRE2_SPTR end = NULL;
int private_data_ptr = *private_data_start;
/* space:      number of words an iterator needs (0 if none)
   size:       distance to the next opcode; a negative value means that
               a character follows, which may have extra code units in UTF mode
   bracketlen: length of a bracket header (0 if cc is not a bracket) */
int space, size, bracketlen;
BOOL repeat_check = TRUE;

while (cc < ccend)
  {
  space = 0;
  size = 0;
  bracketlen = 0;
  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  /* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point). */
  if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
    {
    if (detect_repeat(common, cc))
      {
      /* These brackets are converted to repeats, so no global
      based single character repeat is allowed. */
      if (cc >= end)
        end = bracketend(cc);
      }
    }
  repeat_check = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero value in the slot after the OP_KET is the length recorded
       by detect_repeat(): the ket gets a private word and the code of
       the repeated copies is skipped. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    /* Brackets which always get one private word. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_ASSERTBACK_NA:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);

    /* When an alternative starts with OP_VREVERSE, the next slot is
       flagged and a second word is reserved. */
    if (find_vreverse(cc))
      {
      common->private_data_ptrs[cc + 1 - common->start] = 1;
      private_data_ptr += sizeof(sljit_sw);
      }

    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    /* A private word is reserved only if the link leads to a repeating ket. */
    common->private_data_ptrs[cc - common->start] = 0;
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    /* Brackets without a private word; only the header is skipped. */
    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    /* The bracket which follows a zero iteration prefix is not checked
       for repeats. */
    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    size = 1;
    repeat_check = FALSE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    size = -2;
    space = 1;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    size = -2;
    space = 2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    size = -(2 + IMM2_SIZE);
    space = 2;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    size = 1;
    space = 1;
    break;

    /* Greedy type iterators get no slot when the type is OP_ANYNL or
       OP_EXTUNI. */
    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    size = 1;
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      space = 2;
    break;

    case OP_TYPEUPTO:
    size = 1 + IMM2_SIZE;
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    break;

    case OP_TYPEMINUPTO:
    size = 1 + IMM2_SIZE;
    space = 2;
    break;

    /* For classes the number of words depends on the repeat opcode
       which follows the class data. */
    case OP_CLASS:
    case OP_NCLASS:
    size = 1 + 32 / sizeof(PCRE2_UCHAR);
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    /* Everything else is simply stepped over; space, size and bracketlen
       stay zero, so none of the steps below changes cc again. */
    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
     gets a private slot instead of allocating it on the stack. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw) * space;
    }

  /* Advance past the iterator (cc still points to its opcode here). */
  if (size != 0)
    {
    if (size < 0)
      {
      cc += -size;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  /* Enter the bracket. Outside any tracked region, a bracket closed by
     something other than a plain OP_KET starts a new region (up to its
     end) in which iterators get no private slots; a plain OP_KET clears
     the region. */
  if (bracketlen > 0)
    {
    if (cc >= end)
      {
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
*private_data_start = private_data_ptr;
}
2033 
2034 /* Returns with a frame_types (always < 0) if no need for frame. */
get_framesize(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,BOOL recursive,BOOL * needs_control_head)2035 static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
2036 {
2037 int length = 0;
2038 int possessive = 0;
2039 BOOL stack_restore = FALSE;
2040 BOOL setsom_found = recursive;
2041 BOOL setmark_found = recursive;
2042 /* The last capture is a local variable even for recursions. */
2043 BOOL capture_last_found = FALSE;
2044 
2045 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2046 SLJIT_ASSERT(common->control_head_ptr != 0);
2047 *needs_control_head = TRUE;
2048 #else
2049 *needs_control_head = FALSE;
2050 #endif
2051 
2052 if (ccend == NULL)
2053   {
2054   ccend = bracketend(cc) - (1 + LINK_SIZE);
2055   if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
2056     {
2057     possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
2058     /* This is correct regardless of common->capture_last_ptr. */
2059     capture_last_found = TRUE;
2060     }
2061   cc = next_opcode(common, cc);
2062   }
2063 
2064 SLJIT_ASSERT(cc != NULL);
2065 while (cc < ccend)
2066   switch(*cc)
2067     {
2068     case OP_SET_SOM:
2069     SLJIT_ASSERT(common->has_set_som);
2070     stack_restore = TRUE;
2071     if (!setsom_found)
2072       {
2073       length += 2;
2074       setsom_found = TRUE;
2075       }
2076     cc += 1;
2077     break;
2078 
2079     case OP_MARK:
2080     case OP_COMMIT_ARG:
2081     case OP_PRUNE_ARG:
2082     case OP_THEN_ARG:
2083     SLJIT_ASSERT(common->mark_ptr != 0);
2084     stack_restore = TRUE;
2085     if (!setmark_found)
2086       {
2087       length += 2;
2088       setmark_found = TRUE;
2089       }
2090     if (common->control_head_ptr != 0)
2091       *needs_control_head = TRUE;
2092     cc += 1 + 2 + cc[1];
2093     break;
2094 
2095     case OP_RECURSE:
2096     stack_restore = TRUE;
2097     if (common->has_set_som && !setsom_found)
2098       {
2099       length += 2;
2100       setsom_found = TRUE;
2101       }
2102     if (common->mark_ptr != 0 && !setmark_found)
2103       {
2104       length += 2;
2105       setmark_found = TRUE;
2106       }
2107     if (common->capture_last_ptr != 0 && !capture_last_found)
2108       {
2109       length += 2;
2110       capture_last_found = TRUE;
2111       }
2112     cc += 1 + LINK_SIZE;
2113     break;
2114 
2115     case OP_CBRA:
2116     case OP_CBRAPOS:
2117     case OP_SCBRA:
2118     case OP_SCBRAPOS:
2119     stack_restore = TRUE;
2120     if (common->capture_last_ptr != 0 && !capture_last_found)
2121       {
2122       length += 2;
2123       capture_last_found = TRUE;
2124       }
2125     length += 3;
2126     cc += 1 + LINK_SIZE + IMM2_SIZE;
2127     break;
2128 
2129     case OP_THEN:
2130     stack_restore = TRUE;
2131     if (common->control_head_ptr != 0)
2132       *needs_control_head = TRUE;
2133     cc ++;
2134     break;
2135 
2136     default:
2137     stack_restore = TRUE;
2138     /* Fall through. */
2139 
2140     case OP_NOT_WORD_BOUNDARY:
2141     case OP_WORD_BOUNDARY:
2142     case OP_NOT_DIGIT:
2143     case OP_DIGIT:
2144     case OP_NOT_WHITESPACE:
2145     case OP_WHITESPACE:
2146     case OP_NOT_WORDCHAR:
2147     case OP_WORDCHAR:
2148     case OP_ANY:
2149     case OP_ALLANY:
2150     case OP_ANYBYTE:
2151     case OP_NOTPROP:
2152     case OP_PROP:
2153     case OP_ANYNL:
2154     case OP_NOT_HSPACE:
2155     case OP_HSPACE:
2156     case OP_NOT_VSPACE:
2157     case OP_VSPACE:
2158     case OP_EXTUNI:
2159     case OP_EODN:
2160     case OP_EOD:
2161     case OP_CIRC:
2162     case OP_CIRCM:
2163     case OP_DOLL:
2164     case OP_DOLLM:
2165     case OP_CHAR:
2166     case OP_CHARI:
2167     case OP_NOT:
2168     case OP_NOTI:
2169 
2170     case OP_EXACT:
2171     case OP_POSSTAR:
2172     case OP_POSPLUS:
2173     case OP_POSQUERY:
2174     case OP_POSUPTO:
2175 
2176     case OP_EXACTI:
2177     case OP_POSSTARI:
2178     case OP_POSPLUSI:
2179     case OP_POSQUERYI:
2180     case OP_POSUPTOI:
2181 
2182     case OP_NOTEXACT:
2183     case OP_NOTPOSSTAR:
2184     case OP_NOTPOSPLUS:
2185     case OP_NOTPOSQUERY:
2186     case OP_NOTPOSUPTO:
2187 
2188     case OP_NOTEXACTI:
2189     case OP_NOTPOSSTARI:
2190     case OP_NOTPOSPLUSI:
2191     case OP_NOTPOSQUERYI:
2192     case OP_NOTPOSUPTOI:
2193 
2194     case OP_TYPEEXACT:
2195     case OP_TYPEPOSSTAR:
2196     case OP_TYPEPOSPLUS:
2197     case OP_TYPEPOSQUERY:
2198     case OP_TYPEPOSUPTO:
2199 
2200     case OP_CLASS:
2201     case OP_NCLASS:
2202     case OP_XCLASS:
2203 
2204     case OP_CALLOUT:
2205     case OP_CALLOUT_STR:
2206 
2207     case OP_NOT_UCP_WORD_BOUNDARY:
2208     case OP_UCP_WORD_BOUNDARY:
2209 
2210     cc = next_opcode(common, cc);
2211     SLJIT_ASSERT(cc != NULL);
2212     break;
2213     }
2214 
2215 /* Possessive quantifiers can use a special case. */
2216 if (SLJIT_UNLIKELY(possessive == length))
2217   return stack_restore ? no_frame : no_stack;
2218 
2219 if (length > 0)
2220   return length + 1;
2221 return stack_restore ? no_frame : no_stack;
2222 }
2223 
init_frame(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,int stackpos,int stacktop)2224 static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
2225 {
2226 DEFINE_COMPILER;
2227 BOOL setsom_found = FALSE;
2228 BOOL setmark_found = FALSE;
2229 /* The last capture is a local variable even for recursions. */
2230 BOOL capture_last_found = FALSE;
2231 int offset;
2232 
2233 /* >= 1 + shortest item size (2) */
2234 SLJIT_UNUSED_ARG(stacktop);
2235 SLJIT_ASSERT(stackpos >= stacktop + 2);
2236 
2237 stackpos = STACK(stackpos);
2238 if (ccend == NULL)
2239   {
2240   ccend = bracketend(cc) - (1 + LINK_SIZE);
2241   if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
2242     cc = next_opcode(common, cc);
2243   }
2244 
2245 SLJIT_ASSERT(cc != NULL);
2246 while (cc < ccend)
2247   switch(*cc)
2248     {
2249     case OP_SET_SOM:
2250     SLJIT_ASSERT(common->has_set_som);
2251     if (!setsom_found)
2252       {
2253       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2254       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2255       stackpos -= SSIZE_OF(sw);
2256       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2257       stackpos -= SSIZE_OF(sw);
2258       setsom_found = TRUE;
2259       }
2260     cc += 1;
2261     break;
2262 
2263     case OP_MARK:
2264     case OP_COMMIT_ARG:
2265     case OP_PRUNE_ARG:
2266     case OP_THEN_ARG:
2267     SLJIT_ASSERT(common->mark_ptr != 0);
2268     if (!setmark_found)
2269       {
2270       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2271       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2272       stackpos -= SSIZE_OF(sw);
2273       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2274       stackpos -= SSIZE_OF(sw);
2275       setmark_found = TRUE;
2276       }
2277     cc += 1 + 2 + cc[1];
2278     break;
2279 
2280     case OP_RECURSE:
2281     if (common->has_set_som && !setsom_found)
2282       {
2283       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2284       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2285       stackpos -= SSIZE_OF(sw);
2286       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2287       stackpos -= SSIZE_OF(sw);
2288       setsom_found = TRUE;
2289       }
2290     if (common->mark_ptr != 0 && !setmark_found)
2291       {
2292       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2293       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2294       stackpos -= SSIZE_OF(sw);
2295       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2296       stackpos -= SSIZE_OF(sw);
2297       setmark_found = TRUE;
2298       }
2299     if (common->capture_last_ptr != 0 && !capture_last_found)
2300       {
2301       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2302       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2303       stackpos -= SSIZE_OF(sw);
2304       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2305       stackpos -= SSIZE_OF(sw);
2306       capture_last_found = TRUE;
2307       }
2308     cc += 1 + LINK_SIZE;
2309     break;
2310 
2311     case OP_CBRA:
2312     case OP_CBRAPOS:
2313     case OP_SCBRA:
2314     case OP_SCBRAPOS:
2315     if (common->capture_last_ptr != 0 && !capture_last_found)
2316       {
2317       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2318       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2319       stackpos -= SSIZE_OF(sw);
2320       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2321       stackpos -= SSIZE_OF(sw);
2322       capture_last_found = TRUE;
2323       }
2324     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2325     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
2326     stackpos -= SSIZE_OF(sw);
2327     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
2328     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
2329     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2330     stackpos -= SSIZE_OF(sw);
2331     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
2332     stackpos -= SSIZE_OF(sw);
2333 
2334     cc += 1 + LINK_SIZE + IMM2_SIZE;
2335     break;
2336 
2337     default:
2338     cc = next_opcode(common, cc);
2339     SLJIT_ASSERT(cc != NULL);
2340     break;
2341     }
2342 
2343 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
2344 SLJIT_ASSERT(stackpos == STACK(stacktop));
2345 }
2346 
2347 #define RECURSE_TMP_REG_COUNT 3
2348 
2349 typedef struct delayed_mem_copy_status {
2350   struct sljit_compiler *compiler;
2351   int store_bases[RECURSE_TMP_REG_COUNT];
2352   int store_offsets[RECURSE_TMP_REG_COUNT];
2353   int tmp_regs[RECURSE_TMP_REG_COUNT];
2354   int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
2355   int next_tmp_reg;
2356 } delayed_mem_copy_status;
2357 
/* Prepares a delayed memory copy: binds the status to the compiler of common,
marks every slot as having no pending store and selects slot 0 for the first
move. The caller must fill tmp_regs[] and saved_tmp_regs[] before the call;
they are only checked here. */
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
int slot = 0;

status->compiler = common->compiler;
status->next_tmp_reg = 0;

while (slot < RECURSE_TMP_REG_COUNT)
  {
  SLJIT_ASSERT(status->tmp_regs[slot] >= 0);
  /* Either the save location is a virtual register, or no saving is needed
  because it is the temporary register itself. */
  SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[slot]) < 0 || status->tmp_regs[slot] == status->saved_tmp_regs[slot]);

  /* -1 means: no store is pending for this slot. */
  status->store_bases[slot] = -1;
  slot++;
  }
}
2372 
/* Queues the copy of one word from [load_base + load_offset] to
[store_base + store_offset]. The load is emitted immediately into the next
temporary register; the store is only recorded and is emitted when this
register is reused by a later move or by delayed_mem_copy_finish().

Before the register is overwritten, its pending store (if any) is emitted.
On the first use of a slot the previous register content is preserved
instead, provided its save location is a virtual register. */
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_sw store_offset)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int reg = status->tmp_regs[slot];
int pending_base = status->store_bases[slot];

SLJIT_ASSERT(load_base > 0 && store_base > 0);

if (pending_base != -1)
  {
  /* Flush the store which was delayed on this register. */
  OP1(SLJIT_MOV, SLJIT_MEM1(pending_base), status->store_offsets[slot], reg, 0);
  }
else if (sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[slot]) < 0)
  {
  /* Preserve virtual registers. */
  OP1(SLJIT_MOV, status->saved_tmp_regs[slot], 0, reg, 0);
  }

OP1(SLJIT_MOV, reg, 0, SLJIT_MEM1(load_base), load_offset);

/* Record the store, and advance to the next register in the rotation. */
status->store_offsets[slot] = store_offset;
status->store_bases[slot] = store_base;
status->next_tmp_reg = (slot + 1) % RECURSE_TMP_REG_COUNT;
}
2397 
/* Completes a delayed memory copy: emits every store which is still pending,
visiting the slots in rotation order starting from the one the next move
would have used. After the store of a slot is emitted, the previous content
of its temporary register is restored when that content was preserved in a
virtual register. */
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int remaining;

for (remaining = RECURSE_TMP_REG_COUNT; remaining > 0; remaining--, slot = (slot + 1) % RECURSE_TMP_REG_COUNT)
  {
  int reg, saved_reg;

  /* Nothing was queued on this slot. */
  if (status->store_bases[slot] == -1)
    continue;

  reg = status->tmp_regs[slot];
  saved_reg = status->saved_tmp_regs[slot];

  OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], reg, 0);

  /* Restore virtual registers. */
  if (sljit_get_register_index(SLJIT_GP_REGISTER, saved_reg) < 0)
    OP1(SLJIT_MOV, reg, 0, saved_reg, 0);
  }
}

#undef RECURSE_TMP_REG_COUNT
2423 
recurse_check_bit(compiler_common * common,sljit_sw bit_index)2424 static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index)
2425 {
2426 uint8_t *byte;
2427 uint8_t mask;
2428 
2429 SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0);
2430 
2431 bit_index >>= SLJIT_WORD_SHIFT;
2432 
2433 SLJIT_ASSERT((bit_index >> 3) < common->recurse_bitset_size);
2434 
2435 mask = 1 << (bit_index & 0x7);
2436 byte = common->recurse_bitset + (bit_index >> 3);
2437 
2438 if (*byte & mask)
2439   return FALSE;
2440 
2441 *byte |= mask;
2442 return TRUE;
2443 }
2444 
/* Bit flags collected by get_recurse_data_length() while it scans the byte
code of a recursion. */
enum get_recurse_flags {
  /* A (*PRUNE), (*SKIP), (*COMMIT) or (*THEN) verb (with or without
  argument) was found. */
  recurse_flag_quit_found = 0x01,
  /* OP_ACCEPT or OP_ASSERT_ACCEPT was found. */
  recurse_flag_accept_found = 0x02,
  /* The start of match may be changed (OP_SET_SOM, or OP_RECURSE when the
  pattern has OP_SET_SOM). */
  recurse_flag_setsom_found = 0x04,
  /* The mark may be changed (a verb with a mark argument, or OP_RECURSE
  when the pattern uses marks). */
  recurse_flag_setmark_found = 0x08,
  /* The control head needs a slot in the recursion data. */
  recurse_flag_control_head_found = 0x10
};
2452 
/* Accounts for an iterator which owns two consecutive private words starting
at offset. Returns 2 when the first word has not been seen before, and 0 when
the iterator has no private data (offset == 0) or was already counted.

NOTE(review): the bit of the second word is marked inside the assertion,
exactly as the inline code did; whether that call is evaluated depends on how
SLJIT_ASSERT is configured. */
static int recurse_length_of_pair(compiler_common *common, int offset)
{
if (offset == 0 || !recurse_check_bit(common, offset))
  return 0;

SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
return 2;
}

/* Scans the byte code range [cc, ccend) of a recursion and returns the number
of machine words needed for saving its data. Every word is counted only once;
common->recurse_bitset (cleared on entry) remembers the words already seen.

The count starts at one. One extra word is added when the control head is
needed, and one word each for the start of match and the mark when they may
change and a quit-type verb is present.

The collected get_recurse_flags bits are stored in *result_flags. */
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, uint32_t *result_flags)
{
int words = 1;
int class_len, slot, capture;
PCRE2_SPTR alt;
uint32_t flags = 0;

memset(common->recurse_bitset, 0, common->recurse_bitset_size);

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
flags |= recurse_flag_control_head_found;
#endif

/* Calculate the sum of the private machine words. */
while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    flags |= recurse_flag_setsom_found;
    cc += 1;
    break;

    case OP_RECURSE:
    if (common->has_set_som)
      flags |= recurse_flag_setsom_found;
    if (common->mark_ptr != 0)
      flags |= recurse_flag_setmark_found;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      words += 1;
    cc += 1 + LINK_SIZE;
    break;

    case OP_KET:
    slot = PRIVATE_DATA(cc);
    if (slot != 0)
      {
      if (recurse_check_bit(common, slot))
        words += 1;
      /* The private data of the next unit holds the extra skip distance. */
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    /* These brackets always have one private word. */
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
      words += 1;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    capture = GET2(cc, 1 + LINK_SIZE);
    /* The ovector pair of the capture. */
    if (recurse_check_bit(common, OVECTOR(capture << 1)))
      {
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((capture << 1) + 1)));
      words += 2;
      }
    if (common->optimized_cbracket[capture] == 0 && recurse_check_bit(common, OVECTOR_PRIV(capture)))
      words += 1;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      words += 1;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    capture = GET2(cc, 1 + LINK_SIZE);
    /* The ovector pair of the capture. */
    if (recurse_check_bit(common, OVECTOR(capture << 1)))
      {
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((capture << 1) + 1)));
      words += 2;
      }
    if (recurse_check_bit(common, OVECTOR_PRIV(capture)))
      words += 1;
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
      words += 1;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      words += 1;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alt = cc + GET(cc, 1);
    if ((*alt == OP_KETRMAX || *alt == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc)))
      words += 1;
    cc += 1 + LINK_SIZE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    slot = PRIVATE_DATA(cc);
    if (slot != 0 && recurse_check_bit(common, slot))
      words += 1;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    words += recurse_length_of_pair(common, PRIVATE_DATA(cc));
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    words += recurse_length_of_pair(common, PRIVATE_DATA(cc));
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    slot = PRIVATE_DATA(cc);
    if (slot != 0 && recurse_check_bit(common, slot))
      words += 1;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    words += recurse_length_of_pair(common, PRIVATE_DATA(cc));
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    words += recurse_length_of_pair(common, PRIVATE_DATA(cc));
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    class_len = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
    class_len = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif

    /* The word count depends on the iterator which follows the class. */
    slot = PRIVATE_DATA(cc);
    if (slot != 0 && recurse_check_bit(common, slot))
      words += get_class_iterator_size(cc + class_len);
    cc += class_len;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    flags |= recurse_flag_setmark_found;
    if (common->control_head_ptr != 0)
      flags |= recurse_flag_control_head_found;
    /* OP_MARK only sets the mark, the other three can also quit. */
    if (*cc != OP_MARK)
      flags |= recurse_flag_quit_found;

    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_SKIP:
    case OP_COMMIT:
    flags |= recurse_flag_quit_found;
    cc++;
    break;

    case OP_SKIP_ARG:
    flags |= recurse_flag_quit_found;
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    flags |= recurse_flag_quit_found | recurse_flag_control_head_found;
    cc++;
    break;

    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    flags |= recurse_flag_accept_found;
    cc++;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }
SLJIT_ASSERT(cc == ccend);

/* Words which depend on the collected flags. */
if ((flags & recurse_flag_control_head_found) != 0)
  words += 1;
if ((flags & recurse_flag_quit_found) != 0)
  {
  if ((flags & recurse_flag_setsom_found) != 0)
    words += 1;
  if ((flags & recurse_flag_setmark_found) != 0)
    words += 1;
  }

*result_flags = flags;
return words;
}
2696 
/* Copy modes of copy_recurse_data(). The items of a recursion are split into
private, shared and kept shared groups; the mode selects the copy direction
and the groups which take part in it. */
enum copy_recurse_data_types {
  /* Local variables -> stack, all three groups. */
  recurse_copy_from_global = 0,
  /* Stack -> local variables, private group only. */
  recurse_copy_private_to_global = 1,
  /* Stack -> local variables, shared and kept shared groups. */
  recurse_copy_shared_to_global = 2,
  /* Stack -> local variables, kept shared group only. */
  recurse_copy_kept_shared_to_global = 3,
  /* Exchanges local variables and the saved data (addressed through TMP2),
  private and shared groups. */
  recurse_swap_global = 4
};
2704 
copy_recurse_data(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,int type,int stackptr,int stacktop,uint32_t recurse_flags)2705 static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
2706   int type, int stackptr, int stacktop, uint32_t recurse_flags)
2707 {
2708 delayed_mem_copy_status status;
2709 PCRE2_SPTR alternative;
2710 sljit_sw private_srcw[2];
2711 sljit_sw shared_srcw[3];
2712 sljit_sw kept_shared_srcw[2];
2713 int private_count, shared_count, kept_shared_count;
2714 int from_sp, base_reg, offset, i;
2715 
2716 memset(common->recurse_bitset, 0, common->recurse_bitset_size);
2717 
2718 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2719 SLJIT_ASSERT(common->control_head_ptr != 0);
2720 recurse_check_bit(common, common->control_head_ptr);
2721 #endif
2722 
2723 switch (type)
2724   {
2725   case recurse_copy_from_global:
2726   from_sp = TRUE;
2727   base_reg = STACK_TOP;
2728   break;
2729 
2730   case recurse_copy_private_to_global:
2731   case recurse_copy_shared_to_global:
2732   case recurse_copy_kept_shared_to_global:
2733   from_sp = FALSE;
2734   base_reg = STACK_TOP;
2735   break;
2736 
2737   default:
2738   SLJIT_ASSERT(type == recurse_swap_global);
2739   from_sp = FALSE;
2740   base_reg = TMP2;
2741   break;
2742   }
2743 
2744 stackptr = STACK(stackptr);
2745 stacktop = STACK(stacktop);
2746 
2747 status.tmp_regs[0] = TMP1;
2748 status.saved_tmp_regs[0] = TMP1;
2749 
2750 if (base_reg != TMP2)
2751   {
2752   status.tmp_regs[1] = TMP2;
2753   status.saved_tmp_regs[1] = TMP2;
2754   }
2755 else
2756   {
2757   status.saved_tmp_regs[1] = RETURN_ADDR;
2758   if (HAS_VIRTUAL_REGISTERS)
2759     status.tmp_regs[1] = STR_PTR;
2760   else
2761     status.tmp_regs[1] = RETURN_ADDR;
2762   }
2763 
2764 status.saved_tmp_regs[2] = TMP3;
2765 if (HAS_VIRTUAL_REGISTERS)
2766   status.tmp_regs[2] = STR_END;
2767 else
2768   status.tmp_regs[2] = TMP3;
2769 
2770 delayed_mem_copy_init(&status, common);
2771 
2772 if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2773   {
2774   SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2775 
2776   if (!from_sp)
2777     delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);
2778 
2779   if (from_sp || type == recurse_swap_global)
2780     delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
2781   }
2782 
2783 stackptr += sizeof(sljit_sw);
2784 
2785 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2786 if (type != recurse_copy_shared_to_global)
2787   {
2788   if (!from_sp)
2789     delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);
2790 
2791   if (from_sp || type == recurse_swap_global)
2792     delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
2793   }
2794 
2795 stackptr += sizeof(sljit_sw);
2796 #endif
2797 
2798 while (cc < ccend)
2799   {
2800   private_count = 0;
2801   shared_count = 0;
2802   kept_shared_count = 0;
2803 
2804   switch(*cc)
2805     {
2806     case OP_SET_SOM:
2807     SLJIT_ASSERT(common->has_set_som);
2808     if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, OVECTOR(0)))
2809       {
2810       kept_shared_srcw[0] = OVECTOR(0);
2811       kept_shared_count = 1;
2812       }
2813     cc += 1;
2814     break;
2815 
2816     case OP_RECURSE:
2817     if (recurse_flags & recurse_flag_quit_found)
2818       {
2819       if (common->has_set_som && recurse_check_bit(common, OVECTOR(0)))
2820         {
2821         kept_shared_srcw[0] = OVECTOR(0);
2822         kept_shared_count = 1;
2823         }
2824       if (common->mark_ptr != 0 && recurse_check_bit(common, common->mark_ptr))
2825         {
2826         kept_shared_srcw[kept_shared_count] = common->mark_ptr;
2827         kept_shared_count++;
2828         }
2829       }
2830     if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2831       {
2832       shared_srcw[0] = common->capture_last_ptr;
2833       shared_count = 1;
2834       }
2835     cc += 1 + LINK_SIZE;
2836     break;
2837 
2838     case OP_KET:
2839     private_srcw[0] = PRIVATE_DATA(cc);
2840     if (private_srcw[0] != 0)
2841       {
2842       if (recurse_check_bit(common, private_srcw[0]))
2843         private_count = 1;
2844       SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2845       cc += PRIVATE_DATA(cc + 1);
2846       }
2847     cc += 1 + LINK_SIZE;
2848     break;
2849 
2850     case OP_ASSERT:
2851     case OP_ASSERT_NOT:
2852     case OP_ASSERTBACK:
2853     case OP_ASSERTBACK_NOT:
2854     case OP_ASSERT_NA:
2855     case OP_ASSERTBACK_NA:
2856     case OP_ONCE:
2857     case OP_SCRIPT_RUN:
2858     case OP_BRAPOS:
2859     case OP_SBRA:
2860     case OP_SBRAPOS:
2861     case OP_SCOND:
2862     private_srcw[0] = PRIVATE_DATA(cc);
2863     if (recurse_check_bit(common, private_srcw[0]))
2864       private_count = 1;
2865     cc += 1 + LINK_SIZE;
2866     break;
2867 
2868     case OP_CBRA:
2869     case OP_SCBRA:
2870     offset = GET2(cc, 1 + LINK_SIZE);
2871     shared_srcw[0] = OVECTOR(offset << 1);
2872     if (recurse_check_bit(common, shared_srcw[0]))
2873       {
2874       shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
2875       SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
2876       shared_count = 2;
2877       }
2878 
2879     if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2880       {
2881       shared_srcw[shared_count] = common->capture_last_ptr;
2882       shared_count++;
2883       }
2884 
2885     if (common->optimized_cbracket[offset] == 0)
2886       {
2887       private_srcw[0] = OVECTOR_PRIV(offset);
2888       if (recurse_check_bit(common, private_srcw[0]))
2889         private_count = 1;
2890       }
2891 
2892     cc += 1 + LINK_SIZE + IMM2_SIZE;
2893     break;
2894 
2895     case OP_CBRAPOS:
2896     case OP_SCBRAPOS:
2897     offset = GET2(cc, 1 + LINK_SIZE);
2898     shared_srcw[0] = OVECTOR(offset << 1);
2899     if (recurse_check_bit(common, shared_srcw[0]))
2900       {
2901       shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
2902       SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
2903       shared_count = 2;
2904       }
2905 
2906     if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2907       {
2908       shared_srcw[shared_count] = common->capture_last_ptr;
2909       shared_count++;
2910       }
2911 
2912     private_srcw[0] = PRIVATE_DATA(cc);
2913     if (recurse_check_bit(common, private_srcw[0]))
2914       private_count = 1;
2915 
2916     offset = OVECTOR_PRIV(offset);
2917     if (recurse_check_bit(common, offset))
2918       {
2919       private_srcw[private_count] = offset;
2920       private_count++;
2921       }
2922     cc += 1 + LINK_SIZE + IMM2_SIZE;
2923     break;
2924 
2925     case OP_COND:
2926     /* Might be a hidden SCOND. */
2927     alternative = cc + GET(cc, 1);
2928     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
2929       {
2930       private_srcw[0] = PRIVATE_DATA(cc);
2931       if (recurse_check_bit(common, private_srcw[0]))
2932         private_count = 1;
2933       }
2934     cc += 1 + LINK_SIZE;
2935     break;
2936 
2937     CASE_ITERATOR_PRIVATE_DATA_1
2938     private_srcw[0] = PRIVATE_DATA(cc);
2939     if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2940       private_count = 1;
2941     cc += 2;
2942 #ifdef SUPPORT_UNICODE
2943     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2944 #endif
2945     break;
2946 
2947     CASE_ITERATOR_PRIVATE_DATA_2A
2948     private_srcw[0] = PRIVATE_DATA(cc);
2949     if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2950       {
2951       private_count = 2;
2952       private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2953       SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2954       }
2955     cc += 2;
2956 #ifdef SUPPORT_UNICODE
2957     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2958 #endif
2959     break;
2960 
2961     CASE_ITERATOR_PRIVATE_DATA_2B
2962     private_srcw[0] = PRIVATE_DATA(cc);
2963     if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2964       {
2965       private_count = 2;
2966       private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2967       SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2968       }
2969     cc += 2 + IMM2_SIZE;
2970 #ifdef SUPPORT_UNICODE
2971     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2972 #endif
2973     break;
2974 
2975     CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2976     private_srcw[0] = PRIVATE_DATA(cc);
2977     if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2978       private_count = 1;
2979     cc += 1;
2980     break;
2981 
2982     CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2983     private_srcw[0] = PRIVATE_DATA(cc);
2984     if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2985       {
2986       private_count = 2;
2987       private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2988       SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2989       }
2990     cc += 1;
2991     break;
2992 
2993     CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2994     private_srcw[0] = PRIVATE_DATA(cc);
2995     if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2996       {
2997       private_count = 2;
2998       private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2999       SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
3000       }
3001     cc += 1 + IMM2_SIZE;
3002     break;
3003 
3004     case OP_CLASS:
3005     case OP_NCLASS:
3006 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
3007     case OP_XCLASS:
3008     i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
3009 #else
3010     i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
3011 #endif
3012     if (PRIVATE_DATA(cc) != 0)
3013       {
3014       private_count = 1;
3015       private_srcw[0] = PRIVATE_DATA(cc);
3016       switch(get_class_iterator_size(cc + i))
3017         {
3018         case 1:
3019         break;
3020 
3021         case 2:
3022         if (recurse_check_bit(common, private_srcw[0]))
3023           {
3024           private_count = 2;
3025           private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
3026           SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
3027           }
3028         break;
3029 
3030         default:
3031         SLJIT_UNREACHABLE();
3032         break;
3033         }
3034       }
3035     cc += i;
3036     break;
3037 
3038     case OP_MARK:
3039     case OP_COMMIT_ARG:
3040     case OP_PRUNE_ARG:
3041     case OP_THEN_ARG:
3042     SLJIT_ASSERT(common->mark_ptr != 0);
3043     if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, common->mark_ptr))
3044       {
3045       kept_shared_srcw[0] = common->mark_ptr;
3046       kept_shared_count = 1;
3047       }
3048     if (common->control_head_ptr != 0 && recurse_check_bit(common, common->control_head_ptr))
3049       {
3050       private_srcw[0] = common->control_head_ptr;
3051       private_count = 1;
3052       }
3053     cc += 1 + 2 + cc[1];
3054     break;
3055 
3056     case OP_THEN:
3057     SLJIT_ASSERT(common->control_head_ptr != 0);
3058     if (recurse_check_bit(common, common->control_head_ptr))
3059       {
3060       private_srcw[0] = common->control_head_ptr;
3061       private_count = 1;
3062       }
3063     cc++;
3064     break;
3065 
3066     default:
3067     cc = next_opcode(common, cc);
3068     SLJIT_ASSERT(cc != NULL);
3069     continue;
3070     }
3071 
3072   if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
3073     {
3074     SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
3075 
3076     for (i = 0; i < private_count; i++)
3077       {
3078       SLJIT_ASSERT(private_srcw[i] != 0);
3079 
3080       if (!from_sp)
3081         delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);
3082 
3083       if (from_sp || type == recurse_swap_global)
3084         delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);
3085 
3086       stackptr += sizeof(sljit_sw);
3087       }
3088     }
3089   else
3090     stackptr += sizeof(sljit_sw) * private_count;
3091 
3092   if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
3093     {
3094     SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);
3095 
3096     for (i = 0; i < shared_count; i++)
3097       {
3098       SLJIT_ASSERT(shared_srcw[i] != 0);
3099 
3100       if (!from_sp)
3101         delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);
3102 
3103       if (from_sp || type == recurse_swap_global)
3104         delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);
3105 
3106       stackptr += sizeof(sljit_sw);
3107       }
3108     }
3109   else
3110     stackptr += sizeof(sljit_sw) * shared_count;
3111 
3112   if (type != recurse_copy_private_to_global && type != recurse_swap_global)
3113     {
3114     SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);
3115 
3116     for (i = 0; i < kept_shared_count; i++)
3117       {
3118       SLJIT_ASSERT(kept_shared_srcw[i] != 0);
3119 
3120       if (!from_sp)
3121         delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);
3122 
3123       if (from_sp || type == recurse_swap_global)
3124         delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);
3125 
3126       stackptr += sizeof(sljit_sw);
3127       }
3128     }
3129   else
3130     stackptr += sizeof(sljit_sw) * kept_shared_count;
3131   }
3132 
3133 SLJIT_ASSERT(cc == ccend && stackptr == stacktop);
3134 
3135 delayed_mem_copy_finish(&status);
3136 }
3137 
set_then_offsets(compiler_common * common,PCRE2_SPTR cc,sljit_u8 * current_offset)3138 static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
3139 {
3140 PCRE2_SPTR end = bracketend(cc);
3141 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
3142 
3143 /* Assert captures then. */
3144 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA)
3145   current_offset = NULL;
3146 /* Conditional block does not. */
3147 if (*cc == OP_COND || *cc == OP_SCOND)
3148   has_alternatives = FALSE;
3149 
3150 cc = next_opcode(common, cc);
3151 
3152 if (has_alternatives)
3153   {
3154   if (*cc == OP_REVERSE)
3155     cc += 1 + IMM2_SIZE;
3156   else if (*cc == OP_VREVERSE)
3157     cc += 1 + 2 * IMM2_SIZE;
3158 
3159   current_offset = common->then_offsets + (cc - common->start);
3160   }
3161 
3162 while (cc < end)
3163   {
3164   if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
3165     cc = set_then_offsets(common, cc, current_offset);
3166   else
3167     {
3168     if (*cc == OP_ALT && has_alternatives)
3169       {
3170       cc += 1 + LINK_SIZE;
3171 
3172       if (*cc == OP_REVERSE)
3173         cc += 1 + IMM2_SIZE;
3174       else if (*cc == OP_VREVERSE)
3175         cc += 1 + 2 * IMM2_SIZE;
3176 
3177       current_offset = common->then_offsets + (cc - common->start);
3178       continue;
3179       }
3180 
3181     if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
3182       *current_offset = 1;
3183     cc = next_opcode(common, cc);
3184     }
3185   }
3186 
3187 return end;
3188 }
3189 
/* Remove the CASE_ITERATOR_* helper macros so that they are not visible to
the code that follows. */
#undef CASE_ITERATOR_PRIVATE_DATA_1
#undef CASE_ITERATOR_PRIVATE_DATA_2A
#undef CASE_ITERATOR_PRIVATE_DATA_2B
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3196 
is_powerof2(unsigned int value)3197 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
3198 {
3199 return (value & (value - 1)) == 0;
3200 }
3201 
set_jumps(jump_list * list,struct sljit_label * label)3202 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
3203 {
3204 while (list != NULL)
3205   {
3206   /* sljit_set_label is clever enough to do nothing
3207   if either the jump or the label is NULL. */
3208   SET_LABEL(list->jump, label);
3209   list = list->next;
3210   }
3211 }
3212 
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)3213 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
3214 {
3215 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
3216 if (list_item)
3217   {
3218   list_item->next = *list;
3219   list_item->jump = jump;
3220   *list = list_item;
3221   }
3222 }
3223 
/* Records a stub on common->stubs. The stub remembers the jump that enters
it (start) and a label created at the current code position (quit), which
flush_stubs() later uses as the place to continue.

If the stub cannot be allocated, nothing is recorded and no label is
created. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (stub == NULL)
  return;

stub->start = start;
stub->quit = LABEL();
stub->next = common->stubs;
common->stubs = stub;
}
3237 
/* Emits the out-of-line code of every stub recorded by add_stub() and
empties the stub list. For each stub the start jump is bound to the current
position, a fast call is emitted and added to common->stackalloc, and then
an unconditional jump goes back to the quit label of the stub. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
3252 
count_match(compiler_common * common)3253 static SLJIT_INLINE void count_match(compiler_common *common)
3254 {
3255 DEFINE_COMPILER;
3256 
3257 OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
3258 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
3259 }
3260 
allocate_stack(compiler_common * common,int size)3261 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
3262 {
3263 /* May destroy all locals and registers except TMP2. */
3264 DEFINE_COMPILER;
3265 
3266 SLJIT_ASSERT(size > 0);
3267 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));
3268 #ifdef DESTROY_REGISTERS
3269 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
3270 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3271 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3272 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
3273 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
3274 #endif
3275 add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
3276 }
3277 
free_stack(compiler_common * common,int size)3278 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
3279 {
3280 DEFINE_COMPILER;
3281 
3282 SLJIT_ASSERT(size > 0);
3283 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));
3284 }
3285 
/* Allocates size bytes of read-only data and links the allocation into the
list headed by common->read_only_data_head.

  common  compiler state
  size    number of usable bytes requested

Returns a pointer to the usable area, or NULL when the compiler is already
in an error state or the allocation fails. In the latter case the compiler
is put into the memory error state. */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
DEFINE_COMPILER;
sljit_uw *block;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

/* One extra word in front of the data holds the list link. */
block = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
if (SLJIT_UNLIKELY(block == NULL))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

*(void**)block = common->read_only_data_head;
common->read_only_data_head = (void *)block;
return &block[1];
}
3305 
reset_ovector(compiler_common * common,int length)3306 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
3307 {
3308 DEFINE_COMPILER;
3309 struct sljit_label *loop;
3310 sljit_s32 i;
3311 
3312 /* At this point we can freely use all temporary registers. */
3313 SLJIT_ASSERT(length > 1);
3314 /* TMP1 returns with begin - 1. */
3315 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
3316 if (length < 8)
3317   {
3318   for (i = 1; i < length; i++)
3319     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
3320   }
3321 else
3322   {
3323   if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3324     {
3325     GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
3326     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3327     loop = LABEL();
3328     sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
3329     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3330     JUMPTO(SLJIT_NOT_ZERO, loop);
3331     }
3332   else
3333     {
3334     GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
3335     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3336     loop = LABEL();
3337     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
3338     OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
3339     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3340     JUMPTO(SLJIT_NOT_ZERO, loop);
3341     }
3342   }
3343 }
3344 
reset_early_fail(compiler_common * common)3345 static SLJIT_INLINE void reset_early_fail(compiler_common *common)
3346 {
3347 DEFINE_COMPILER;
3348 sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
3349 sljit_u32 uncleared_size;
3350 sljit_s32 src = SLJIT_IMM;
3351 sljit_s32 i;
3352 struct sljit_label *loop;
3353 
3354 SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);
3355 
3356 if (size == sizeof(sljit_sw))
3357   {
3358   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
3359   return;
3360   }
3361 
3362 if (sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
3363   {
3364   OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
3365   src = TMP3;
3366   }
3367 
3368 if (size <= 6 * sizeof(sljit_sw))
3369   {
3370   for (i = common->early_fail_start_ptr; i < common->early_fail_end_ptr; i += sizeof(sljit_sw))
3371     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, src, 0);
3372   return;
3373   }
3374 
3375 GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);
3376 
3377 uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);
3378 
3379 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);
3380 
3381 loop = LABEL();
3382 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3383 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3384 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * SSIZE_OF(sw), src, 0);
3385 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * SSIZE_OF(sw), src, 0);
3386 CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);
3387 
3388 if (uncleared_size >= sizeof(sljit_sw))
3389   OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3390 
3391 if (uncleared_size >= 2 * sizeof(sljit_sw))
3392   OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0);
3393 }
3394 
do_reset_match(compiler_common * common,int length)3395 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
3396 {
3397 DEFINE_COMPILER;
3398 struct sljit_label *loop;
3399 int i;
3400 
3401 SLJIT_ASSERT(length > 1);
3402 /* OVECTOR(1) contains the "string begin - 1" constant. */
3403 if (length > 2)
3404   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3405 if (length < 8)
3406   {
3407   for (i = 2; i < length; i++)
3408     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
3409   }
3410 else
3411   {
3412   if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3413     {
3414     GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
3415     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3416     loop = LABEL();
3417     sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
3418     OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3419     JUMPTO(SLJIT_NOT_ZERO, loop);
3420     }
3421   else
3422     {
3423     GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
3424     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3425     loop = LABEL();
3426     OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
3427     OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
3428     OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3429     JUMPTO(SLJIT_NOT_ZERO, loop);
3430     }
3431   }
3432 
3433 if (!HAS_VIRTUAL_REGISTERS)
3434   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));
3435 else
3436   OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
3437 
3438 if (common->mark_ptr != 0)
3439   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
3440 if (common->control_head_ptr != 0)
3441   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
3442 if (HAS_VIRTUAL_REGISTERS)
3443   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
3444 
3445 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3446 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
3447 }
3448 
do_search_mark(sljit_sw * current,PCRE2_SPTR skip_arg)3449 static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
3450 {
3451 while (current != NULL)
3452   {
3453   switch (current[1])
3454     {
3455     case type_then_trap:
3456     break;
3457 
3458     case type_mark:
3459     if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
3460       return current[3];
3461     break;
3462 
3463     default:
3464     SLJIT_UNREACHABLE();
3465     break;
3466     }
3467   SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
3468   current = (sljit_sw*)current[0];
3469   }
3470 return 0;
3471 }
3472 
/* Emits the code that publishes the result of a match:
  - startchar_ptr (and mark_ptr, when marks are in use) of the jit_arguments
    structure are filled in from the corresponding locals;
  - oveccount words of the locally stored ovector are converted to offsets
    relative to "begin" and written into the ovector of the match data;
  - the return value is placed into SLJIT_RETURN_REG.

  common      compiler state
  topbracket  when greater than 1, the emitted code scans the stored values
              downwards to compute the return value; otherwise the return
              value is the constant 1 */
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
DEFINE_COMPILER;
struct sljit_label *loop;
BOOL has_pre;

/* At this point we can freely use all registers. */
/* SLJIT_S2 keeps the previous content of OVECTOR(1); it is the value the
return value loop below compares against. OVECTOR(1) receives STR_PTR. */
OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);

/* Both branches leave the same state behind: SLJIT_R1 = oveccount and
SLJIT_R2 = address of the match data ovector minus one PCRE2_SIZE (the copy
loop advances SLJIT_R2 before each store). They differ only in how the
jit_arguments structure is addressed. */
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
    SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
  OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  }

/* Query (SLJIT_MEM_SUPP) whether a pre-update load is available; nothing is
expected to be emitted by this call. */
has_pre = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;

/* SLJIT_S0 = source pointer (one word early for the pre-update form),
SLJIT_R0 = begin. */
GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

/* Copy loop: one ovector word per iteration, SLJIT_R1 counts down to zero. */
loop = LABEL();

if (has_pre)
  sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
else
  {
  OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
  OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
  }

OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
/* Convert the stored pointer into a distance from begin. */
OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
/* Copy the integer value to the output buffer */
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
/* The distance is shifted right by UCHAR_SHIFT for the wider code units. */
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif

SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);

OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);

/* Calculate the return value, which is the maximum ovector value. */
/* The loops below start with SLJIT_R1 = topbracket + 1 and read every second
stored word, starting at OVECTOR((topbracket - 1) * 2) and moving downwards.
SLJIT_R1 is decremented for each word read, and the loop ends at the first
word that differs from SLJIT_S2. */
if (topbracket > 1)
  {
  if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw))) == SLJIT_SUCCESS)
    {
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

    /* OVECTOR(0) is never equal to SLJIT_S2. */
    loop = LABEL();
    sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw)));
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
    }
  else
    {
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

    /* OVECTOR(0) is never equal to SLJIT_S2. */
    loop = LABEL();
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
    OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
    }
  }
else
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
}
3569 
/* Emits the code that reports a partial match and jumps to quit:
  - SLJIT_RETURN_REG is set to PCRE2_ERROR_PARTIAL;
  - startchar_ptr of the jit_arguments structure receives the start of the
    partial match (the hit_start local in soft mode, the start_ptr local
    otherwise);
  - ovector[0] and ovector[1] of the match data receive the offsets of that
    start and of STR_END, both relative to "begin".

  common  compiler state
  quit    label the emitted code jumps to at the end */
static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
{
DEFINE_COMPILER;
sljit_s32 mov_opcode;
/* The jit_arguments pointer is copied into SLJIT_R1 when virtual registers
are in use, and read directly from ARGUMENTS otherwise. */
sljit_s32 arguments_reg = !HAS_VIRTUAL_REGISTERS ? ARGUMENTS : SLJIT_R1;

SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
  && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));

if (arguments_reg != ARGUMENTS)
  OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0);
/* SLJIT_R2 = start of the partial match. */
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
  common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);

/* Store match begin and end. */
/* SLJIT_S1 = begin; SLJIT_R1 = match data (arguments_reg is not used after
this point, so overwriting SLJIT_R1 is fine). */
OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data));

/* The store width follows the size of PCRE2_SIZE. */
mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;

/* ovector[0]: distance of the start from begin, shifted right by
UCHAR_SHIFT for the wider code units. */
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);

/* ovector[1]: the same conversion applied to STR_END (STR_END itself is
overwritten by this). */
OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);

JUMPTO(SLJIT_JUMP, quit);
}
3607 
check_start_used_ptr(compiler_common * common)3608 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
3609 {
3610 /* May destroy TMP1. */
3611 DEFINE_COMPILER;
3612 struct sljit_jump *jump;
3613 
3614 if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3615   {
3616   /* The value of -1 must be kept for start_used_ptr! */
3617   OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
3618   /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
3619   is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
3620   jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
3621   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3622   JUMPHERE(jump);
3623   }
3624 else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
3625   {
3626   jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3627   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3628   JUMPHERE(jump);
3629   }
3630 }
3631 
char_has_othercase(compiler_common * common,PCRE2_SPTR cc)3632 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
3633 {
3634 /* Detects if the character has an othercase. */
3635 unsigned int c;
3636 
3637 #ifdef SUPPORT_UNICODE
3638 if (common->utf || common->ucp)
3639   {
3640   if (common->utf)
3641     {
3642     GETCHAR(c, cc);
3643     }
3644   else
3645     c = *cc;
3646 
3647   if (c > 127)
3648     return c != UCD_OTHERCASE(c);
3649 
3650   return common->fcc[c] != c;
3651   }
3652 else
3653 #endif
3654   c = *cc;
3655 return MAX_255(c) ? common->fcc[c] != c : FALSE;
3656 }
3657 
char_othercase(compiler_common * common,unsigned int c)3658 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
3659 {
3660 /* Returns with the othercase. */
3661 #ifdef SUPPORT_UNICODE
3662 if ((common->utf || common->ucp) && c > 127)
3663   return UCD_OTHERCASE(c);
3664 #endif
3665 return TABLE_GET(c, common->fcc, c);
3666 }
3667 
/* Detects whether the character at cc and its other case differ in exactly
one bit.

  common  compiler state; utf, ucp and the fcc table are used
  cc      points to the first code unit of a character that has an other
          case (asserted below)

Returns 0 when more than one bit differs. Otherwise the result is
(position << 8) | bit, where bit is the differing bit reduced to at most
eight bits and position is a small selector computed below.
NOTE(review): position looks like the index of the byte (or UTF-8 code unit)
that holds the differing bit; confirm against the callers. */
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
unsigned int c, oc, bit;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
int n;
#endif

/* Fetch the character (c) and its other case (oc). */
#ifdef SUPPORT_UNICODE
if (common->utf || common->ucp)
  {
  if (common->utf)
    {
    GETCHAR(c, cc);
    }
  else
    c = *cc;

  if (c <= 127)
    oc = common->fcc[c];
  else
    oc = UCD_OTHERCASE(c);
  }
else
  {
  c = *cc;
  oc = TABLE_GET(c, common->fcc, c);
  }
#else
c = *cc;
oc = TABLE_GET(c, common->fcc, c);
#endif

SLJIT_ASSERT(c != oc);

bit = c ^ oc;
/* Optimized for English alphabet. */
if (c <= 127 && bit == 0x20)
  return (0 << 8) | 0x20;

/* Since c != oc, they must have at least 1 bit difference. */
/* Hence bit is not zero here, and is_powerof2 really means "exactly one
bit". */
if (!is_powerof2(bit))
  return 0;

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
if (common->utf && c > 127)
  {
  /* Start from the number of extra code units of the sequence and step back
  one unit for every group of six low bits that does not hold the difference. */
  n = GET_EXTRALEN(*cc);
  while ((bit & 0x3f) == 0)
    {
    n--;
    bit >>= 6;
    }
  return (n << 8) | bit;
  }
#endif /* SUPPORT_UNICODE */
return (0 << 8) | bit;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#ifdef SUPPORT_UNICODE
/* NOTE(review): this branch is compiled for 32-bit code units too; confirm
that callers expect positions 2 and 3 in that configuration. */
if (common->utf && c > 65535)
  {
  if (bit >= (1u << 10))
    bit >>= 10;
  else
    return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
  }
#endif /* SUPPORT_UNICODE */
/* Position 0 when the difference fits into the low eight bits, position 1
(with the bit moved down by eight) otherwise. */
return (bit < 256) ? ((0u << 8) | bit) : ((1u << 8) | (bit >> 8));

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
3743 
/* Emits the code that records or reports a partial match. Nothing is
emitted in PCRE2_JIT_COMPLETE mode.

  common  compiler state
  force   when FALSE and empty partial matches are not allowed, the action
          is skipped if the start_used_ptr local is not below STR_PTR;
          must not be TRUE in PCRE2_JIT_COMPLETE mode

In soft mode the action is clearing the hit_start local, and it is also
skipped when start_used_ptr is -1 (if the first check does not apply). In
hard mode the action is a jump to the partial match handler.

Does not modify registers. */
static void check_partial(compiler_common *common, BOOL force)
{
DEFINE_COMPILER;
struct sljit_jump *skip = NULL;
BOOL soft_mode;

SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

if (common->mode == PCRE2_JIT_COMPLETE)
  return;

soft_mode = (common->mode == PCRE2_JIT_PARTIAL_SOFT);

if (force || common->allow_empty_partial)
  {
  if (soft_mode)
    skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  }
else
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

if (soft_mode)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

if (skip != NULL)
  JUMPHERE(skip);
}
3773 
/* Emits an end-of-subject check.

  common       compiler state
  end_reached  list that collects the jumps taken when STR_PTR is not
               below STR_END

In PCRE2_JIT_COMPLETE mode a single compare-and-jump is emitted. In the
partial modes the end_reached path is also taken when the start_used_ptr
local is not below STR_PTR. Otherwise soft mode clears the hit_start local
before going to end_reached, and hard mode jumps to the partial match
handler instead.

Does not affect registers. Usually used in a tight spot. */
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
DEFINE_COMPILER;
struct sljit_jump *has_input;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

has_input = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

/* Common to both partial modes. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(has_input);
}
3803 
/* Emits an end-of-subject check whose failure path goes to backtracks.

  common      compiler state
  backtracks  list that collects the jumps taken when no input is left

In PCRE2_JIT_COMPLETE mode a single compare-and-jump is emitted. In the
partial modes, once STR_PTR is not below STR_END:
  - if empty partial matches are not allowed, the code backtracks when the
    start_used_ptr local is not below STR_PTR;
  - otherwise, in soft mode, it backtracks when start_used_ptr is -1;
  - then soft mode clears the hit_start local and backtracks, while hard
    mode jumps to the partial match handler. */
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
DEFINE_COMPILER;
struct sljit_jump *has_input;
BOOL soft_mode;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
soft_mode = (common->mode == PCRE2_JIT_PARTIAL_SOFT);
has_input = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

if (common->allow_empty_partial)
  {
  if (soft_mode)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));
  }
else
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (soft_mode)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(has_input);
}
3836 
/* Emits the partial match action without an end-of-subject test of its own.

In soft mode the hit_start local is cleared unless the start_used_ptr local
is not below STR_PTR. In hard mode a jump to the partial match handler is
taken when start_used_ptr is below STR_PTR. Nothing is emitted in any other
mode. */
static void process_partial_match(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *skip;

/* Partial matching mode. */
if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  JUMPHERE(skip);
  return;
  }

if (common->mode != PCRE2_JIT_PARTIAL_HARD)
  return;

/* Use the handler label directly if it exists already, otherwise queue
the jump on the partialmatch list. */
if (common->partialmatchlabel == NULL)
  add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
else
  CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
}
3857 
/* Emits a jump to label that is taken while STR_PTR is below STR_END,
followed by the partial match action of process_partial_match() for the
case when the end of the subject has been reached.

  common  compiler state
  label   target of the jump taken when input is still available */
static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
{
DEFINE_COMPILER;

CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);
process_partial_match(common);
}
3865 
/* Emits code that reads the character at STR_PTR into TMP1 without moving
STR_PTR.

  common      compiler state; utf and invalid_utf select the decoding
  max         when it is below the first value that needs decoding (128 for
              UTF-8; 0xd800 for UTF-16 and for 32-bit invalid_utf checking),
              only the load of a single code unit is emitted
  dst, dstw   operand in which STR_PTR is saved while the out-of-line
              character reader is called
  backtracks  when not NULL and invalid_utf is set, collects jumps taken for
              an invalid character; may be NULL

All arguments except common are unused in builds where the corresponding
decoding code is compiled out. */
static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
{
/* Reads the character into TMP1, keeps STR_PTR.
Does not check STR_END. TMP2, dst, RETURN_ADDR Destroyed. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
SLJIT_UNUSED_ARG(backtracks);

/* The first code unit is always loaded; the rest only refines TMP1. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  /* Values below 0x80 are complete. For the others STR_PTR is saved in dst,
  advanced by one code unit, the reader is called, and STR_PTR is restored. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, common->invalid_utf ? &common->utfreadchar_invalid : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
  if (backtracks && common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  /* TMP2 = code unit - 0xd800, so the surrogate range becomes 0 .. 0x7ff. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (common->invalid_utf)
    {
    /* Any surrogate code unit is passed to the validating reader. */
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
    if (backtracks && common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    {
    /* TMP2 contains the high surrogate. */
    /* The pair is combined inline with the code unit that follows:
    (high - 0xd800) << 10, plus (low - 0xdc00), plus 0x10000. */
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }

  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  if (max < 0xd800) return;

  /* A value is rejected when it is 0x110000 or above, or when it lies in
  the 0xd800 .. 0xdfff range. */
  if (backtracks != NULL)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
    }
  else
    {
    /* Without a backtrack list TMP1 is replaced by INVALID_UTF_CHAR. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
    SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3949 
static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
{
/* Loads the character that ends right before STR_PTR into TMP1; STR_PTR
itself is not moved. On entry TMP2 must hold the start of the subject
buffer. TMP1, TMP2 and RETURN_ADDR are affected.

When max is below the first value that needs multi-unit decoding (8 and
16 bit UTF), only the preceding code unit is loaded. In invalid UTF mode
a jump is added to backtracks for invalid characters; the 8 and 16 bit
paths do this only when backtracks is not NULL. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *single_unit;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf && max >= 128)
  {
  /* A byte below 0x80 is a complete character. */
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  if (!common->invalid_utf)
    add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
  else
    {
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  JUMPHERE(single_unit);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && max >= 0xd800)
  {
  if (!common->invalid_utf)
    {
    /* TMP2 becomes (unit - 0xdc00); below 0x400 means the unit is a low
    surrogate and the unit before it is combined with it. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
    single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }
  else
    {
    single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  JUMPHERE(single_unit);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  /* Values at or above 0x110000 and surrogates (0xd800-0xdfff) are
  sent to the backtrack list. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
4017 
/* Option bits for the 'options' argument of read_char(). */

/* STR_PTR must be moved past the whole character even when the character
value itself does not need to be fully decoded. */
#define READ_CHAR_UPDATE_STR_PTR 0x1
/* In invalid UTF mode, call the utfreadnewline_invalid helper instead of
utfreadchar_invalid. */
#define READ_CHAR_UTF8_NEWLINE 0x2
/* Both of the above bits together. */
#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
/* Do not use the invalid UTF code path even if common->invalid_utf is set. */
#define READ_CHAR_VALID_UTF 0x4
4022 
static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
  jump_list **backtracks, sljit_u32 options)
{
/* Loads the character at STR_PTR into TMP1 and advances STR_PTR. The value
left in TMP1 is exact only for characters in the [min, max] range; for any
other character TMP1 receives some value outside that range. No STR_END
check is emitted.

options is a combination of the READ_CHAR_* bits:
  READ_CHAR_UPDATE_STR_PTR  STR_PTR must end up after the whole character,
                            even when the character is not fully decoded.
  READ_CHAR_UTF8_NEWLINE    in invalid UTF mode call utfreadnewline_invalid
                            instead of utfreadchar_invalid.
  READ_CHAR_VALID_UTF       do not use the invalid UTF code path.

In invalid UTF mode invalid characters jump to backtracks when it is not
NULL. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *done;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *not_in_range;
#endif

SLJIT_UNUSED_ARG(min);
SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(options);
SLJIT_ASSERT(min <= max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* Only single byte values are interesting and STR_PTR does not have to
  be exact: the first byte is enough. */
  if (max < 128 && (options & READ_CHAR_UPDATE_STR_PTR) == 0)
    return;

  if (common->invalid_utf && (options & READ_CHAR_VALID_UTF) == 0)
    {
    done = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);

    add_jump(compiler, (options & READ_CHAR_UTF8_NEWLINE) ?
      &common->utfreadnewline_invalid : &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));

    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    JUMPHERE(done);
    return;
    }

  /* Valid UTF-8 from here. Bytes below 0xc0 need no further decoding. */
  done = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  if (min >= 0x10000)
    {
    /* Only four byte sequences (first byte 0xf0..0xf7) are decoded. With
    READ_CHAR_UPDATE_STR_PTR the utf8_table4 entry of the first byte is
    kept in RETURN_ADDR and added to STR_PTR at the end. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    not_in_range = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
    if ((options & READ_CHAR_UPDATE_STR_PTR) == 0)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(not_in_range);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (min >= 0x800 && max <= 0xffff)
    {
    /* Only three byte sequences (first byte 0xe0..0xef) are decoded. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    not_in_range = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    if ((options & READ_CHAR_UPDATE_STR_PTR) == 0)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(not_in_range);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (max >= 0x800)
    {
    /* Any length may be needed: use the generic decoder helper. */
    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
    }
  else if (max < 128)
    {
    /* Only STR_PTR needs updating (READ_CHAR_UPDATE_STR_PTR is set here):
    add the utf8_table4 entry of the first byte. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  else
    {
    /* max is in 0x80..0x7ff: the first two bytes are combined. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    else
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  JUMPHERE(done);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  /* No surrogate handling is needed when only values below 0xd800 matter
  and STR_PTR does not have to be exact. */
  if (max < 0xd800 && (options & READ_CHAR_UPDATE_STR_PTR) == 0)
    return;

  if (common->invalid_utf && (options & READ_CHAR_VALID_UTF) == 0)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    done = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);

    add_jump(compiler, (options & READ_CHAR_UTF8_NEWLINE) ?
      &common->utfreadnewline_invalid : &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));

    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    JUMPHERE(done);
    return;
    }

  if (max >= 0x10000)
    {
    /* Full decoding: when TMP2 (unit - 0xd800) is below 0x400 the unit is
    a high surrogate and the next unit is combined with it. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    done = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(done);
    return;
    }

  /* The exact value of a surrogate pair is not needed: the low surrogate
  is skipped when requested, and TMP1 is set to 0x10000 when max reaches
  the surrogate range. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
    {
    /* Branch-free variant using conditional selects. */
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);
    if (max >= 0xd800)
      SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000, TMP1);
    }
  else
    {
    done = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    if (max >= 0xd800)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
    JUMPHERE(done);
    }
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  /* TMP2 is (character - 0xd800), used for the surrogate range test. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (backtracks != NULL)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
    }
  else
    {
    /* No backtrack list: replace invalid values by INVALID_UTF_CHAR. */
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
    SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
4216 
static void skip_valid_char(compiler_common *common)
{
/* Advances STR_PTR by one character. In 8 and 16 bit UTF mode the extra
code units are skipped from the first unit alone (utf8_table4 lookup or
high surrogate test) without any validation, and TMP1 is overwritten.
No STR_END check is emitted. */
DEFINE_COMPILER;
#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
struct sljit_jump *single_unit;

if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if PCRE2_CODE_UNIT_WIDTH == 8
  /* Bytes from 0xc0 upwards: add the utf8_table4 entry of the byte. */
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif PCRE2_CODE_UNIT_WIDTH == 16
  /* TMP1 becomes 1 for a high surrogate (0xd800..0xdbff) and 0 otherwise;
  shifted left by one it is the byte size of the unit to skip. */
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xd800);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  JUMPHERE(single_unit);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */

/* Non-UTF mode: every character is exactly one code unit. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4247 
4248 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4249 
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Returns TRUE when bytes 16..31 of the 32 byte class bitmap (the bits of
character codes 128..255) all have the same value: 0xff if nclass is set,
zero otherwise. In that case the character codes below 128 are enough to
determine a match. */
const sljit_u8 expected = nclass ? 0xff : 0;
int i;

for (i = 16; i < 32; i++)
  {
  if (bitset[i] != expected)
    return FALSE;
  }
return TRUE;
}
4266 
static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* UTF-8 only. Loads the ctypes entry of the character at STR_PTR into TMP1
when the character is below 128; for any other character TMP1 ends up
zero. Does not check STR_END. TMP2 is overwritten.

STR_PTR is always advanced by one code unit. When negated is set, the
remaining bytes of a multi-byte character are consumed as well; in invalid
UTF mode this goes through utfreadchar_invalid and an invalid character
jumps to backtracks. */
DEFINE_COMPILER;
struct sljit_jump *ascii;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The ctypes entries of all values above 127 are zero. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (!negated)
  return;

ascii = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

if (!common->invalid_utf)
  {
  /* Skip the rest of the character using the utf8_table4 entry. */
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  }
else
  {
  /* The helper expects the first byte in TMP1; the type is reset to
  zero after the validity check. */
  OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  }
JUMPHERE(ascii);
}
4302 
4303 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4304 
static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the character type (ctypes entry) of the character at STR_PTR into
TMP1 and updates STR_PTR. Characters above 255 get type zero, because the
ctypes array has only 256 entries. TMP2 is overwritten.

The caller must have checked STR_END for the first code unit. In invalid
UTF mode a second STR_END check is emitted before a second code unit is
fetched (8 bit non-negated path, 16 bit negated path), and malformed
characters jump to backtracks.

negated tells whether the caller is matching a negated type. When it is
set, the whole character is consumed so STR_PTR ends up after it. When it
is not set, at most the first two bytes of a UTF-8 character are read. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(negated);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The result of this read may be unused, but saves an "else" part. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (!negated)
    {
    /* Only two byte sequences can encode a character below 256, so a
    single continuation byte is read. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
    /* The first byte must be in the 0xc2..0xdf range. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));

    /* TMP2 = ((first byte - 0xc2) << 6) + second byte, which is the
    character value when the second byte is a continuation byte. */
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
    /* TMP1 = second byte - 0x80, which is below 0x40 exactly for the
    continuation bytes 0x80..0xbf. The validity test must use TMP1:
    TMP2 is at least 0x80 for every valid sequence, so testing it
    against 0x40 would reject all valid two byte characters. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40));

    /* Type is zero for anything above 255. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else if (common->invalid_utf)
    {
    /* Decode and validate the whole character with the helper, then
    look up the type of the decoded value. */
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
/* Values beyond the Unicode range never match in invalid UTF mode. */
if (common->invalid_utf && negated)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */

#if PCRE2_CODE_UNIT_WIDTH != 8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && negated)
  {
  /* Skip low surrogate if necessary. */
  if (!common->invalid_utf)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);

    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
      {
      /* Branch-free variant: select STR_PTR + 1 unit for high surrogates. */
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
      SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);
      }
    else
      {
      jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      JUMPHERE(jump);
      }
    return;
    }

  /* Invalid UTF mode: a unit in the surrogate range must be a high
  surrogate followed, before STR_END, by a low surrogate. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
4423 
static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
{
/* Moves STR_PTR back by one character. STR_PTR and TMP1 are affected.
When must_be_valid is TRUE, TMP2 is not used; otherwise TMP2 must hold the
start of the subject buffer on entry and it is destroyed. STR_PTR is not
modified for invalid character sequences. If backtracks is not NULL, a jump
is added to it for the invalid case. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *single_unit;
#endif

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_label *step_back;

if (common->utf)
  {
  if (must_be_valid || !common->invalid_utf)
    {
    /* Valid UTF-8: keep stepping back while the byte just passed is a
    continuation byte (10xxxxxx). */
    step_back = LABEL();
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, step_back);
    }
  else
    {
    /* Bytes below 0x80 are done after one step; everything else is
    handled by the utfmoveback_invalid helper, after which TMP1 == 0
    leads to the backtrack list. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(single_unit);
    }
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (must_be_valid || !common->invalid_utf)
    {
    /* Valid UTF-16: TMP1 becomes 1 when the unit just passed is a low
    surrogate (0xdc00..0xdfff); one more unit is skipped then. */
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xdc00);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
    }
  else
    {
    /* Units outside the surrogate range are done; surrogates are handled
    by the utfmoveback_invalid helper, after which TMP1 == 0 leads to
    the backtrack list. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    single_unit = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(single_unit);
    }
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && !must_be_valid)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));

  if (backtracks == NULL)
    {
    /* No backtrack list: TMP1 becomes 1 for values below 0x110000 and 0
    otherwise, so STR_PTR only moves for the former. */
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x110000);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
    OP2(SLJIT_SHL,  TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
    }
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  return;
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(must_be_valid);

/* Non-UTF mode (or no special handling needed): one code unit back. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4510 
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Tests whether the character in TMP1 is a newline of the given nltype.
A jump is added to backtracks that is taken when the character is a
newline (jumpifmatch set) or when it is not (jumpifmatch clear). TMP2 may
be destroyed. */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The anynewline helper reports its result through the zero flag. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return;
  }

if (nltype == NLTYPE_ANYCRLF)
  {
  if (!jumpifmatch)
    {
    /* CR skips the test; anything else must be NL. */
    is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
    JUMPHERE(is_cr);
    }
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
    }
  return;
  }

/* A fixed, single code unit newline. */
SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
}
4543 
4544 #ifdef SUPPORT_UNICODE
4545 
4546 #if PCRE2_CODE_UNIT_WIDTH == 8
static void do_utfreadchar(compiler_common *common)
{
/* Emits the fast-call helper that decodes a UTF-8 character without any
validation. On entry TMP1 holds the first byte of the character (>= 0xc0)
and STR_PTR points to the byte after it. On return TMP1 holds the character
value and STR_PTR has been moved past the character. TMP2 and RETURN_ADDR
are overwritten. */
DEFINE_COMPILER;
struct sljit_jump *longer_than_two;
struct sljit_jump *longer_than_three;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Merge the low six bits of the second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* The length is found from the first zero bit of the first byte.
0x800 is bit 0x20 of the first byte after the shift. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
longer_than_two = JUMP(SLJIT_NOT_ZERO);

/* Two bytes: remove the shifted 110xxxxx marker bits. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(longer_than_two);
/* Merge the low six bits of the third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* 0x10000 is bit 0x10 of the first byte after two shifts. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
longer_than_three = JUMP(SLJIT_NOT_ZERO);

/* Three bytes: remove the shifted 1110xxxx marker bits. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four bytes: remove the shifted 11110xxx marker bits, then merge the
low six bits of the fourth byte. */
JUMPHERE(longer_than_three);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4591 
static void do_utfreadtype8(compiler_common *common)
{
/* Emits the fast-call helper that reads the type of a UTF-8 character
without any validation. On entry TMP2 holds the first byte of the character
(>= 0xc0) and STR_PTR points to the byte after it. On return TMP1 holds the
ctypes entry of the character, or zero for characters above 255, and
STR_PTR has been moved past the character. TMP2 and RETURN_ADDR are
overwritten. */
DEFINE_COMPILER;
struct sljit_jump *longer_than_two;
struct sljit_jump *above_255;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Bit 0x20 of the first byte is clear only for two byte sequences. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0x20);
longer_than_two = JUMP(SLJIT_NOT_ZERO);

/* Two byte sequence: fetch the second byte and step over it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* TMP2 now holds the upper five bits of the character; anything above 3
means the character is beyond 255. */
above_255 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(above_255);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three or four byte sequence: there are no types for these characters,
so the rest is skipped using the utf8_table4 entry of the first byte. */
JUMPHERE(longer_than_two);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4627 
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Slow decoding a UTF-8 character with validation. TMP1 contains the first
byte of the character (>= 0xc0). Returns the char value in TMP1, or
INVALID_UTF_CHAR when the sequence is truncated, has a bad lead or
continuation byte, is overlong, encodes a surrogate, or is above 0x10ffff.
STR_PTR is undefined for invalid characters.

The loads below imply that STR_PTR points just after the first byte on
entry; on success it is left just after the last byte of the sequence.
TMP2 is used as scratch. The routine is a fast-call helper: it is entered
through SLJIT_FAST_ENTER and every exit is a SLJIT_FAST_RETURN.

When the CPU has conditional moves, most validity checks are emitted
branch-free: TMP1 is replaced by a poison value that a later range check
(or the final add) turns into INVALID_UTF_CHAR, and the matching
exit_invalid[] slot is left NULL. */
DEFINE_COMPILER;
sljit_s32 i;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *jump;
struct sljit_jump *buffer_end_close;
struct sljit_label *three_byte_entry;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[11];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Bias the lead byte so that the valid range 0xc2-0xf4 becomes
0-(0xf5 - 0xc2 - 1). */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);

/* Usually more than 3 characters remained in the subject buffer. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));

/* Not a valid start of a multi-byte sequence, no more bytes read.
The comparison is unsigned, so 0xc0 and 0xc1 (which wrapped around
in the subtraction above) are rejected as well. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);

/* Fewer than three bytes may follow: use the bounds-checked path. */
buffer_end_close = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* Bit 0x800 is set exactly when the lead byte was >= 0xe0, i.e. when
the sequence is longer than two bytes. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);

/* Two-byte sequence: only one extra byte was consumed. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  /* Bad continuation byte: 0x20000 takes the three-byte path below and
  fails its "less than 0x800" check after the bias is removed. */
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000, TMP1);
  exit_invalid[2] = NULL;
  }
else
  exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* At this point TMP1 is 0x20000 + value for lead bytes 0xe0-0xef and
at least 0x30000 for lead bytes 0xf0-0xf4, so bit 0x10000 selects the
four-byte case. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);

/* Shared with the bounds-checked path, which arrives here with STR_PTR
one code unit too far (compensated by the subtraction below). */
three_byte_entry = LABEL();

/* Remove the 0x20000 bias and 0xd800 together, then reject the
surrogate range 0xd800-0xdfff. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800, TMP1);
  exit_invalid[3] = NULL;
  }
else
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Values below 0x800 are overlong three-byte encodings. */
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  exit_invalid[4] = NULL;
  }
else
  exit_invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);

/* Four-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  /* Bad continuation byte: 0 wraps around in the subtraction below and
  fails the range check. */
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0, TMP1);
  exit_invalid[5] = NULL;
  }
else
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* Remove the lead byte bias (0xc00000) and 0x10000 together. The
unsigned range check rejects both overlong encodings (below 0x10000)
and values above 0x10ffff. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);
  exit_invalid[6] = NULL;
  }
else
  exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Bounds-checked path: fewer than three bytes follow the lead byte.
STR_PTR is moved back so that it points after the second byte. */
JUMPHERE(buffer_end_close);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
exit_invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three-byte sequence. */
JUMPHERE(jump);
exit_invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  exit_invalid[10] = NULL;
  }
else
  exit_invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* One will be subtracted from STR_PTR later. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Four byte sequences are not possible. Anything that is not a
three-byte value falls through to the invalid exit. */
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_entry);

/* Common invalid exit. The slots set to NULL by the branch-free
variants are passed to sljit_set_label as they are. */
exit_invalid_label = LABEL();
for (i = 0; i < 11; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4786 
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Slow decoding a UTF-8 character, specialized for newlines.
TMP1 contains the first byte of the character (>= 0xc0). Return
char value in TMP1.

Only the multi-byte characters 0x85 and those encoded as 0xe2 0x80 0xXX
(which include 0x2028 and 0x2029) are decoded. For everything else the
continuation bytes (0x80-0xbf) that follow are skipped and
INVALID_UTF_CHAR is returned. The loads imply that STR_PTR points just
after the first byte on entry. TMP2 is used as scratch. */
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_label *skip_start;
struct sljit_label *three_byte_exit;
struct sljit_jump *jump[5];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

if (common->nltype != NLTYPE_ANY)
  {
  SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);

  /* All newlines are ascii, just skip intermediate octets. */
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  loop = LABEL();
  /* The first call (with SLJIT_MEM_SUPP) only asks whether a load with
  post-increment is available; if so, the second call emits it. */
  if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
    sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  else
    {
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }

  /* Keep looping while the byte just read is a continuation byte. */
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
  /* The last byte read was not a continuation byte: step back. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  JUMPHERE(jump[0]);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  return;
  }

/* NLTYPE_ANY: read the second byte into TMP2, then dispatch on the
first byte. */
jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

jump[1] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
jump[2] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);

/* Entered with the most recently read byte in TMP2 and STR_PTR just
after it. */
skip_start = LABEL();
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
jump[3] = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);

/* Skip intermediate octets. */
loop = LABEL();
jump[4] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);

/* The last byte read was not a continuation byte: step back. */
JUMPHERE(jump[3]);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The end-of-subject exits arrive here without stepping back. */
three_byte_exit = LABEL();
JUMPHERE(jump[0]);
JUMPHERE(jump[4]);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two byte long newline: 0x85. */
JUMPHERE(jump[1]);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three byte long newlines: 0x2028 and 0x2029. */
JUMPHERE(jump[2]);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start);
CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The third byte must be a continuation byte. TMP2 still holds the raw
byte, as skip_start expects. */
OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start);

/* 0xe2 0x80 0xXX decodes to 0x2000 | (0xXX - 0x80). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4877 
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Goes one character back in a possibly invalid UTF-8 subject.

As used below: TMP1 holds a byte that must be below 0xc0 (otherwise the
invalid exit is taken at once), TMP2 is the lowest address that may be
read, and the lead byte is searched for at the one, two and three code
units before the entry value of STR_PTR.
NOTE(review): presumably TMP1 is the byte at the entry STR_PTR and the
caller has already handled bytes below 0x80 - confirm against the caller.

On success TMP1 is 1 and STR_PTR points to the lead byte that was found.
On failure TMP1 is 0 and STR_PTR is its entry value plus one code unit.
Only the lead/continuation byte pattern is checked, not the decoded
value. */
DEFINE_COMPILER;
sljit_s32 i;
struct sljit_jump *jump;
struct sljit_jump *buffer_start_close;
struct sljit_label *exit_ok_label;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[7];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Optimistically move back by three code units; the offsets of the
loads below are relative to this position. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);

/* Two-byte sequence. */
/* Fewer than three code units precede the entry position: use the
bounds-checked path. */
buffer_start_close = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));

/* Lead bytes 0xc0-0xdf map to 0-0x1f. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three-byte sequence. */
JUMPHERE(jump);
/* The byte just examined must be a continuation byte (0x80-0xbf), which
is -0x40..-1 after the subtraction above. The comparison is unsigned, so
everything else is below -0x40 here. */
exit_invalid[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));

/* Lead bytes 0xe0-0xef map to 0-0x0f. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four-byte sequence. */
JUMPHERE(jump);
/* Rebias to (byte - 0x80): it must be a continuation byte. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);

/* The lead byte must be in the 0xf0-0xf4 range. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
exit_invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);

exit_ok_label = LABEL();
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two-byte sequence. */
/* Bounds-checked path: STR_PTR is now one code unit before the entry
position and is compared against TMP2 before every load. */
JUMPHERE(buffer_start_close);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

exit_invalid[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, exit_ok_label);

/* Three-byte sequence. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
exit_invalid[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
exit_invalid[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, exit_ok_label);

/* Four-byte sequences are not possible. */

/* Invalid exit for STR_PTR two code units before the entry position. */
exit_invalid_label = LABEL();
sljit_set_label(exit_invalid[5], exit_invalid_label);
sljit_set_label(exit_invalid[6], exit_invalid_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(exit_invalid[4]);
/* -2 + 4 = 2 */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Invalid exit for STR_PTR three code units before the entry position. */
exit_invalid_label = LABEL();
for (i = 0; i < 4; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4973 
static void do_utfpeakcharback(compiler_common *common)
{
/* Peek a character back. Does not modify STR_PTR.

Decodes the multi-byte UTF-8 character that ends just before STR_PTR and
returns its value in TMP1. No validation and no bounds checks are
emitted, and the entry values of TMP1 and TMP2 are not used.
NOTE(review): presumably the caller guarantees valid UTF-8 and has
already handled single-byte characters - confirm against the caller.

The three cases fall through into each other: each stage shifts the bits
collected so far left by six and ORs in the next continuation byte. */
DEFINE_COMPILER;
struct sljit_jump *jump[2];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* A lead byte in the 0xc0-0xdf range at -2 means a two-byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);

/* A lead byte in the 0xe0-0xef range at -3 means a three-byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
jump[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);

/* Four-byte sequence: the byte at -3 is a continuation byte (rebiased
to byte - 0x80) and the lead byte is at -4. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Append the continuation byte at -2. */
JUMPHERE(jump[1]);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Append the continuation byte at -1. */
JUMPHERE(jump[0]);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5010 
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Peek a character back in a possibly invalid UTF-8 subject. Does not
modify STR_PTR.

As used below: TMP1 holds a byte that must be below 0xc0 (otherwise the
invalid exit is taken at once) and is treated as the last continuation
byte of the sequence; TMP2 is the lowest address that may be read.
NOTE(review): presumably TMP1 is the byte just before STR_PTR and the
caller has already handled bytes below 0x80 - confirm against the caller.

Returns the char value in TMP1, or INVALID_UTF_CHAR for a malformed,
overlong, surrogate or too large sequence, or when the sequence would
start before TMP2. TMP2 is destroyed.

Three copies of the decoder are emitted, selected by how many code units
are available before STR_PTR: at least four, exactly three, or exactly
two. The shorter variants jump back into the first one through
two_byte_entry and three_byte_entry. */
DEFINE_COMPILER;
sljit_s32 i;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *jump[2];
struct sljit_label *two_byte_entry;
struct sljit_label *three_byte_entry;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[8];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* TMP2 + 3 >= STR_PTR means that fewer than four code units are
available. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
/* Lead bytes 0xc2-0xdf map to 0-0x1d; 0xc0 and 0xc1 (overlong) wrap
around and are not accepted as lead bytes. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);

two_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
/* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump[1]);
/* Not a two-byte lead: rebias TMP2 to (byte - 0x80) and require a
continuation byte, then start collecting the value in TMP1. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);

three_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Reject the surrogate range 0xd800-0xdfff. In the branch-free variant
TMP1 becomes -0xd800, which is 0 after the addition below and is then
caught by the overlong check. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800, TMP1);
  exit_invalid[2] = NULL;
  }
else
  exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);

/* Values below 0x800 are overlong three-byte encodings. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  exit_invalid[3] = NULL;
  }
else
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump[1]);
/* Not a three-byte lead: the byte at -3 must be a continuation byte. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Four-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
/* ADD is used instead of OR because of the SUB 0x10000 above. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

/* TMP1 is now (value - 0x10000). The unsigned range check rejects
overlong encodings, values above 0x10ffff and bad lead bytes. */
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);
  exit_invalid[5] = NULL;
  }
else
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than four code units are available. TMP2 becomes the lowest
readable address plus two; if that still reaches STR_PTR, fewer than
three are available. */
JUMPHERE(jump[0]);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);

OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);

/* A four-byte sequence cannot fit into three code units. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than three code units are available. With fewer than two
nothing can precede the byte in TMP1. */
JUMPHERE(jump[0]);
exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);

/* Common invalid exit. The slots set to NULL by the branch-free
variants are passed to sljit_set_label as they are. */
exit_invalid_label = LABEL();
for (i = 0; i < 8; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5142 
5143 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
5144 
5145 #if PCRE2_CODE_UNIT_WIDTH == 16
5146 
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Slow decoding a UTF-16 character. TMP1 contains the first half
of the character (>= 0xd800). Return char value in TMP1. STR_PTR is
undefined for invalid characters.

INVALID_UTF_CHAR is returned when the first half is not a high surrogate
(TMP1 >= 0xdc00), when the subject ends before the second half, or when
the second half is not in the 0xdc00-0xdfff range. On success STR_PTR is
advanced past the second half. */
DEFINE_COMPILER;
struct sljit_jump *exit_invalid[3];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* TMP2 contains the high surrogate. */
/* NOTE(review): the arithmetic below yields the code point only if TMP2
holds the high surrogate with 0xd800 already subtracted - confirm
against the caller. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* Load the second half and move the high part into position. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The second half must be a low surrogate. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
JUMPHERE(exit_invalid[2]);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5178 
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Slow decoding a UTF-16 character, specialized for newlines.
TMP1 contains the first half of the character (>= 0xd800). Return
char value in TMP1.

The character is not actually decoded. When the first half is a high
surrogate and at least one more code unit is available, TMP1 is set to
0x10000 and STR_PTR is advanced by one code unit only if the next code
unit is a low surrogate (0xdc00-0xdfff). INVALID_UTF_CHAR is returned
when the subject ends here or when TMP1 >= 0xdc00. The entry value of
TMP2 is not used; TMP2 is overwritten by the first load. */

DEFINE_COMPILER;
struct sljit_jump *exit_invalid[2];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);

/* TMP2 becomes 1 if the next code unit is a low surrogate, 0 otherwise;
scaled to a byte offset it is the amount STR_PTR advances by. */
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5210 
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Goes one character back in a possibly invalid UTF-16 subject.

As used below: a TMP1 value below 0x400 takes the invalid exit at once,
and TMP2 is a lower bound that STR_PTR must be strictly above before the
code unit at STR_PTR - 1 is read.
NOTE(review): presumably TMP1 is the code unit at STR_PTR with 0xd800
subtracted, so that 0x400 and above means a low surrogate - confirm
against the caller.

On success (the code unit at STR_PTR - 1 is a high surrogate) STR_PTR is
moved back by one code unit and TMP1 is 1. On failure STR_PTR is moved
forward by one code unit and TMP1 is 0. */
DEFINE_COMPILER;
struct sljit_jump *exit_invalid[3];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* The preceding code unit must be in the 0xd800-0xdbff range. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
JUMPHERE(exit_invalid[2]);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5238 
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Peek a character back in a possibly invalid UTF-16 subject. Does not
modify STR_PTR.

As used below: TMP1 holds a code unit and TMP2 the lowest address that
may be read.
NOTE(review): presumably TMP1 is the code unit just before STR_PTR and
is at least 0xd800 - confirm against the caller.

Returns the char value in TMP1: TMP1 itself when it is 0xe000 or above,
otherwise the code point combined from the code unit at STR_PTR - 2
(which must be a high surrogate) and TMP1 (which must be a low
surrogate). INVALID_UTF_CHAR is returned when TMP1 is below 0xdc00, when
fewer than two code units are available, or when the preceding code unit
is not a high surrogate. TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *exit_invalid[3];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Not a surrogate: return TMP1 unchanged. */
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* TMP1 becomes 0x10000 + low ten bits; the high surrogate supplies the
next ten bits. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

JUMPHERE(jump);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
JUMPHERE(exit_invalid[2]);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5270 
5271 #endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
5272 
5273 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
5274 #define UCD_BLOCK_MASK 127
5275 #define UCD_BLOCK_SHIFT 7
5276 
static void do_getucd(compiler_common *common)
{
/* Search the UCD record for the character comes in TMP1.
Returns the value read from the second stage table (the index of the
UCD record) in TMP2. TMP1 is destroyed: it is left holding the index
used for the second stage lookup, not the character type.

In 32-bit mode without UTF, characters above MAX_UTF_CODE_POINT are
looked up as UNASSIGNED_UTF_CHAR. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *jump;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(jump);
  }
#endif

/* First stage: TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT]. The index is
doubled because the table entries are read as 16 bit values. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Second stage index: (block << UCD_BLOCK_SHIFT) + offset in the block. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = ucd_stage2[TMP1], also read as a 16 bit value (TMP1 is scaled
by two in the addressing mode). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5316 
static void do_getucdtype(compiler_common *common)
{
/* Search the UCD record for the character comes in TMP1.
Returns chartype in TMP1. TMP2 is left holding the index of the UCD
record multiplied by four.

In 32-bit mode without UTF, characters above MAX_UTF_CODE_POINT are
looked up as UNASSIGNED_UTF_CHAR. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *jump;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(jump);
  }
#endif

/* First stage: TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT]. The index is
doubled because the table entries are read as 16 bit values. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Second stage index: (block << UCD_BLOCK_SHIFT) + offset in the block. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = ucd_stage2[TMP1], the index of the UCD record. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

/* TMP2 is multiplied by 12. Same as (TMP2 << 2) + ((TMP2 << 2) << 1).
The first term is added to the base address in TMP1, the second one
comes from the scaled index of the final load. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5363 
5364 #endif /* SUPPORT_UNICODE */
5365 
mainloop_entry(compiler_common * common)5366 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
5367 {
5368 DEFINE_COMPILER;
5369 struct sljit_label *mainloop;
5370 struct sljit_label *newlinelabel = NULL;
5371 struct sljit_jump *start;
5372 struct sljit_jump *end = NULL;
5373 struct sljit_jump *end2 = NULL;
5374 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5375 struct sljit_label *loop;
5376 struct sljit_jump *jump;
5377 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5378 jump_list *newline = NULL;
5379 sljit_u32 overall_options = common->re->overall_options;
5380 BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
5381 BOOL newlinecheck = FALSE;
5382 BOOL readuchar = FALSE;
5383 
5384 if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
5385     && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
5386   newlinecheck = TRUE;
5387 
5388 SLJIT_ASSERT(common->abort_label == NULL);
5389 
5390 if ((overall_options & PCRE2_FIRSTLINE) != 0)
5391   {
5392   /* Search for the end of the first line. */
5393   SLJIT_ASSERT(common->match_end_ptr != 0);
5394   OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5395 
5396   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5397     {
5398     mainloop = LABEL();
5399     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5400     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5401     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5402     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5403     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
5404     CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
5405     JUMPHERE(end);
5406     OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5407     }
5408   else
5409     {
5410     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5411     mainloop = LABEL();
5412     /* Continual stores does not cause data dependency. */
5413     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5414     read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
5415     check_newlinechar(common, common->nltype, &newline, TRUE);
5416     CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
5417     JUMPHERE(end);
5418     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5419     set_jumps(newline, LABEL());
5420     }
5421 
5422   OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5423   }
5424 else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
5425   {
5426   /* Check whether offset limit is set and valid. */
5427   SLJIT_ASSERT(common->match_end_ptr != 0);
5428 
5429   if (HAS_VIRTUAL_REGISTERS)
5430     {
5431     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5432     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5433     }
5434   else
5435     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5436 
5437   OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5438   end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
5439   if (HAS_VIRTUAL_REGISTERS)
5440     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5441   else
5442     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
5443 
5444 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5445   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5446 #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5447   if (HAS_VIRTUAL_REGISTERS)
5448     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5449 
5450   OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
5451   end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5452   OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5453   JUMPHERE(end2);
5454   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
5455   add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
5456   JUMPHERE(end);
5457   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
5458   }
5459 
5460 start = JUMP(SLJIT_JUMP);
5461 
5462 if (newlinecheck)
5463   {
5464   newlinelabel = LABEL();
5465   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5466   end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5467   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5468   OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
5469   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
5470 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5471   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5472 #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5473   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5474   end2 = JUMP(SLJIT_JUMP);
5475   }
5476 
5477 mainloop = LABEL();
5478 
5479 /* Increasing the STR_PTR here requires one less jump in the most common case. */
5480 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5481 if (common->utf && !common->invalid_utf) readuchar = TRUE;
5482 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5483 if (newlinecheck) readuchar = TRUE;
5484 
5485 if (readuchar)
5486   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5487 
5488 if (newlinecheck)
5489   CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
5490 
5491 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5492 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5493 #if PCRE2_CODE_UNIT_WIDTH == 8
5494 if (common->invalid_utf)
5495   {
5496   /* Skip continuation code units. */
5497   loop = LABEL();
5498   jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5499   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5500   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5501   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
5502   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
5503   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5504   JUMPHERE(jump);
5505   }
5506 else if (common->utf)
5507   {
5508   jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5509   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5510   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5511   JUMPHERE(jump);
5512   }
5513 #elif PCRE2_CODE_UNIT_WIDTH == 16
5514 if (common->invalid_utf)
5515   {
5516   /* Skip continuation code units. */
5517   loop = LABEL();
5518   jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5519   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5520   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5521   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
5522   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
5523   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5524   JUMPHERE(jump);
5525   }
5526 else if (common->utf)
5527   {
5528   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5529 
5530   if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
5531     {
5532     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5533     OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
5534     SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);
5535     }
5536   else
5537     {
5538     OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
5539     OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
5540     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5541     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5542     }
5543   }
5544 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
5545 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5546 JUMPHERE(start);
5547 
5548 if (newlinecheck)
5549   {
5550   JUMPHERE(end);
5551   JUMPHERE(end2);
5552   }
5553 
5554 return mainloop;
5555 }
5556 
5557 
add_prefix_char(PCRE2_UCHAR chr,fast_forward_char_data * chars,BOOL last)5558 static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
5559 {
5560 sljit_u32 i, count = chars->count;
5561 
5562 if (count == 255)
5563   return;
5564 
5565 if (count == 0)
5566   {
5567   chars->count = 1;
5568   chars->chars[0] = chr;
5569 
5570   if (last)
5571     chars->last_count = 1;
5572   return;
5573   }
5574 
5575 for (i = 0; i < count; i++)
5576   if (chars->chars[i] == chr)
5577     return;
5578 
5579 if (count >= MAX_DIFF_CHARS)
5580   {
5581   chars->count = 255;
5582   return;
5583   }
5584 
5585 chars->chars[count] = chr;
5586 chars->count = count + 1;
5587 
5588 if (last)
5589   chars->last_count++;
5590 }
5591 
/* Recursive function, which scans prefix literals.

Walks the compiled pattern starting at cc and records, for each successive
code unit position, the set of code units that may appear there. Each element
of chars describes one position; a count of 255 marks a position whose set is
not tracked.

Arguments:
  common     compiler state (utf / ucp flags, character tables)
  cc         current position in the compiled pattern
  chars      data of the current position, advanced as positions are consumed
  max_chars  number of positions that may still be recorded
  rec_count  work budget shared by all recursive calls; it is decremented on
             every iteration of the main loop, and the scan returns 0 as soon
             as it is exhausted

Returns: the number of positions recorded by this invocation. Callers that
recurse use the returned value as the new limit for their own scan. */
static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
{
BOOL last, any, class, caseless;
int len, repeat, len_save, consumed = 0;
sljit_u32 chr; /* Any unicode character. */
sljit_u8 *bytes, *bytes_end, byte;
PCRE2_SPTR alternative, cc_save, oc;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
PCRE2_UCHAR othercase[4];
#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
PCRE2_UCHAR othercase[2];
#else
PCRE2_UCHAR othercase[1];
#endif

repeat = 1;
while (TRUE)
  {
  if (*rec_count == 0)
    return 0;
  (*rec_count)--;

  /* last: stop after the current item; any: the item is recorded as an
  untracked position; class: the item is a 256 bit class bitmap. */
  last = TRUE;
  any = FALSE;
  class = FALSE;
  caseless = FALSE;

  switch (*cc)
    {
    case OP_CHARI:
    caseless = TRUE;
    /* Fall through */
    case OP_CHAR:
    last = FALSE;
    cc++;
    break;

    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_NOT_UCP_WORD_BOUNDARY:
    case OP_UCP_WORD_BOUNDARY:
    /* Zero width assertions. */
    cc++;
    continue;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    /* The whole assertion is skipped. */
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    caseless = TRUE;
    /* Fall through */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    /* Only the first occurrence is recorded (last remains TRUE). */
    cc++;
    break;

    case OP_EXACTI:
    caseless = TRUE;
    /* Fall through */
    case OP_EXACT:
    repeat = GET2(cc, 1);
    last = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    caseless = TRUE;
    /* Fall through */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    len = 1;
    cc++;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif
    /* The character is optional: first record what follows it into the
    same positions, then record the character itself below. */
    max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
    if (max_chars == 0)
      return consumed;
    last = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    /* Every alternative except the first one is scanned recursively into
    the same positions, then the scan continues with the first one. */
    alternative = cc + GET(cc, 1);
    while (*alternative == OP_ALT)
      {
      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      alternative += GET(alternative, 1);
      }

    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
      return consumed;
#endif
    class = TRUE;
    break;

    case OP_NCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    class = TRUE;
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    cc += GET(cc, 1);

    /* Unlike the single character types, an extended class may be followed
    by its own repeat opcode, which the "any" handling below does not decode.
    When that repeat allows zero occurrences the position is optional and
    must not be counted as consumed. Stopping the scan is always safe: it
    only shortens the prefix reported to the caller. */
    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      return consumed;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      if (GET2(cc, 1) == 0)
        return consumed;
      break;

      default:
      break;
      }

    any = TRUE;
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UNICODE
    case OP_NOTPROP:
    case OP_PROP:
#if PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* The repeat count is applied to the type opcode which follows. */
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    return consumed;
    }

  if (any)
    {
    /* Record "repeat" untracked positions. */
    do
      {
      chars->count = 255;

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    repeat = 1;
    continue;
    }

  if (class)
    {
    /* The 32 byte bitmap follows the opcode. */
    bytes = (sljit_u8*) (cc + 1);
    cc += 1 + 32 / sizeof(PCRE2_UCHAR);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      /* The class is optional: what follows it may appear at the same
      position, so it is scanned first. */
      max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      break;

      default:
      case OP_CRPLUS:
      case OP_CRMINPLUS:
      case OP_CRPOSPLUS:
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      repeat = GET2(cc, 1);
      if (repeat <= 0)
        return consumed;
      break;
      }

    do
      {
      /* A class containing code unit 255 is not tracked. */
      if (bytes[31] & 0x80)
        chars->count = 255;
      else if (chars->count != 255)
        {
        bytes_end = bytes + 32;
        chr = 0;
        do
          {
          byte = *bytes++;
          SLJIT_ASSERT((chr & 0x7) == 0);
          if (byte == 0)
            chr += 8;
          else
            {
            do
              {
              if ((byte & 0x1) != 0)
                add_prefix_char(chr, chars, TRUE);
              byte >>= 1;
              chr++;
              }
            while (byte != 0);
            /* Round up to the first code unit of the next bitmap byte. */
            chr = (chr + 7) & (sljit_u32)(~7);
            }
          }
        while (chars->count != 255 && bytes < bytes_end);
        /* Rewind to the start of the bitmap for the next repetition. */
        bytes = bytes_end - 32;
        }

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      return consumed;

      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      cc++;
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* Only a fixed repeat count allows continuing the scan. */
      if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
        return consumed;
      cc += 1 + 2 * IMM2_SIZE;
      break;

      default:
      break;
      }

    repeat = 1;
    continue;
    }

  /* A literal character: cc points to its first code unit. */
  len = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif

  if (caseless && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UNICODE
    if (common->utf)
      {
      GETCHAR(chr, cc);
      /* Both cases must be encoded with the same number of code units. */
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
        return consumed;
      }
    else
#endif
      {
      chr = *cc;
#ifdef SUPPORT_UNICODE
      if (common->ucp && chr > 127)
        {
        chr = UCD_OTHERCASE(chr);
        /* Ignore an other case which does not fit into a code unit. */
        othercase[0] = (chr == (PCRE2_UCHAR)chr) ? chr : *cc;
        }
      else
#endif
        othercase[0] = TABLE_GET(chr, common->fcc, chr);
      }
    }
  else
    {
    caseless = FALSE;
    othercase[0] = 0; /* Stops compiler warning - PH */
    }

  len_save = len;
  cc_save = cc;
  while (TRUE)
    {
    oc = othercase;
    do
      {
      len--;
      consumed++;

      add_prefix_char(*cc, chars, len == 0);

      if (caseless)
        add_prefix_char(*oc, chars, len == 0);

      if (--max_chars == 0)
        return consumed;
      chars++;
      cc++;
      oc++;
      }
    while (len > 0);

    if (--repeat == 0)
      break;

    /* Record the same character again for the next repetition. */
    len = len_save;
    cc = cc_save;
    }

  repeat = 1;
  if (last)
    return consumed;
  }
}
6005 
6006 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
jumpto_if_not_utf_char_start(struct sljit_compiler * compiler,sljit_s32 reg,struct sljit_label * label)6007 static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
6008 {
6009 #if PCRE2_CODE_UNIT_WIDTH == 8
6010 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
6011 CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0x80, label);
6012 #elif PCRE2_CODE_UNIT_WIDTH == 16
6013 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
6014 CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label);
6015 #else
6016 #error "Unknown code width"
6017 #endif
6018 }
6019 #endif
6020 
6021 #include "pcre2_jit_simd_inc.h"
6022 
6023 #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6024 
check_fast_forward_char_pair_simd(compiler_common * common,fast_forward_char_data * chars,int max)6025 static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)
6026 {
6027   sljit_s32 i, j, max_i = 0, max_j = 0;
6028   sljit_u32 max_pri = 0;
6029   sljit_s32 max_offset = max_fast_forward_char_pair_offset();
6030   PCRE2_UCHAR a1, a2, a_pri, b1, b2, b_pri;
6031 
6032   for (i = max - 1; i >= 1; i--)
6033     {
6034     if (chars[i].last_count > 2)
6035       {
6036       a1 = chars[i].chars[0];
6037       a2 = chars[i].chars[1];
6038       a_pri = chars[i].last_count;
6039 
6040       j = i - max_offset;
6041       if (j < 0)
6042         j = 0;
6043 
6044       while (j < i)
6045         {
6046         b_pri = chars[j].last_count;
6047         if (b_pri > 2 && (sljit_u32)a_pri + (sljit_u32)b_pri >= max_pri)
6048           {
6049           b1 = chars[j].chars[0];
6050           b2 = chars[j].chars[1];
6051 
6052           if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)
6053             {
6054             max_pri = a_pri + b_pri;
6055             max_i = i;
6056             max_j = j;
6057             }
6058           }
6059         j++;
6060         }
6061       }
6062     }
6063 
6064 if (max_pri == 0)
6065   return FALSE;
6066 
6067 fast_forward_char_pair_simd(common, max_i, chars[max_i].chars[0], chars[max_i].chars[1], max_j, chars[max_j].chars[0], chars[max_j].chars[1]);
6068 return TRUE;
6069 }
6070 
6071 #endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6072 
/* Emits code which moves STR_PTR forward to the next position where the code
unit at distance offset is char1 or char2 (the two may be equal).

When common->match_end_ptr is set, STR_END is saved in TMP3, replaced for the
duration of the search by a value derived from the stored limit, and restored
afterwards. In PCRE2_JIT_COMPLETE mode running out of input jumps to
common->failed_match; in the other modes the search simply stops. A non-zero
offset is only allowed in PCRE2_JIT_COMPLETE mode. */
static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *scan_loop;
struct sljit_jump *found;
struct sljit_jump *out_of_input;
PCRE2_UCHAR diff_bits;
BOOL limited = (common->match_end_ptr != 0);

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);

if (limited)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

/* The search runs on the code unit at the requested distance. */
if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (limited)
  {
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  /* STR_END = the smaller of STR_END and limit + offset + 1. */
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD

if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
  {
  fast_forward_char_simd(common, char1, char2, offset);

  if (offset > 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

  if (limited)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

/* Scalar search loop. */
scan_loop = LABEL();

out_of_input = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, out_of_input);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

diff_bits = char1 ^ char2;
if (diff_bits == 0)
  {
  /* A single code unit is searched. */
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, scan_loop);
  }
else if (is_powerof2(diff_bits))
  {
  /* The code units differ in one bit only: force that bit, compare once. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, diff_bits);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | diff_bits, scan_loop);
  }
else
  {
  found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, scan_loop);
  JUMPHERE(found);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  /* The position where the match would start must begin a character. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
  jumpto_if_not_utf_char_start(compiler, TMP1, scan_loop);
  }
#endif

/* Undo the offset and the increment done after reading the code unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(out_of_input);

if (limited)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
6158 
fast_forward_first_n_chars(compiler_common * common)6159 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
6160 {
6161 DEFINE_COMPILER;
6162 struct sljit_label *start;
6163 struct sljit_jump *match;
6164 fast_forward_char_data chars[MAX_N_CHARS];
6165 sljit_s32 offset;
6166 PCRE2_UCHAR mask;
6167 PCRE2_UCHAR *char_set, *char_set_end;
6168 int i, max, from;
6169 int range_right = -1, range_len;
6170 sljit_u8 *update_table = NULL;
6171 BOOL in_range;
6172 sljit_u32 rec_count;
6173 
6174 for (i = 0; i < MAX_N_CHARS; i++)
6175   {
6176   chars[i].count = 0;
6177   chars[i].last_count = 0;
6178   }
6179 
6180 rec_count = 10000;
6181 max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
6182 
6183 if (max < 1)
6184   return FALSE;
6185 
6186 /* Convert last_count to priority. */
6187 for (i = 0; i < max; i++)
6188   {
6189   SLJIT_ASSERT(chars[i].last_count <= chars[i].count);
6190 
6191   switch (chars[i].count)
6192     {
6193     case 0:
6194     chars[i].count = 255;
6195     chars[i].last_count = 0;
6196     break;
6197 
6198     case 1:
6199     chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
6200     /* Simplifies algorithms later. */
6201     chars[i].chars[1] = chars[i].chars[0];
6202     break;
6203 
6204     case 2:
6205     SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);
6206 
6207     if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
6208       chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
6209     else
6210       chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
6211     break;
6212 
6213     default:
6214     chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
6215     break;
6216     }
6217   }
6218 
6219 #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6220 if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))
6221   return TRUE;
6222 #endif
6223 
6224 in_range = FALSE;
6225 /* Prevent compiler "uninitialized" warning */
6226 from = 0;
6227 range_len = 4 /* minimum length */ - 1;
6228 for (i = 0; i <= max; i++)
6229   {
6230   if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
6231     {
6232     range_len = i - from;
6233     range_right = i - 1;
6234     }
6235 
6236   if (i < max && chars[i].count < 255)
6237     {
6238     SLJIT_ASSERT(chars[i].count > 0);
6239     if (!in_range)
6240       {
6241       in_range = TRUE;
6242       from = i;
6243       }
6244     }
6245   else
6246     in_range = FALSE;
6247   }
6248 
6249 if (range_right >= 0)
6250   {
6251   update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
6252   if (update_table == NULL)
6253     return TRUE;
6254   memset(update_table, IN_UCHARS(range_len), 256);
6255 
6256   for (i = 0; i < range_len; i++)
6257     {
6258     SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);
6259 
6260     char_set = chars[range_right - i].chars;
6261     char_set_end = char_set + chars[range_right - i].count;
6262     do
6263       {
6264       if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
6265         update_table[(*char_set) & 0xff] = IN_UCHARS(i);
6266       char_set++;
6267       }
6268     while (char_set < char_set_end);
6269     }
6270   }
6271 
6272 offset = -1;
6273 /* Scan forward. */
6274 for (i = 0; i < max; i++)
6275   {
6276   if (range_right == i)
6277     continue;
6278 
6279   if (offset == -1)
6280     {
6281     if (chars[i].last_count >= 2)
6282       offset = i;
6283     }
6284   else if (chars[offset].last_count < chars[i].last_count)
6285     offset = i;
6286   }
6287 
6288 SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));
6289 
6290 if (range_right < 0)
6291   {
6292   if (offset < 0)
6293     return FALSE;
6294   /* Works regardless the value is 1 or 2. */
6295   fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
6296   return TRUE;
6297   }
6298 
6299 SLJIT_ASSERT(range_right != offset);
6300 
6301 if (common->match_end_ptr != 0)
6302   {
6303   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6304   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6305   OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6306   add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6307   OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
6308   SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
6309   }
6310 else
6311   {
6312   OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6313   add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6314   }
6315 
6316 SLJIT_ASSERT(range_right >= 0);
6317 
6318 if (!HAS_VIRTUAL_REGISTERS)
6319   OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
6320 
6321 start = LABEL();
6322 add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6323 
6324 #if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
6325 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
6326 #else
6327 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
6328 #endif
6329 
6330 if (!HAS_VIRTUAL_REGISTERS)
6331   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
6332 else
6333   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
6334 
6335 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6336 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
6337 
6338 if (offset >= 0)
6339   {
6340   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
6341   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6342 
6343   if (chars[offset].count == 1)
6344     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
6345   else
6346     {
6347     mask = chars[offset].chars[0] ^ chars[offset].chars[1];
6348     if (is_powerof2(mask))
6349       {
6350       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
6351       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
6352       }
6353     else
6354       {
6355       match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
6356       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
6357       JUMPHERE(match);
6358       }
6359     }
6360   }
6361 
6362 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6363 if (common->utf && offset != 0)
6364   {
6365   if (offset < 0)
6366     {
6367     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6368     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6369     }
6370   else
6371     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6372 
6373   jumpto_if_not_utf_char_start(compiler, TMP1, start);
6374 
6375   if (offset < 0)
6376     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6377   }
6378 #endif
6379 
6380 if (offset >= 0)
6381   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6382 
6383 if (common->match_end_ptr != 0)
6384   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6385 else
6386   OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6387 return TRUE;
6388 }
6389 
fast_forward_first_char(compiler_common * common)6390 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
6391 {
6392 PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
6393 PCRE2_UCHAR oc;
6394 
6395 oc = first_char;
6396 if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
6397   {
6398   oc = TABLE_GET(first_char, common->fcc, first_char);
6399 #if defined SUPPORT_UNICODE
6400   if (first_char > 127 && (common->utf || common->ucp))
6401     oc = UCD_OTHERCASE(first_char);
6402 #endif
6403   }
6404 
6405 fast_forward_first_char2(common, first_char, oc, 0);
6406 }
6407 
fast_forward_newline(compiler_common * common)6408 static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
6409 {
6410 DEFINE_COMPILER;
6411 struct sljit_label *loop;
6412 struct sljit_jump *lastchar = NULL;
6413 struct sljit_jump *firstchar;
6414 struct sljit_jump *quit = NULL;
6415 struct sljit_jump *foundcr = NULL;
6416 struct sljit_jump *notfoundnl;
6417 jump_list *newline = NULL;
6418 
6419 if (common->match_end_ptr != 0)
6420   {
6421   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6422   OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6423   }
6424 
6425 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6426   {
6427 #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6428   if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)
6429     {
6430     if (HAS_VIRTUAL_REGISTERS)
6431       {
6432       OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6433       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6434       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6435       }
6436     else
6437       {
6438       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6439       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6440       }
6441     firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6442 
6443     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6444     OP2U(SLJIT_SUB | SLJIT_SET_Z, STR_PTR, 0, TMP1, 0);
6445     OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
6446 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6447     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6448 #endif
6449     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6450 
6451     fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);
6452     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6453     }
6454   else
6455 #endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6456     {
6457     lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6458     if (HAS_VIRTUAL_REGISTERS)
6459       {
6460       OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6461       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6462       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6463       }
6464     else
6465       {
6466       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6467       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6468       }
6469     firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6470 
6471     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
6472     OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, STR_PTR, 0, TMP1, 0);
6473     OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
6474 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6475     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
6476 #endif
6477     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6478 
6479     loop = LABEL();
6480     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6481     quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6482     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6483     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6484     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
6485     CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
6486 
6487     JUMPHERE(quit);
6488     JUMPHERE(lastchar);
6489     }
6490 
6491   JUMPHERE(firstchar);
6492 
6493   if (common->match_end_ptr != 0)
6494     OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6495   return;
6496   }
6497 
6498 if (HAS_VIRTUAL_REGISTERS)
6499   {
6500   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6501   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6502   }
6503 else
6504   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6505 
6506 /* Example: match /^/ to \r\n from offset 1. */
6507 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6508 
6509 if (common->nltype == NLTYPE_ANY)
6510   move_back(common, NULL, FALSE);
6511 else
6512   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6513 
6514 loop = LABEL();
6515 common->ff_newline_shortcut = loop;
6516 
6517 #ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
6518 if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))
6519   {
6520   if (common->nltype == NLTYPE_ANYCRLF)
6521     {
6522     fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);
6523     if (common->mode != PCRE2_JIT_COMPLETE)
6524       lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6525 
6526     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6527     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6528     quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6529     }
6530    else
6531     {
6532     fast_forward_char_simd(common, common->newline, common->newline, 0);
6533 
6534     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6535     if (common->mode != PCRE2_JIT_COMPLETE)
6536       {
6537       OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
6538       SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
6539       }
6540     }
6541   }
6542 else
6543 #endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */
6544   {
6545   read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
6546   lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6547   if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6548     foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6549   check_newlinechar(common, common->nltype, &newline, FALSE);
6550   set_jumps(newline, loop);
6551   }
6552 
6553 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6554   {
6555   if (quit == NULL)
6556     {
6557     quit = JUMP(SLJIT_JUMP);
6558     JUMPHERE(foundcr);
6559     }
6560 
6561   notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6562   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6563   OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL);
6564   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
6565 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6566   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6567 #endif
6568   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6569   JUMPHERE(notfoundnl);
6570   JUMPHERE(quit);
6571   }
6572 
6573 if (lastchar)
6574   JUMPHERE(lastchar);
6575 JUMPHERE(firstchar);
6576 
6577 if (common->match_end_ptr != 0)
6578   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6579 }
6580 
/* Forward declaration: fast_forward_start_bits() calls optimize_class()
before the point in this file where optimize_class() is defined. */
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6582 
/* Emits a loop that advances STR_PTR until the code unit it points to is
accepted by the start bitmap (common->re->start_bitmap), or until STR_END
is reached. The bitmap is 32 bytes: bit (c & 7) of byte (c >> 3) describes
code unit value c. TMP1 and TMP2 are used as scratch registers (TMP3 as well
when virtual registers are not in use, RETURN_ADDR when match_end_ptr is
set). */
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
{
DEFINE_COMPILER;
const sljit_u8 *start_bits = common->re->start_bitmap;
struct sljit_label *start;
struct sljit_jump *partial_quit;
#if PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *found = NULL;
#endif
jump_list *matches = NULL;

/* When a match end limit is stored in the frame, the real STR_END is parked
in RETURN_ADDR and STR_END is temporarily replaced by the smaller of STR_END
and (stored limit + one code unit). It is restored at the end. */
if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
  }

/* Top of the scanning loop. */
start = LABEL();

/* Leave the loop when the subject is exhausted. In complete (non-partial)
mode this jump is handed over to the failed_match list; otherwise it is
bound near the end of this function, after the STR_PTR adjustment. */
partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit);

/* Load the current code unit into TMP1 and step over it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* First try to express the bitmap as a few compares. The "nclass" argument
is the bitmap bit for value 255. On success, the jumps collected in
"matches" are sent back to the loop start (see the else branch below). */
if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
  {
  /* Generic fallback: explicit bitmap lookup. */
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* Values >= 255 are not looked up in the table: they are treated like
  value 255, i.e. accepted when its bit is set, skipped otherwise. */
  if ((start_bits[31] & 0x80) != 0)
    found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
  else
    CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
#elif defined SUPPORT_UNICODE
  /* In UTF mode, when is_char7_bitset() holds for the bitmap, any code
  unit above 127 is skipped without a table lookup. */
  if (common->utf && is_char7_bitset(start_bits, FALSE))
    CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
#endif
  /* TMP2 = bit index inside the byte, TMP1 = byte loaded from the bitmap. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
  /* Build the mask (1 << bit index) and test it against the bitmap byte.
  The two branches differ only in which register holds the mask. */
  if (!HAS_VIRTUAL_REGISTERS)
    {
    OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP3, 0);
    }
  else
    {
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
    }
  /* Bit not set: try the next code unit. */
  JUMPTO(SLJIT_ZERO, start);
  }
else
  set_jumps(matches, start);

#if PCRE2_CODE_UNIT_WIDTH != 8
if (found != NULL)
  JUMPHERE(found);
#endif

/* Undo the increment so that STR_PTR points at the accepted code unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* In partial modes the end-of-subject exit arrives here, skipping the
decrement above. */
if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(partial_quit);

/* Restore the real STR_END saved at the top. */
if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
}
6654 
/* Emits code that searches forward in the subject for the required code
unit req_char (or its other case when caseless is set) and stores the
position where it was found into the frame slot common->req_char_ptr.

Arguments:
  common         compiler context
  req_char       the code unit that must appear
  caseless       TRUE if the other case of req_char is acceptable as well
  has_firstchar  TRUE to start searching one code unit after STR_PTR,
                 FALSE to start at STR_PTR itself

Returns:         list of jumps taken when the code unit is not found; the
                 caller is responsible for binding them

TMP1 and TMP2 are used as scratch registers. */
static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *toolong;
struct sljit_jump *already_found;
struct sljit_jump *found;
struct sljit_jump *found_oc = NULL;
jump_list *not_found = NULL;
sljit_u32 oc, bit;

SLJIT_ASSERT(common->req_char_ptr != 0);
/* Two early exits that skip the search entirely:
   toolong:       STR_PTR + IN_UCHARS(REQ_CU_MAX) * 100 is still below STR_END
   already_found: STR_PTR is below the position stored by a previous search */
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);

/* TMP1 is the search cursor. */
if (has_firstchar)
  OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
else
  OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);

/* oc is the other case of req_char; it stays equal to req_char for a
caseful search. */
oc = req_char;
if (caseless)
  {
  oc = TABLE_GET(req_char, common->fcc, req_char);
#if defined SUPPORT_UNICODE
  if (req_char > 127 && (common->utf || common->ucp))
    oc = UCD_OTHERCASE(req_char);
#endif
  }

#ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
  {
  not_found = fast_requested_char_simd(common, req_char, oc);
  }
else
#endif
  {
  /* Scalar search loop: one code unit per iteration. */
  loop = LABEL();
  add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);

  if (req_char == oc)
    found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
  else
    {
    bit = req_char ^ oc;
    if (is_powerof2(bit))
      {
      /* The two cases differ in a single bit: force that bit on and do
      one compare. */
       OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
      found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
      }
    else
      {
      /* Otherwise compare against both cases separately. */
      found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
      found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
      }
    }
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_JUMP, loop);

  JUMPHERE(found);
  if (found_oc)
    JUMPHERE(found_oc);
  }

/* Remember where the code unit was found so that later calls can take the
already_found shortcut. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);

JUMPHERE(already_found);
JUMPHERE(toolong);
return not_found;
}
6730 
/* Emits the fast-call helper that unwinds saved frame records from the
backtracking stack. It repeatedly reads the word just below STACK_TOP and
acts on its sign:
   > 0   the value is an offset from the local base; two words are copied
         from the stack to that location and three words are popped
   < 0   the negated value is an offset from the local base; one word is
         copied from the stack to that location and two words are popped
   == 0  the helper returns to its caller
The return address is kept in RETURN_ADDR; TMP1, TMP2 and (without virtual
registers) TMP3 are used as scratch registers. */
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *mainloop;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
/* TMP1 = local base address, used to turn stored offsets into addresses. */
GET_LOCAL_BASE(TMP1, 0, 0);

/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -SSIZE_OF(sw));
OP2U(SLJIT_SUB | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);
jump = JUMP(SLJIT_SIG_LESS_EQUAL);

/* Positive entry: restore two consecutive words. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
  }
else
  {
  /* Same effect, but the values travel through TMP1 and TMP3. Because TMP1
  is overwritten here, the local base is loaded into it again afterwards. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
  GET_LOCAL_BASE(TMP1, 0, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);

/* Zero or negative entry. NOTE(review): sljit_set_current_flags presumably
tells sljit that the flags produced by the SUB above are still valid at this
label, so the next jump can test them without a new compare - confirm
against the sljit documentation. */
JUMPHERE(jump);
sljit_set_current_flags(compiler, SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z);
jump = JUMP(SLJIT_NOT_ZERO /* SIG_LESS */);
/* End of reverting values. */
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Negative entry: negate it to get the offset and restore a single word. */
JUMPHERE(jump);
OP2(SLJIT_SUB, TMP2, 0, SLJIT_IMM, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
  }
else
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);
}
6786 
#ifdef SUPPORT_UNICODE
/* Bit-mask helpers: a type number "bit" is represented by the single bit
(1 << bit), so that a set of types can be tested with one AND. The ucp_*
constants are defined elsewhere (presumably the Unicode general category
codes). */
#define UCPCAT(bit) (1 << (bit))
#define UCPCAT2(bit1, bit2) (UCPCAT(bit1) | UCPCAT(bit2))
#define UCPCAT3(bit1, bit2, bit3) (UCPCAT(bit1) | UCPCAT(bit2) | UCPCAT(bit3))
/* Mask with every bit from "start" to "end" (both inclusive) set. */
#define UCPCAT_RANGE(start, end) (((1 << ((end) + 1)) - 1) - ((1 << (start)) - 1))
/* Types ucp_Ll through ucp_Lu. */
#define UCPCAT_L UCPCAT_RANGE(ucp_Ll, ucp_Lu)
/* Types ucp_Nd through ucp_No. */
#define UCPCAT_N UCPCAT_RANGE(ucp_Nd, ucp_No)
/* Every bit from 0 up to and including ucp_Zs. */
#define UCPCAT_ALL ((1 << (ucp_Zs + 1)) - 1)
#endif
6796 
/* Emits the fast-call helper used for word boundary tests. The generated
code classifies the character before STR_PTR (result kept in TMP3) and the
character at STR_PTR (result in TMP2) as word / non-word, then XORs the two
with the zero flag set from the result, so on the normal return path TMP2 is
non-zero exactly when the two classifications differ.

Arguments:
  common   compiler context
  ucp      TRUE to classify with Unicode properties (only honoured when
           SUPPORT_UNICODE is defined), FALSE to use the ctypes table

The return address is stored in LOCALS0. When common->invalid_utf is set,
two extra return paths exist: TMP2 = -1 when both neighbouring characters
are invalid, and TMP2 = TMP3 when only the following one is invalid. */
static void check_wordboundary(compiler_common *common, BOOL ucp)
{
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
#ifdef SUPPORT_UNICODE
struct sljit_label *valid_utf;
jump_list *invalid_utf1 = NULL;
#endif /* SUPPORT_UNICODE */
jump_list *invalid_utf2 = NULL;
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
struct sljit_jump *jump;
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */

SLJIT_UNUSED_ARG(ucp);
/* The ctypes lookups below extract the word flag with a shift by 4. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Get type of the previous char, and put it to TMP3. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
/* At the start of the subject there is no previous character: TMP3 stays 0
(non-word). */
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  /* An invalid previous character jumps to the invalid_utf1 handler at the
  end of this function. */
  peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    /* Partial matching: step back temporarily so that check_start_used_ptr
    sees the position of the previous character. TMP1 and STR_PTR are
    preserved across the call via RETURN_ADDR and TMP2. */
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
    }
  }
else
#endif /* SUPPORT_UNICODE */
  {
  if (common->mode == PCRE2_JIT_COMPLETE)
    peek_char_back(common, READ_CHAR_MAX, NULL);
  else
    {
    /* Partial matching: move back, record the position, then read the
    character again, which advances STR_PTR (READ_CHAR_UPDATE_STR_PTR). */
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
    }
  }

/* Testing char type. */
#ifdef SUPPORT_UNICODE
if (ucp)
  {
  /* Call the getucdtype helper, turn the type left in TMP1 into a one-bit
  mask and test it against the word categories (Mn, Pc, the UCPCAT_L range
  and the UCPCAT_N range). */
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N);
  OP_FLAGS(SLJIT_MOV, TMP3, 0, SLJIT_NOT_ZERO);
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* Table based test: characters above 255 skip the lookup and keep the
  non-word value already in TMP3. */
#if PCRE2_CODE_UNIT_WIDTH != 8
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  /* Here TMP3 has already been zeroed. */
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
JUMPHERE(skipread);

/* Now classify the character at STR_PTR into TMP2. It defaults to 0
(non-word); the jumps collected by check_str_end keep that default. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);

/* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UNICODE

/* Re-entry point used by the invalid_utf1 handler below when the following
character turns out to be valid. */
valid_utf = LABEL();

if (ucp)
  {
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
  }
else
#endif /* SUPPORT_UNICODE */
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
set_jumps(skipread_list, LABEL());

/* Normal return: TMP2 = TMP2 ^ TMP3 with the zero flag set from the result;
the return address is reloaded from LOCALS0. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  /* The previous character is invalid: look at the following one. If it is
  valid, continue with its classification at valid_utf (TMP3 is still 0);
  otherwise return with TMP2 = -1. */
  set_jumps(invalid_utf1, LABEL());

  peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

  /* The following character is invalid: return the classification of the
  previous character in TMP2. */
  set_jumps(invalid_utf2, LABEL());
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
  }
#endif /* SUPPORT_UNICODE */
}
6945 
/* Tries to test the character in TMP1 against a 256-bit class bitmap using
a handful of compare instructions instead of a table lookup. The bitmap is
scanned for the positions where the bit value changes; if there are at most
four such transition points, compare-and-jump code is emitted.

Arguments:
  common      compiler context
  bits        the 32-byte class bitmap
  nclass      TRUE if characters with value >= 256 are considered members
  invert      TRUE to swap the meaning of member / non-member
  backtracks  list receiving the jumps; with invert == FALSE they are taken
              for characters that are not members

Returns:      TRUE if code was emitted, FALSE if the bitmap has too many
              transitions (nothing is emitted in that case) */
static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
DEFINE_COMPILER;
int ranges[MAX_CLASS_RANGE_SIZE];
sljit_u8 bit, cbit, all;
int i, byte, length = 0;

bit = bits[0] & 0x1;
/* All bits will be zero or one (since bit is zero or one). */
all = (sljit_u8)-bit;

/* Collect the transition points. ranges[n] is the first character value
whose bit differs from the bit of the preceding character. Whole bytes that
equal "all" (eight bits identical to the current bit) are skipped at once. */
for (i = 0; i < 256; )
  {
  byte = i >> 3;
  if ((i & 0x7) == 0 && bits[byte] == all)
    i += 8;
  else
    {
    cbit = (bits[byte] >> (i & 0x7)) & 0x1;
    if (cbit != bit)
      {
      if (length >= MAX_CLASS_RANGE_SIZE)
        return FALSE;
      ranges[length] = i;
      length++;
      bit = cbit;
      all = (sljit_u8)-cbit; /* sign extend bit into byte */
      }
    i++;
    }
  }

/* Values >= 256 are members exactly when nclass is set. If that differs
from the state of character 255, value 256 is one more transition point. */
if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
  {
  if (length >= MAX_CLASS_RANGE_SIZE)
    return FALSE;
  ranges[length] = 256;
  length++;
  }

if (length < 0 || length > 4)
  return FALSE;

/* From here on "bit" is the (possibly inverted) state of character 0, i.e.
of the first interval [0, ranges[0]). */
bit = bits[0] & 0x1;
if (invert) bit ^= 0x1;

/* No character is accepted. */
if (length == 0 && bit == 0)
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

switch(length)
  {
  case 0:
  /* When bit != 0, all characters are accepted. */
  return TRUE;

  case 1:
  /* One transition: a single threshold compare. */
  add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 2:
  /* Two transitions: one interval [ranges[0], ranges[1]) whose state is the
  opposite of "bit". A one-element interval becomes an equality test,
  otherwise TMP1 is rebased and compared against the interval length. */
  if (ranges[0] + 1 != ranges[1])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
    }
  else
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 3:
  /* Three transitions: a threshold test plus one interval test. */
  if (bit != 0)
    {
    /* Rejected: values >= ranges[2] and the interval [ranges[0], ranges[1]). */
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
    return TRUE;
    }

  /* Rejected: values < ranges[0] and the interval [ranges[1], ranges[2]). */
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
  return TRUE;

  case 4:
  /* Four transitions. Special case first: the two intervals
  [ranges[0], ranges[1]) and [ranges[2], ranges[3]) have the same length and
  differ only in one bit (their distance is a power of two that is clear in
  the first and set in the second). Forcing that bit on in TMP1 folds both
  intervals into the second one, so a single interval test suffices. */
  if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
      && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
      && (ranges[1] & (ranges[2] - ranges[0])) == 0
      && is_powerof2(ranges[2] - ranges[0]))
    {
    SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    return TRUE;
    }

  if (bit != 0)
    {
    /* Both intervals are rejected; test them one after the other. "i"
    tracks how much has already been subtracted from TMP1 so that the second
    test can compensate for it. */
    i = 0;
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      i = ranges[0];
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));

    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
    return TRUE;
    }

  /* Both intervals are accepted: reject everything outside
  [ranges[0], ranges[3]) and then the gap [ranges[1], ranges[2]). */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
  return TRUE;

  default:
  SLJIT_UNREACHABLE();
  return FALSE;
  }
}
7097 
/* Tries to test the character in TMP1 against a class bitmap that selects
only a few individual characters, using compare + conditional select
instructions instead of a table lookup. Requires SLJIT_HAS_CMOV.

The characters are first collected into char_list (from the inverted bitmap
when nclass is set). Two characters that differ only in bit 0x20 are merged
into one entry, which is marked with 0x100 and holds the value that has bit
0x20 set in its low byte.

Arguments:
  common      compiler context
  bits        the 32-byte class bitmap
  nclass      TRUE if the bitmap is to be treated as negated
  invert      TRUE to swap the sense of the final jump
  backtracks  list receiving the emitted jump

Returns:      TRUE if code was emitted, FALSE if the CPU feature is missing
              or more than MAX_CLASS_CHARS_SIZE entries would be needed
              (nothing is emitted in that case)

TMP2 is used to accumulate the result. */
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
DEFINE_COMPILER;
uint16_t char_list[MAX_CLASS_CHARS_SIZE];
uint8_t byte;
sljit_s32 type;
int i, j, k, len, c;

if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  return FALSE;

len = 0;

/* Walk the bitmap byte by byte, bit by bit, and collect the set bits. */
for (i = 0; i < 32; i++)
  {
  byte = bits[i];

  if (nclass)
    byte = (sljit_u8)~byte;

  j = 0;
  while (byte != 0)
    {
    if (byte & 0x1)
      {
      c = i * 8 + j;

      /* k == len afterwards means "c must be appended as a new entry". */
      k = len;

      if ((c & 0x20) != 0)
        {
        /* If c - 0x20 is already listed, merge c into that entry instead
        of adding a new one. */
        for (k = 0; k < len; k++)
          if (char_list[k] == c - 0x20)
            {
            char_list[k] |= 0x120;
            break;
            }
        }

      if (k == len)
        {
        if (len >= MAX_CLASS_CHARS_SIZE)
          return FALSE;

        char_list[len++] = (uint16_t) c;
        }
      }

    byte >>= 1;
    j++;
    }
  }

if (len == 0) return FALSE;  /* Should never occur, but stops analyzers complaining. */

i = 0;
j = 0;

/* TMP2 becomes non-zero when TMP1 equals one of the listed characters.
Character 0 needs separate handling because the selects below copy TMP1
itself into TMP2, which would leave TMP2 at zero. */
if (char_list[0] == 0)
  {
  i++;
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);

/* Unmerged entries: compare and, on equality, select TMP1 into TMP2.
Merged entries are only counted here (in j). */
while (i < len)
  {
  if ((char_list[i] & 0x100) != 0)
    j++;
  else
    {
    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i]);
    SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);
    }
  i++;
  }

/* Merged entries: force bit 0x20 on in TMP1 once, then compare against the
low byte of each merged entry. */
if (j != 0)
  {
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);

  for (i = 0; i < len; i++)
    if ((char_list[i] & 0x100) != 0)
      {
      j--;
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff);
      SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);
      }
  }

if (invert)
  nclass = !nclass;

/* Jump when TMP2 is zero (no listed character matched), or when it is
non-zero if the (possibly inverted) nclass is set. */
type = nclass ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
add_jump(compiler, backtracks, CMP(type, TMP2, 0, SLJIT_IMM, 0));
return TRUE;
}
7198 
/* Emits a compact membership test for the character in TMP1 against the
class bitmap "bits". The range based strategy is attempted first; the
individual character strategy is only attempted if the first one declines.

Returns TRUE if either strategy emitted code, FALSE if neither applies. */
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
return optimize_class_ranges(common, bits, nclass, invert, backtracks)
  || optimize_class_chars(common, bits, nclass, invert, backtracks);
}
7206 
/* Emits the fast-call helper that tests whether the character in TMP1 is
one of the "any newline" characters: 0x0a-0x0d and 0x85, plus 0x2028 and
0x2029 in 16/32-bit builds or in UTF mode of the 8-bit build. The result is
left in TMP2 (non-zero if it is a newline) with the zero flag set from it.
TMP1 is modified. The return address is kept in RETURN_ADDR. */
static void check_anynewline(compiler_common *common)
{
/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
DEFINE_COMPILER;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Rebase TMP1 to 0x0a so that 0x0a-0x0d becomes the range 0-3; all later
constants are expressed relative to 0x0a as well. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  /* Record the 0x85 result, then test 0x2028 and 0x2029 with one compare:
  after the rebase they differ only in the lowest bit, which is forced on. */
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the result of the last compare and set the zero flag from TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7233 
/* Emits the fast-call helper that tests whether the character in TMP1 is a
horizontal space: 0x09, 0x20 and 0xa0, plus 0x1680, 0x180e, 0x2000-0x200a,
0x202f, 0x205f and 0x3000 in 16/32-bit builds or in UTF mode of the 8-bit
build. The result is left in TMP2 (non-zero if it is a horizontal space)
with the zero flag set from it. TMP1 is modified on the wide character path.
The return address is kept in RETURN_ADDR. */
static void check_hspace(compiler_common *common)
{
/* Check whether TMP1 contains a horizontal space character. TMP2 holds the result. */
DEFINE_COMPILER;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Each compare sets the zero flag; the following OP_FLAGS accumulates the
outcome of the previous compare into TMP2. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Rebase TMP1 to 0x2000 so that 0x2000-0x200a becomes a single range
  test; the remaining constants are expressed relative to 0x2000. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the result of the last compare and set the zero flag from TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7272 
/* Emits the fast-call helper that tests whether the character in TMP1 is a
vertical whitespace character: 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029
in 16/32-bit builds or in UTF mode of the 8-bit build. The result is left
in TMP2 (non-zero if it is one of them) with the zero flag set from it.
TMP1 is modified. The return address is kept in RETURN_ADDR. */
static void check_vspace(compiler_common *common)
{
/* Check whether TMP1 contains a vertical space character. TMP2 holds the result. */
DEFINE_COMPILER;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Rebase TMP1 to 0x0a so that 0x0a-0x0d becomes the range 0-3; all later
constants are expressed relative to 0x0a as well. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  /* Record the 0x85 result, then test 0x2028 and 0x2029 with one compare:
  after the rebase they differ only in the lowest bit, which is forced on. */
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the result of the last compare and set the zero flag from TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7300 
/* Emits the fast-call helper that compares two runs of code units exactly
(caseful). On entry TMP1 points to the first run, TMP2 holds the length in
bytes (it is decremented by IN_UCHARS(1) per step) and STR_PTR is first
moved back by TMP2 to the start of the second run. The loop stops at the
first differing pair or when TMP2 reaches zero. The return address is saved
in LOCALS0 and reloaded into TMP1 before returning.
NOTE(review): the caller presumably inspects the flags or TMP2 after the
return to tell a full match from a mismatch - confirm at the call sites. */
static void do_casefulcmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;
int char1_reg;
int char2_reg;

/* Pick two registers to hold the code units being compared. With virtual
registers STR_END and STACK_TOP are borrowed; their values are saved and
restored below. */
if (HAS_VIRTUAL_REGISTERS)
  {
  char1_reg = STR_END;
  char2_reg = STACK_TOP;
  }
else
  {
  char1_reg = TMP3;
  char2_reg = RETURN_ADDR;
  }

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Save the borrowed registers into TMP3 / RETURN_ADDR. */
if (char1_reg == STR_END)
  {
  OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
  }

/* Three variants of the same loop. The SLJIT_MEM_SUPP calls only ask
whether the target supports the addressing form; the first supported one is
used. */
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  {
  /* Variant 1: loads with post-update of the address register. */
  label = LABEL();
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_NOT_ZERO, label);

  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  }
else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  {
  /* Variant 2: loads with pre-update of the address register. Both
  pointers start one code unit early; STR_PTR is corrected after the loop. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  label = LABEL();
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_NOT_ZERO, label);

  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }
else
  {
  /* Variant 3: plain loads followed by explicit pointer increments. */
  label = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_NOT_ZERO, label);

  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  }

/* Restore the borrowed registers. */
if (char1_reg == STR_END)
  {
  OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
  }

OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7380 
/* Emits the out-of-line (fast call) routine that compares two code unit
sequences caselessly, using the lower-casing table at common->lcc.

Register contract, as far as the emitted code shows:
  TMP1     address of the first sequence (walked forwards)
  STR_PTR  on entry it is first moved back by TMP2, then walked forwards
  TMP2     remaining length, decremented by IN_UCHARS(1) per unit compared

The loop body runs before the counter is tested, so the caller is expected
to pass a non-zero length. A mismatch leaves the loop before the decrement,
hence TMP2 reaches zero only when every unit compared equal. The return
address is parked in LOCALS0 and STR_END (used as a scratch register) in
LOCALS1 for the duration of the routine; TMP1 is overwritten with the return
address on exit. */
static void do_caselesscmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *loop_start;
struct sljit_jump *mismatch;
#if PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *skip_fold;
#endif
/* Scratch registers for the two units being compared and for the table base.
With virtual registers the second unit and the table borrow STACK_TOP and
STACK_LIMIT, which are preserved below. */
int first_ch = STR_END;
int second_ch = HAS_VIRTUAL_REGISTERS ? STACK_TOP : RETURN_ADDR;
int fold_table = HAS_VIRTUAL_REGISTERS ? STACK_LIMIT : TMP3;
/* Load flavour: 0 = plain load + explicit add, 1 = post-update, 2 = pre-update. */
int update_mode = 0;

/* Probe only (SLJIT_MEM_SUPP): find out which self-updating load is available. */
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, first_ch, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  update_mode = 1;
else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, first_ch, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  update_mode = 2;

/* Prologue: save the return address, rewind STR_PTR by the length and
preserve every register that is borrowed as scratch. */
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, first_ch, 0);

if (second_ch == STACK_TOP)
  {
  OP1(SLJIT_MOV, TMP3, 0, second_ch, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, fold_table, 0);
  }

OP1(SLJIT_MOV, fold_table, 0, SLJIT_IMM, common->lcc);

/* Loop head: fetch one unit from each sequence. */
switch (update_mode)
  {
  case 1:
  loop_start = LABEL();
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, first_ch, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, second_ch, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case 2:
  /* Pre-update loads advance first, so start one unit early. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  loop_start = LABEL();
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, first_ch, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, second_ch, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  loop_start = LABEL();
  OP1(MOV_UCHAR, first_ch, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, second_ch, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

/* Map both units through the table. In the wide modes units above 255 have
no table entry and are compared as they are. */
#if PCRE2_CODE_UNIT_WIDTH != 8
skip_fold = CMP(SLJIT_GREATER, first_ch, 0, SLJIT_IMM, 255);
OP1(SLJIT_MOV_U8, first_ch, 0, SLJIT_MEM2(fold_table, first_ch), 0);
JUMPHERE(skip_fold);
skip_fold = CMP(SLJIT_GREATER, second_ch, 0, SLJIT_IMM, 255);
OP1(SLJIT_MOV_U8, second_ch, 0, SLJIT_MEM2(fold_table, second_ch), 0);
JUMPHERE(skip_fold);
#else
OP1(SLJIT_MOV_U8, first_ch, 0, SLJIT_MEM2(fold_table, first_ch), 0);
OP1(SLJIT_MOV_U8, second_ch, 0, SLJIT_MEM2(fold_table, second_ch), 0);
#endif

/* The plain-load variant advances STR_PTR only here, after the lookups. */
if (update_mode == 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

mismatch = CMP(SLJIT_NOT_EQUAL, first_ch, 0, second_ch, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop_start);

/* Epilogue, shared by the mismatch exit and the normal loop exit. */
JUMPHERE(mismatch);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

/* Undo the one-unit bias introduced for the pre-update loads. */
if (update_mode == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (second_ch == STACK_TOP)
  {
  OP1(SLJIT_MOV, second_ch, 0, TMP3, 0);
  OP1(SLJIT_MOV, fold_table, 0, RETURN_ADDR, 0);
  }

OP1(SLJIT_MOV, first_ch, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7478 
/* Emits the code that checks one literal pattern character against the
subject. The subject units are read at negative offsets from STR_PTR
(-context->length), and every failed comparison is added to backtracks.

Arguments:
  common      compiler state
  caseless    TRUE if the character has to be matched caselessly
  cc          points to the first code unit of the pattern character
  context     state carried over a run of consecutive characters
  backtracks  list that receives the mismatch jumps

Returns:      cc advanced past the character (all of its code units when
              common->utf is set, otherwise a single unit)

Loads are issued one step ahead: the next chunk is fetched into
context->sourcereg, then the register roles are swapped and the previously
fetched chunk is compared. context->sourcereg == -1 marks the first call of
a run, which primes TMP1. Caseless matching relies on the two cases differing
in one bit: that bit is OR-ed into the subject value, which is then compared
with the pattern unit that has the same bit set. */
static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
    compare_context *context, jump_list **backtracks)
{
DEFINE_COMPILER;
unsigned int case_bit = 0;      /* Zero means: compare exactly. */
PCRE2_SPTR case_unit = NULL;    /* The code unit that contains case_bit. */
BOOL fold_here;                 /* The current unit is the one to be folded. */
#ifdef SUPPORT_UNICODE
int units_left = 1;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  case_bit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(case_bit);
  /* Split the packed value into the unit offset and the bit mask. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  case_unit = cc + (case_bit >> 8);
  case_bit &= 0xff;
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* Only characters in the BMP are handled here. Should there ever be
  characters outside the BMP that differ from their other case in a single
  bit (none exist at present), this has to be revisited for
  PCRE2_CODE_UNIT_WIDTH == 32. */
  case_unit = cc + (case_bit >> 9);
  /* Flag 0x100 selects the upper byte of the code unit. */
  case_bit = ((case_bit & 0x100) != 0) ? ((case_bit & 0xff) << 8) : (case_bit & 0xff);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  }

if (context->sourcereg == -1)
  {
  /* First character of the run: prime TMP1 with the widest load allowed. */
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if PCRE2_CODE_UNIT_WIDTH == 8
  else if (context->length >= 2)
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  else
#endif /* SLJIT_UNALIGNED && PCRE2_CODE_UNIT_WIDTH == [8|16] */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UNICODE
if (common->utf && HAS_EXTRALEN(*cc))
  units_left += GET_EXTRALEN(*cc);

do
  {
#endif

  context->length -= IN_UCHARS(1);
  fold_here = (case_bit != 0 && case_unit == cc);

#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)

  /* Unaligned reads are available: collect the units and compare them in
  2 or 4 byte chunks. */
  context->c.asuchars[context->ucharptr] = (PCRE2_UCHAR)(fold_here ? (*cc | case_bit) : *cc);
  context->oc.asuchars[context->ucharptr] = (PCRE2_UCHAR)(fold_here ? case_bit : 0);
  context->ucharptr++;

  /* Flush when the chunk is full or nothing more can be added to it. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Fetch the following chunk before testing the current one. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if PCRE2_CODE_UNIT_WIDTH == 8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */

    /* Swap roles: sourcereg now names the register loaded one step earlier. */
    if (context->sourcereg == TMP1)
      context->sourcereg = TMP2;
    else
      context->sourcereg = TMP1;

    switch(context->ucharptr)
      {
      case 4 / sizeof(PCRE2_UCHAR):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks,
        CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(PCRE2_UCHAR):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks,
        CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#if PCRE2_CODE_UNIT_WIDTH == 8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks,
        CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    context->ucharptr = 0;
    }

#else

  /* No unaligned reads, or 32 bit mode: one code unit per comparison. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  /* Swap roles: sourcereg now names the register loaded one step earlier. */
  if (context->sourcereg == TMP1)
    context->sourcereg = TMP2;
  else
    context->sourcereg = TMP1;

  if (fold_here)
    {
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, case_bit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | case_bit));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));

#endif

  cc++;
#ifdef SUPPORT_UNICODE
  }
while (--units_left > 0);
#endif

return cc;
}
7631 
7632 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
7633 
/* Rebases the character held in TMP1. TMP1 is kept as "character minus
charoffset"; when a different offset is requested, a single ADD or SUB is
emitted to move TMP1 to the new base, and charoffset is updated to match.

Caveats: the macro expands to two statements (an if and an assignment), so
it must not be used as the unbraced body of an if/else or a loop; it
evaluates its argument several times; and it needs a local variable named
charoffset as well as the usual compiler variable in scope. */
#define SET_CHAR_OFFSET(value) \
  if ((value) != charoffset) \
    { \
    if ((value) < charoffset) \
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
    else \
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
    } \
  charoffset = (value);
7643 
/* Forward declaration: compile_xclass_matchingpath() calls this function
(with OP_ALLANY) for classes that accept every character or none. */
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
7645 
#ifdef SUPPORT_UNICODE
/* Bits of the unicode_status word that compile_xclass_matchingpath() builds
while scanning an extended class. */

/* Some item still needs the character value after the UCD lookup has
overwritten TMP1, so a copy is kept in RETURN_ADDR and moved back later. */
#define XCLASS_SAVE_CHAR 0x001
/* The copy in RETURN_ADDR has already been made (by the bitmap test), so it
does not have to be made again before the UCD lookup. */
#define XCLASS_CHAR_SAVED 0x002
/* The chartype field of the UCD record is needed (category tests). */
#define XCLASS_HAS_TYPE 0x004
/* The script field of the UCD record is needed (PT_SC, or a non-negated PT_SCX). */
#define XCLASS_HAS_SCRIPT 0x008
/* The script extension set of the UCD record is needed (PT_SCX). */
#define XCLASS_HAS_SCRIPT_EXTENSION 0x010
/* The boolean property set of the UCD record is needed (PT_BOOL). */
#define XCLASS_HAS_BOOL 0x020
/* The bidi class of the UCD record is needed (PT_BIDICL). */
#define XCLASS_HAS_BIDICL 0x040
/* Any of the above record fields is needed: the UCD record lookup is emitted. */
#define XCLASS_NEEDS_UCD (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_SCRIPT_EXTENSION | XCLASS_HAS_BOOL | XCLASS_HAS_BIDICL)
/* The class contains a negated PT_SCX item; the script value is loaded into
TMP2 for those tests. */
#define XCLASS_SCRIPT_EXTENSION_NOTPROP 0x080
/* Where the previous TMP2 value was parked while TMP2 holds the script
value, i.e. the place it is restored from after the PT_SCX tests. */
#define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR 0x100
#define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0 0x200
#endif /* SUPPORT_UNICODE */
7659 
compile_xclass_matchingpath(compiler_common * common,PCRE2_SPTR cc,jump_list ** backtracks)7660 static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
7661 {
7662 DEFINE_COMPILER;
7663 jump_list *found = NULL;
7664 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
7665 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
7666 struct sljit_jump *jump = NULL;
7667 PCRE2_SPTR ccbegin;
7668 int compares, invertcmp, numberofcmps;
7669 #if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
7670 BOOL utf = common->utf;
7671 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
7672 
7673 #ifdef SUPPORT_UNICODE
7674 sljit_u32 unicode_status = 0;
7675 sljit_u32 category_list = 0;
7676 sljit_u32 items;
7677 int typereg = TMP1;
7678 const sljit_u32 *other_cases;
7679 #endif /* SUPPORT_UNICODE */
7680 
7681 /* Scanning the necessary info. */
7682 cc++;
7683 ccbegin = cc;
7684 compares = 0;
7685 
7686 if (cc[-1] & XCL_MAP)
7687   {
7688   min = 0;
7689   cc += 32 / sizeof(PCRE2_UCHAR);
7690   }
7691 
7692 while (*cc != XCL_END)
7693   {
7694   compares++;
7695 
7696   if (*cc == XCL_SINGLE)
7697     {
7698     cc ++;
7699     GETCHARINCTEST(c, cc);
7700     if (c > max) max = c;
7701     if (c < min) min = c;
7702 #ifdef SUPPORT_UNICODE
7703     unicode_status |= XCLASS_SAVE_CHAR;
7704 #endif /* SUPPORT_UNICODE */
7705     }
7706   else if (*cc == XCL_RANGE)
7707     {
7708     cc ++;
7709     GETCHARINCTEST(c, cc);
7710     if (c < min) min = c;
7711     GETCHARINCTEST(c, cc);
7712     if (c > max) max = c;
7713 #ifdef SUPPORT_UNICODE
7714     unicode_status |= XCLASS_SAVE_CHAR;
7715 #endif /* SUPPORT_UNICODE */
7716     }
7717 #ifdef SUPPORT_UNICODE
7718   else
7719     {
7720     SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7721     cc++;
7722 
7723     if (*cc == PT_CLIST && cc[-1] == XCL_PROP)
7724       {
7725       other_cases = PRIV(ucd_caseless_sets) + cc[1];
7726       while (*other_cases != NOTACHAR)
7727         {
7728         if (*other_cases > max) max = *other_cases;
7729         if (*other_cases < min) min = *other_cases;
7730         other_cases++;
7731         }
7732       }
7733     else
7734       {
7735       max = READ_CHAR_MAX;
7736       min = 0;
7737       }
7738 
7739     items = 0;
7740 
7741     switch(*cc)
7742       {
7743       case PT_ANY:
7744       /* Any either accepts everything or ignored. */
7745       if (cc[-1] == XCL_PROP)
7746         items = UCPCAT_ALL;
7747       else
7748         compares--;
7749       break;
7750 
7751       case PT_LAMP:
7752       items = UCPCAT3(ucp_Lu, ucp_Ll, ucp_Lt);
7753       break;
7754 
7755       case PT_GC:
7756       items = UCPCAT_RANGE(PRIV(ucp_typerange)[(int)cc[1] * 2], PRIV(ucp_typerange)[(int)cc[1] * 2 + 1]);
7757       break;
7758 
7759       case PT_PC:
7760       items = UCPCAT(cc[1]);
7761       break;
7762 
7763       case PT_WORD:
7764       items = UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N;
7765       break;
7766 
7767       case PT_ALNUM:
7768       items = UCPCAT_L | UCPCAT_N;
7769       break;
7770 
7771       case PT_SCX:
7772       unicode_status |= XCLASS_HAS_SCRIPT_EXTENSION;
7773       if (cc[-1] == XCL_NOTPROP)
7774         {
7775         unicode_status |= XCLASS_SCRIPT_EXTENSION_NOTPROP;
7776         break;
7777         }
7778       compares++;
7779       /* Fall through */
7780 
7781       case PT_SC:
7782       unicode_status |= XCLASS_HAS_SCRIPT;
7783       break;
7784 
7785       case PT_SPACE:
7786       case PT_PXSPACE:
7787       case PT_PXGRAPH:
7788       case PT_PXPRINT:
7789       case PT_PXPUNCT:
7790       unicode_status |= XCLASS_SAVE_CHAR | XCLASS_HAS_TYPE;
7791       break;
7792 
7793       case PT_CLIST:
7794       case PT_UCNC:
7795       case PT_PXXDIGIT:
7796       unicode_status |= XCLASS_SAVE_CHAR;
7797       break;
7798 
7799       case PT_BOOL:
7800       unicode_status |= XCLASS_HAS_BOOL;
7801       break;
7802 
7803       case PT_BIDICL:
7804       unicode_status |= XCLASS_HAS_BIDICL;
7805       break;
7806 
7807       default:
7808       SLJIT_UNREACHABLE();
7809       break;
7810       }
7811 
7812     if (items > 0)
7813       {
7814       if (cc[-1] == XCL_NOTPROP)
7815         items ^= UCPCAT_ALL;
7816       category_list |= items;
7817       unicode_status |= XCLASS_HAS_TYPE;
7818       compares--;
7819       }
7820 
7821     cc += 2;
7822     }
7823 #endif /* SUPPORT_UNICODE */
7824   }
7825 
7826 #ifdef SUPPORT_UNICODE
7827 if (category_list == UCPCAT_ALL)
7828   {
7829   /* All characters are accepted, same as dotall. */
7830   compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
7831   if (list == backtracks)
7832     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7833   return;
7834   }
7835 
7836 if (compares == 0 && category_list == 0)
7837   {
7838   /* No characters are accepted, same as (*F) or dotall. */
7839   compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
7840   if (list != backtracks)
7841     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7842   return;
7843   }
7844 #else /* !SUPPORT_UNICODE */
7845 SLJIT_ASSERT(compares > 0);
7846 #endif /* SUPPORT_UNICODE */
7847 
7848 /* We are not necessary in utf mode even in 8 bit mode. */
7849 cc = ccbegin;
7850 if ((cc[-1] & XCL_NOT) != 0)
7851   read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
7852 else
7853   {
7854 #ifdef SUPPORT_UNICODE
7855   read_char(common, min, max, (unicode_status & XCLASS_NEEDS_UCD) ? backtracks : NULL, 0);
7856 #else /* !SUPPORT_UNICODE */
7857   read_char(common, min, max, NULL, 0);
7858 #endif /* SUPPORT_UNICODE */
7859   }
7860 
7861 if ((cc[-1] & XCL_HASPROP) == 0)
7862   {
7863   if ((cc[-1] & XCL_MAP) != 0)
7864     {
7865     jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7866     if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
7867       {
7868       OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7869       OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7870       OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7871       OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7872       OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
7873       add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
7874       }
7875 
7876     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7877     JUMPHERE(jump);
7878 
7879     cc += 32 / sizeof(PCRE2_UCHAR);
7880     }
7881   else
7882     {
7883     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
7884     add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
7885     }
7886   }
7887 else if ((cc[-1] & XCL_MAP) != 0)
7888   {
7889   OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7890 #ifdef SUPPORT_UNICODE
7891   unicode_status |= XCLASS_CHAR_SAVED;
7892 #endif /* SUPPORT_UNICODE */
7893   if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
7894     {
7895 #if PCRE2_CODE_UNIT_WIDTH == 8
7896     jump = NULL;
7897     if (common->utf)
7898 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7899       jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7900 
7901     OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7902     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7903     OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7904     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7905     OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
7906     add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
7907 
7908 #if PCRE2_CODE_UNIT_WIDTH == 8
7909     if (common->utf)
7910 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7911       JUMPHERE(jump);
7912     }
7913 
7914   OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7915   cc += 32 / sizeof(PCRE2_UCHAR);
7916   }
7917 
7918 #ifdef SUPPORT_UNICODE
7919 if (unicode_status & XCLASS_NEEDS_UCD)
7920   {
7921   if ((unicode_status & (XCLASS_SAVE_CHAR | XCLASS_CHAR_SAVED)) == XCLASS_SAVE_CHAR)
7922     OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7923 
7924 #if PCRE2_CODE_UNIT_WIDTH == 32
7925   if (!common->utf)
7926     {
7927     jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
7928     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
7929     JUMPHERE(jump);
7930     }
7931 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
7932 
7933   OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7934   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
7935   OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
7936   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
7937   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7938   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7939   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
7940   OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
7941   OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
7942   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
7943   OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
7944 
7945   ccbegin = cc;
7946 
7947   if (category_list != 0)
7948     compares++;
7949 
7950   if (unicode_status & XCLASS_HAS_BIDICL)
7951     {
7952     OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
7953     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT);
7954 
7955     while (*cc != XCL_END)
7956       {
7957       if (*cc == XCL_SINGLE)
7958         {
7959         cc ++;
7960         GETCHARINCTEST(c, cc);
7961         }
7962       else if (*cc == XCL_RANGE)
7963         {
7964         cc ++;
7965         GETCHARINCTEST(c, cc);
7966         GETCHARINCTEST(c, cc);
7967         }
7968       else
7969         {
7970         SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7971         cc++;
7972         if (*cc == PT_BIDICL)
7973           {
7974           compares--;
7975           invertcmp = (compares == 0 && list != backtracks);
7976           if (cc[-1] == XCL_NOTPROP)
7977             invertcmp ^= 0x1;
7978           jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
7979           add_jump(compiler, compares > 0 ? list : backtracks, jump);
7980           }
7981         cc += 2;
7982         }
7983       }
7984 
7985     cc = ccbegin;
7986     }
7987 
7988   if (unicode_status & XCLASS_HAS_BOOL)
7989     {
7990     OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops));
7991     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BPROPS_MASK);
7992     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
7993 
7994     while (*cc != XCL_END)
7995       {
7996       if (*cc == XCL_SINGLE)
7997         {
7998         cc ++;
7999         GETCHARINCTEST(c, cc);
8000         }
8001       else if (*cc == XCL_RANGE)
8002         {
8003         cc ++;
8004         GETCHARINCTEST(c, cc);
8005         GETCHARINCTEST(c, cc);
8006         }
8007       else
8008         {
8009         SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
8010         cc++;
8011         if (*cc == PT_BOOL)
8012           {
8013           compares--;
8014           invertcmp = (compares == 0 && list != backtracks);
8015           if (cc[-1] == XCL_NOTPROP)
8016             invertcmp ^= 0x1;
8017 
8018           OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)(1u << (cc[1] & 0x1f)));
8019           add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
8020           }
8021         cc += 2;
8022         }
8023       }
8024 
8025     cc = ccbegin;
8026     }
8027 
8028   if (unicode_status & XCLASS_HAS_SCRIPT)
8029     {
8030     OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
8031 
8032     while (*cc != XCL_END)
8033       {
8034       if (*cc == XCL_SINGLE)
8035         {
8036         cc ++;
8037         GETCHARINCTEST(c, cc);
8038         }
8039       else if (*cc == XCL_RANGE)
8040         {
8041         cc ++;
8042         GETCHARINCTEST(c, cc);
8043         GETCHARINCTEST(c, cc);
8044         }
8045       else
8046         {
8047         SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
8048         cc++;
8049         switch (*cc)
8050           {
8051           case PT_SCX:
8052           if (cc[-1] == XCL_NOTPROP)
8053             break;
8054           /* Fall through */
8055 
8056           case PT_SC:
8057           compares--;
8058           invertcmp = (compares == 0 && list != backtracks);
8059           if (cc[-1] == XCL_NOTPROP)
8060             invertcmp ^= 0x1;
8061 
8062           add_jump(compiler, compares > 0 ? list : backtracks, CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]));
8063           }
8064         cc += 2;
8065         }
8066       }
8067 
8068     cc = ccbegin;
8069     }
8070 
8071   if (unicode_status & XCLASS_HAS_SCRIPT_EXTENSION)
8072     {
8073     OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
8074     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK);
8075     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
8076 
8077     if (unicode_status & XCLASS_SCRIPT_EXTENSION_NOTPROP)
8078       {
8079       if (unicode_status & XCLASS_HAS_TYPE)
8080         {
8081         if (unicode_status & XCLASS_SAVE_CHAR)
8082           {
8083           OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP2, 0);
8084           unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0;
8085           }
8086         else
8087           {
8088           OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
8089           unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR;
8090           }
8091         }
8092       OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
8093       }
8094 
8095     while (*cc != XCL_END)
8096       {
8097       if (*cc == XCL_SINGLE)
8098         {
8099         cc ++;
8100         GETCHARINCTEST(c, cc);
8101         }
8102       else if (*cc == XCL_RANGE)
8103         {
8104         cc ++;
8105         GETCHARINCTEST(c, cc);
8106         GETCHARINCTEST(c, cc);
8107         }
8108       else
8109         {
8110         SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
8111         cc++;
8112         if (*cc == PT_SCX)
8113           {
8114           compares--;
8115           invertcmp = (compares == 0 && list != backtracks);
8116 
8117           jump = NULL;
8118           if (cc[-1] == XCL_NOTPROP)
8119             {
8120             jump = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, (int)cc[1]);
8121             if (invertcmp)
8122               {
8123               add_jump(compiler, backtracks, jump);
8124               jump = NULL;
8125               }
8126             invertcmp ^= 0x1;
8127             }
8128 
8129           OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_script_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)(1u << (cc[1] & 0x1f)));
8130           add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
8131 
8132           if (jump != NULL)
8133             JUMPHERE(jump);
8134           }
8135         cc += 2;
8136         }
8137       }
8138 
8139     if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0)
8140       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
8141     else if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR)
8142       OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
8143     cc = ccbegin;
8144     }
8145 
8146   if (unicode_status & XCLASS_SAVE_CHAR)
8147     OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
8148 
8149   if (unicode_status & XCLASS_HAS_TYPE)
8150     {
8151     if (unicode_status & XCLASS_SAVE_CHAR)
8152       typereg = RETURN_ADDR;
8153 
8154     OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
8155     OP2(SLJIT_SHL, typereg, 0, SLJIT_IMM, 1, TMP2, 0);
8156 
8157     if (category_list > 0)
8158       {
8159       compares--;
8160       invertcmp = (compares == 0 && list != backtracks);
8161       OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, category_list);
8162       add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
8163       }
8164     }
8165   }
8166 #endif /* SUPPORT_UNICODE */
8167 
8168 /* Generating code. */
8169 charoffset = 0;
8170 numberofcmps = 0;
8171 
8172 while (*cc != XCL_END)
8173   {
8174   compares--;
8175   invertcmp = (compares == 0 && list != backtracks);
8176   jump = NULL;
8177 
8178   if (*cc == XCL_SINGLE)
8179     {
8180     cc ++;
8181     GETCHARINCTEST(c, cc);
8182 
8183     if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
8184       {
8185       OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8186       OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8187       numberofcmps++;
8188       }
8189     else if (numberofcmps > 0)
8190       {
8191       OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8192       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
8193       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8194       numberofcmps = 0;
8195       }
8196     else
8197       {
8198       jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8199       numberofcmps = 0;
8200       }
8201     }
8202   else if (*cc == XCL_RANGE)
8203     {
8204     cc ++;
8205     GETCHARINCTEST(c, cc);
8206     SET_CHAR_OFFSET(c);
8207     GETCHARINCTEST(c, cc);
8208 
8209     if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
8210       {
8211       OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8212       OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8213       numberofcmps++;
8214       }
8215     else if (numberofcmps > 0)
8216       {
8217       OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8218       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
8219       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8220       numberofcmps = 0;
8221       }
8222     else
8223       {
8224       jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8225       numberofcmps = 0;
8226       }
8227     }
8228 #ifdef SUPPORT_UNICODE
8229   else
8230     {
8231     SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
8232     if (*cc == XCL_NOTPROP)
8233       invertcmp ^= 0x1;
8234     cc++;
8235     switch(*cc)
8236       {
8237       case PT_ANY:
8238       case PT_LAMP:
8239       case PT_GC:
8240       case PT_PC:
8241       case PT_SC:
8242       case PT_SCX:
8243       case PT_BOOL:
8244       case PT_BIDICL:
8245       case PT_WORD:
8246       case PT_ALNUM:
8247       compares++;
8248       /* Already handled. */
8249       break;
8250 
8251       case PT_SPACE:
8252       case PT_PXSPACE:
8253       SET_CHAR_OFFSET(9);
8254       OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
8255       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8256 
8257       OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
8258       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8259 
8260       OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
8261       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8262 
8263       OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Zl, ucp_Zs));
8264       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO);
8265       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8266       break;
8267 
8268       case PT_CLIST:
8269       other_cases = PRIV(ucd_caseless_sets) + cc[1];
8270 
8271       /* At least three characters are required.
8272          Otherwise this case would be handled by the normal code path. */
8273       SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
8274       SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
8275 
8276       /* Optimizing character pairs, if their difference is power of 2. */
8277       if (is_powerof2(other_cases[1] ^ other_cases[0]))
8278         {
8279         if (charoffset == 0)
8280           OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8281         else
8282           {
8283           OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
8284           OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8285           }
8286         OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[1]);
8287         OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8288         other_cases += 2;
8289         }
8290       else if (is_powerof2(other_cases[2] ^ other_cases[1]))
8291         {
8292         if (charoffset == 0)
8293           OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
8294         else
8295           {
8296           OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
8297           OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8298           }
8299         OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[2]);
8300         OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8301 
8302         OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
8303         OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
8304 
8305         other_cases += 3;
8306         }
8307       else
8308         {
8309         OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
8310         OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8311         }
8312 
8313       while (*other_cases != NOTACHAR)
8314         {
8315         OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
8316         OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
8317         }
8318       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8319       break;
8320 
8321       case PT_UCNC:
8322       OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
8323       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8324       OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
8325       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8326       OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
8327       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8328 
8329       SET_CHAR_OFFSET(0xa0);
8330       OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
8331       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8332       SET_CHAR_OFFSET(0);
8333       OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
8334       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
8335       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8336       break;
8337 
8338       case PT_PXGRAPH:
8339       OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT_RANGE(ucp_Zl, ucp_Zs));
8340       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
8341 
8342       OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf));
8343       jump = JUMP(SLJIT_ZERO);
8344 
8345       c = charoffset;
8346       /* In case of ucp_Cf, we overwrite the result. */
8347       SET_CHAR_OFFSET(0x2066);
8348       OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
8349       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8350 
8351       OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
8352       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8353 
8354       OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
8355       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8356 
8357       /* Restore charoffset. */
8358       SET_CHAR_OFFSET(c);
8359 
8360       JUMPHERE(jump);
8361       jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
8362       break;
8363 
8364       case PT_PXPRINT:
8365       OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT2(ucp_Zl, ucp_Zp));
8366       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
8367 
8368       OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf));
8369       jump = JUMP(SLJIT_ZERO);
8370 
8371       c = charoffset;
8372       /* In case of ucp_Cf, we overwrite the result. */
8373       SET_CHAR_OFFSET(0x2066);
8374       OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
8375       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8376 
8377       OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
8378       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8379 
8380       /* Restore charoffset. */
8381       SET_CHAR_OFFSET(c);
8382 
8383       JUMPHERE(jump);
8384       jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
8385       break;
8386 
8387       case PT_PXPUNCT:
8388       OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Sc, ucp_So));
8389       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
8390 
8391       SET_CHAR_OFFSET(0);
8392       OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x7f);
8393       OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
8394 
8395       OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Pc, ucp_Ps));
8396       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO);
8397       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8398       break;
8399 
8400       case PT_PXXDIGIT:
8401       SET_CHAR_OFFSET(CHAR_A);
8402       OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, ~0x20);
8403       OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP2, 0, SLJIT_IMM, CHAR_F - CHAR_A);
8404       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8405 
8406       SET_CHAR_OFFSET(CHAR_0);
8407       OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_9 - CHAR_0);
8408       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8409 
8410       SET_CHAR_OFFSET(0xff10);
8411       jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff10);
8412 
8413       OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff19 - 0xff10);
8414       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8415 
8416       SET_CHAR_OFFSET(0xff21);
8417       OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff26 - 0xff21);
8418       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8419 
8420       SET_CHAR_OFFSET(0xff41);
8421       OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff41);
8422       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8423 
8424       SET_CHAR_OFFSET(0xff10);
8425 
8426       JUMPHERE(jump);
8427       OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);
8428       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8429       break;
8430 
8431       default:
8432       SLJIT_UNREACHABLE();
8433       break;
8434       }
8435     cc += 2;
8436     }
8437 #endif /* SUPPORT_UNICODE */
8438 
8439   if (jump != NULL)
8440     add_jump(compiler, compares > 0 ? list : backtracks, jump);
8441   }
8442 
8443 SLJIT_ASSERT(compares == 0);
8444 if (found != NULL)
8445   set_jumps(found, LABEL());
8446 }
8447 
8448 #undef SET_TYPE_OFFSET
8449 #undef SET_CHAR_OFFSET
8450 
8451 #endif
8452 
compile_simple_assertion_matchingpath(compiler_common * common,PCRE2_UCHAR type,PCRE2_SPTR cc,jump_list ** backtracks)8453 static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
8454 {
8455 DEFINE_COMPILER;
8456 struct sljit_jump *jump[4];
8457 
8458 switch(type)
8459   {
8460   case OP_SOD:
8461   if (HAS_VIRTUAL_REGISTERS)
8462     {
8463     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8464     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8465     }
8466   else
8467     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8468   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
8469   return cc;
8470 
8471   case OP_SOM:
8472   if (HAS_VIRTUAL_REGISTERS)
8473     {
8474     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8475     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
8476     }
8477   else
8478     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
8479   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
8480   return cc;
8481 
8482   case OP_NOT_WORD_BOUNDARY:
8483   case OP_WORD_BOUNDARY:
8484   case OP_NOT_UCP_WORD_BOUNDARY:
8485   case OP_UCP_WORD_BOUNDARY:
8486   add_jump(compiler, (type == OP_NOT_WORD_BOUNDARY || type == OP_WORD_BOUNDARY) ? &common->wordboundary : &common->ucp_wordboundary, JUMP(SLJIT_FAST_CALL));
8487 #ifdef SUPPORT_UNICODE
8488   if (common->invalid_utf)
8489     {
8490     add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8491     return cc;
8492     }
8493 #endif /* SUPPORT_UNICODE */
8494   sljit_set_current_flags(compiler, SLJIT_SET_Z);
8495   add_jump(compiler, backtracks, JUMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8496   return cc;
8497 
8498   case OP_EODN:
8499   /* Requires rather complex checks. */
8500   jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
8501   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8502     {
8503     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8504     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8505     if (common->mode == PCRE2_JIT_COMPLETE)
8506       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
8507     else
8508       {
8509       jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
8510       OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, STR_END, 0);
8511       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
8512       OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
8513       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
8514       add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
8515       check_partial(common, TRUE);
8516       add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8517       JUMPHERE(jump[1]);
8518       }
8519     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8520     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8521     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8522     }
8523   else if (common->nltype == NLTYPE_FIXED)
8524     {
8525     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8526     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8527     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
8528     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
8529     }
8530   else
8531     {
8532     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8533     jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
8534     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8535     OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, TMP2, 0, STR_END, 0);
8536     jump[2] = JUMP(SLJIT_GREATER);
8537     add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
8538     /* Equal. */
8539     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8540     jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
8541     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8542 
8543     JUMPHERE(jump[1]);
8544     if (common->nltype == NLTYPE_ANYCRLF)
8545       {
8546       OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8547       add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
8548       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
8549       }
8550     else
8551       {
8552       OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
8553       read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
8554       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
8555       add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
8556       sljit_set_current_flags(compiler, SLJIT_SET_Z);
8557       add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
8558       OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
8559       }
8560     JUMPHERE(jump[2]);
8561     JUMPHERE(jump[3]);
8562     }
8563   JUMPHERE(jump[0]);
8564   if (common->mode != PCRE2_JIT_COMPLETE)
8565     check_partial(common, TRUE);
8566   return cc;
8567 
8568   case OP_EOD:
8569   add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
8570   if (common->mode != PCRE2_JIT_COMPLETE)
8571     check_partial(common, TRUE);
8572   return cc;
8573 
8574   case OP_DOLL:
8575   if (HAS_VIRTUAL_REGISTERS)
8576     {
8577     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8578     OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8579     }
8580   else
8581     OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8582   add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8583 
8584   if (!common->endonly)
8585     compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
8586   else
8587     {
8588     add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
8589     check_partial(common, FALSE);
8590     }
8591   return cc;
8592 
8593   case OP_DOLLM:
8594   jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
8595   if (HAS_VIRTUAL_REGISTERS)
8596     {
8597     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8598     OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8599     }
8600   else
8601     OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8602   add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8603   check_partial(common, FALSE);
8604   jump[0] = JUMP(SLJIT_JUMP);
8605   JUMPHERE(jump[1]);
8606 
8607   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8608     {
8609     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8610     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8611     if (common->mode == PCRE2_JIT_COMPLETE)
8612       add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
8613     else
8614       {
8615       jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
8616       /* STR_PTR = STR_END - IN_UCHARS(1) */
8617       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8618       check_partial(common, TRUE);
8619       add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8620       JUMPHERE(jump[1]);
8621       }
8622 
8623     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8624     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8625     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8626     }
8627   else
8628     {
8629     peek_char(common, common->nlmax, TMP3, 0, NULL);
8630     check_newlinechar(common, common->nltype, backtracks, FALSE);
8631     }
8632   JUMPHERE(jump[0]);
8633   return cc;
8634 
8635   case OP_CIRC:
8636   if (HAS_VIRTUAL_REGISTERS)
8637     {
8638     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8639     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
8640     add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
8641     OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8642     add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8643     }
8644   else
8645     {
8646     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8647     add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
8648     OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8649     add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8650     }
8651   return cc;
8652 
8653   case OP_CIRCM:
8654   /* TMP2 might be used by peek_char_back. */
8655   if (HAS_VIRTUAL_REGISTERS)
8656     {
8657     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8658     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8659     jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
8660     OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8661     }
8662   else
8663     {
8664     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8665     jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
8666     OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8667     }
8668   add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8669   jump[0] = JUMP(SLJIT_JUMP);
8670   JUMPHERE(jump[1]);
8671 
8672   if (!common->alt_circumflex)
8673     add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8674 
8675   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8676     {
8677     OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8678     add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
8679     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
8680     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
8681     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8682     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8683     }
8684   else
8685     {
8686     peek_char_back(common, common->nlmax, backtracks);
8687     check_newlinechar(common, common->nltype, backtracks, FALSE);
8688     }
8689   JUMPHERE(jump[0]);
8690   return cc;
8691   }
8692 SLJIT_UNREACHABLE();
8693 return cc;
8694 }
8695 
8696 #ifdef SUPPORT_UNICODE
8697 
8698 #if PCRE2_CODE_UNIT_WIDTH != 32
8699 
8700 /* The code in this function copies the logic of the interpreter function that
8701 is defined in the pcre2_extuni.c source. If that code is updated, this
8702 function, and those below it, must be kept in step (note by PH, June 2024). */
8703 
do_extuni_utf(jit_arguments * args,PCRE2_SPTR cc)8704 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
8705 {
8706 PCRE2_SPTR start_subject = args->begin;
8707 PCRE2_SPTR end_subject = args->end;
8708 int lgb, rgb, ricount;
8709 PCRE2_SPTR prevcc, endcc, bptr;
8710 BOOL first = TRUE;
8711 BOOL was_ep_ZWJ = FALSE;
8712 uint32_t c;
8713 
8714 prevcc = cc;
8715 endcc = NULL;
8716 do
8717   {
8718   GETCHARINC(c, cc);
8719   rgb = UCD_GRAPHBREAK(c);
8720 
8721   if (first)
8722     {
8723     lgb = rgb;
8724     endcc = cc;
8725     first = FALSE;
8726     continue;
8727     }
8728 
8729   if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8730     break;
8731 
8732   /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
8733   preceded by Extended Pictographic. */
8734 
8735   if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ)
8736     break;
8737 
8738   /* Not breaking between Regional Indicators is allowed only if there
8739   are an even number of preceding RIs. */
8740 
8741   if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8742     {
8743     ricount = 0;
8744     bptr = prevcc;
8745 
8746     /* bptr is pointing to the left-hand character */
8747     while (bptr > start_subject)
8748       {
8749       bptr--;
8750       BACKCHAR(bptr);
8751       GETCHAR(c, bptr);
8752 
8753       if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
8754         break;
8755 
8756       ricount++;
8757       }
8758 
8759     if ((ricount & 1) != 0) break;  /* Grapheme break required */
8760     }
8761 
8762   /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
8763   between; see next statement). */
8764 
8765   was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ);
8766 
8767   /* If Extend follows Extended_Pictographic, do not update lgb; this allows
8768   any number of them before a following ZWJ. */
8769 
8770   if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic)
8771     lgb = rgb;
8772 
8773   prevcc = endcc;
8774   endcc = cc;
8775   }
8776 while (cc < end_subject);
8777 
8778 return endcc;
8779 }
8780 
8781 #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
8782 
8783 /* The code in this function copies the logic of the interpreter function that
8784 is defined in the pcre2_extuni.c source. If that code is updated, this
8785 function, and the one below it, must be kept in step (note by PH, June 2024). */
8786 
do_extuni_utf_invalid(jit_arguments * args,PCRE2_SPTR cc)8787 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
8788 {
8789 PCRE2_SPTR start_subject = args->begin;
8790 PCRE2_SPTR end_subject = args->end;
8791 int lgb, rgb, ricount;
8792 PCRE2_SPTR prevcc, endcc, bptr;
8793 BOOL first = TRUE;
8794 BOOL was_ep_ZWJ = FALSE;
8795 uint32_t c;
8796 
8797 prevcc = cc;
8798 endcc = NULL;
8799 do
8800   {
8801   GETCHARINC_INVALID(c, cc, end_subject, break);
8802   rgb = UCD_GRAPHBREAK(c);
8803 
8804   if (first)
8805     {
8806     lgb = rgb;
8807     endcc = cc;
8808     first = FALSE;
8809     continue;
8810     }
8811 
8812   if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8813     break;
8814 
8815   /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
8816   preceded by Extended Pictographic. */
8817 
8818   if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ)
8819     break;
8820 
8821   /* Not breaking between Regional Indicators is allowed only if there
8822   are an even number of preceding RIs. */
8823 
8824   if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8825     {
8826     ricount = 0;
8827     bptr = prevcc;
8828 
8829     /* bptr is pointing to the left-hand character */
8830     while (bptr > start_subject)
8831       {
8832       GETCHARBACK_INVALID(c, bptr, start_subject, break);
8833 
8834       if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
8835         break;
8836 
8837       ricount++;
8838       }
8839 
8840     if ((ricount & 1) != 0)
8841       break;  /* Grapheme break required */
8842     }
8843 
8844   /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
8845   between; see next statement). */
8846 
8847   was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ);
8848 
8849   /* If Extend follows Extended_Pictographic, do not update lgb; this allows
8850   any number of them before a following ZWJ. */
8851 
8852   if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic)
8853     lgb = rgb;
8854 
8855   prevcc = endcc;
8856   endcc = cc;
8857   }
8858 while (cc < end_subject);
8859 
8860 return endcc;
8861 }
8862 
8863 /* The code in this function copies the logic of the interpreter function that
8864 is defined in the pcre2_extuni.c source. If that code is updated, this
8865 function must be kept in step (note by PH, June 2024). */
8866 
do_extuni_no_utf(jit_arguments * args,PCRE2_SPTR cc)8867 static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
8868 {
8869 PCRE2_SPTR start_subject = args->begin;
8870 PCRE2_SPTR end_subject = args->end;
8871 int lgb, rgb, ricount;
8872 PCRE2_SPTR bptr;
8873 uint32_t c;
8874 BOOL was_ep_ZWJ = FALSE;
8875 
8876 /* Patch by PH */
8877 /* GETCHARINC(c, cc); */
8878 c = *cc++;
8879 
8880 #if PCRE2_CODE_UNIT_WIDTH == 32
8881 if (c >= 0x110000)
8882   return cc;
8883 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8884 lgb = UCD_GRAPHBREAK(c);
8885 
8886 while (cc < end_subject)
8887   {
8888   c = *cc;
8889 #if PCRE2_CODE_UNIT_WIDTH == 32
8890   if (c >= 0x110000)
8891     break;
8892 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8893   rgb = UCD_GRAPHBREAK(c);
8894 
8895   if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8896     break;
8897 
8898   /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
8899   preceded by Extended Pictographic. */
8900 
8901   if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ)
8902     break;
8903 
8904   /* Not breaking between Regional Indicators is allowed only if there
8905   are an even number of preceding RIs. */
8906 
8907   if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8908     {
8909     ricount = 0;
8910     bptr = cc - 1;
8911 
8912     /* bptr is pointing to the left-hand character */
8913     while (bptr > start_subject)
8914       {
8915       bptr--;
8916       c = *bptr;
8917 #if PCRE2_CODE_UNIT_WIDTH == 32
8918       if (c >= 0x110000)
8919         break;
8920 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8921 
8922       if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break;
8923 
8924       ricount++;
8925       }
8926 
8927     if ((ricount & 1) != 0)
8928       break;  /* Grapheme break required */
8929     }
8930 
8931   /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
8932   between; see next statement). */
8933 
8934   was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ);
8935 
8936   /* If Extend follows Extended_Pictographic, do not update lgb; this allows
8937   any number of them before a following ZWJ. */
8938 
8939   if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic)
8940     lgb = rgb;
8941 
8942   cc++;
8943   }
8944 
8945 return cc;
8946 }
8947 
8948 #endif /* SUPPORT_UNICODE */
8949 
compile_char1_matchingpath(compiler_common * common,PCRE2_UCHAR type,PCRE2_SPTR cc,jump_list ** backtracks,BOOL check_str_ptr)8950 static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
8951 {
8952 DEFINE_COMPILER;
8953 int length;
8954 unsigned int c, oc, bit;
8955 compare_context context;
8956 struct sljit_jump *jump[3];
8957 jump_list *end_list;
8958 #ifdef SUPPORT_UNICODE
8959 PCRE2_UCHAR propdata[5];
8960 #endif /* SUPPORT_UNICODE */
8961 
8962 switch(type)
8963   {
8964   case OP_NOT_DIGIT:
8965   case OP_DIGIT:
  /* Digits are usually 0-9, so it is worth optimizing for them. */
8967   if (check_str_ptr)
8968     detect_partial_match(common, backtracks);
8969 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8970   if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
8971     read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
8972   else
8973 #endif
8974     read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
8975     /* Flip the starting bit in the negative case. */
8976   OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_digit);
8977   add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8978   return cc;
8979 
8980   case OP_NOT_WHITESPACE:
8981   case OP_WHITESPACE:
8982   if (check_str_ptr)
8983     detect_partial_match(common, backtracks);
8984 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8985   if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
8986     read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
8987   else
8988 #endif
8989     read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
8990   OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_space);
8991   add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8992   return cc;
8993 
8994   case OP_NOT_WORDCHAR:
8995   case OP_WORDCHAR:
8996   if (check_str_ptr)
8997     detect_partial_match(common, backtracks);
8998 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8999   if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
9000     read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
9001   else
9002 #endif
9003     read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
9004   OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_word);
9005   add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
9006   return cc;
9007 
9008   case OP_ANY:
9009   if (check_str_ptr)
9010     detect_partial_match(common, backtracks);
9011   read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
9012   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
9013     {
9014     jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
9015     end_list = NULL;
9016     if (common->mode != PCRE2_JIT_PARTIAL_HARD)
9017       add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
9018     else
9019       check_str_end(common, &end_list);
9020 
9021     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
9022     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
9023     set_jumps(end_list, LABEL());
9024     JUMPHERE(jump[0]);
9025     }
9026   else
9027     check_newlinechar(common, common->nltype, backtracks, TRUE);
9028   return cc;
9029 
9030   case OP_ALLANY:
9031   if (check_str_ptr)
9032     detect_partial_match(common, backtracks);
9033 #ifdef SUPPORT_UNICODE
9034   if (common->utf && common->invalid_utf)
9035     {
9036     read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
9037     return cc;
9038     }
9039 #endif /* SUPPORT_UNICODE */
9040 
9041   skip_valid_char(common);
9042   return cc;
9043 
9044   case OP_ANYBYTE:
9045   if (check_str_ptr)
9046     detect_partial_match(common, backtracks);
9047   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9048   return cc;
9049 
9050 #ifdef SUPPORT_UNICODE
9051   case OP_NOTPROP:
9052   case OP_PROP:
9053   propdata[0] = XCL_HASPROP;
9054   propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
9055   propdata[2] = cc[0];
9056   propdata[3] = cc[1];
9057   propdata[4] = XCL_END;
9058   if (check_str_ptr)
9059     detect_partial_match(common, backtracks);
9060   compile_xclass_matchingpath(common, propdata, backtracks);
9061   return cc + 2;
9062 #endif
9063 
9064   case OP_ANYNL:
9065   if (check_str_ptr)
9066     detect_partial_match(common, backtracks);
9067   read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
9068   jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
9069   /* We don't need to handle soft partial matching case. */
9070   end_list = NULL;
9071   if (common->mode != PCRE2_JIT_PARTIAL_HARD)
9072     add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
9073   else
9074     check_str_end(common, &end_list);
9075   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
9076   jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
9077   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9078   jump[2] = JUMP(SLJIT_JUMP);
9079   JUMPHERE(jump[0]);
9080   check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
9081   set_jumps(end_list, LABEL());
9082   JUMPHERE(jump[1]);
9083   JUMPHERE(jump[2]);
9084   return cc;
9085 
9086   case OP_NOT_HSPACE:
9087   case OP_HSPACE:
9088   if (check_str_ptr)
9089     detect_partial_match(common, backtracks);
9090 
9091   if (type == OP_NOT_HSPACE)
9092     read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
9093   else
9094     read_char(common, 0x9, 0x3000, NULL, 0);
9095 
9096   add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
9097   sljit_set_current_flags(compiler, SLJIT_SET_Z);
9098   add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
9099   return cc;
9100 
9101   case OP_NOT_VSPACE:
9102   case OP_VSPACE:
9103   if (check_str_ptr)
9104     detect_partial_match(common, backtracks);
9105 
9106   if (type == OP_NOT_VSPACE)
9107     read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
9108   else
9109     read_char(common, 0xa, 0x2029, NULL, 0);
9110 
9111   add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
9112   sljit_set_current_flags(compiler, SLJIT_SET_Z);
9113   add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
9114   return cc;
9115 
9116 #ifdef SUPPORT_UNICODE
9117   case OP_EXTUNI:
9118   if (check_str_ptr)
9119     detect_partial_match(common, backtracks);
9120 
9121   SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
9122   OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
9123 
9124 #if PCRE2_CODE_UNIT_WIDTH != 32
9125   sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
9126     common->utf ? (common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_utf)) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
9127   if (common->invalid_utf)
9128     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
9129 #else
9130   sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
9131     common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
9132   if (common->invalid_utf)
9133     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
9134 #endif
9135 
9136   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
9137 
9138   if (common->mode == PCRE2_JIT_PARTIAL_HARD)
9139     {
9140     jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
9141     /* Since we successfully read a char above, partial matching must occur. */
9142     check_partial(common, TRUE);
9143     JUMPHERE(jump[0]);
9144     }
9145   return cc;
9146 #endif
9147 
9148   case OP_CHAR:
9149   case OP_CHARI:
9150   length = 1;
9151 #ifdef SUPPORT_UNICODE
9152   if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
9153 #endif
9154 
9155   if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
9156     detect_partial_match(common, backtracks);
9157 
9158   if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
9159     {
9160     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
9161     if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
9162       add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
9163 
9164     context.length = IN_UCHARS(length);
9165     context.sourcereg = -1;
9166 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
9167     context.ucharptr = 0;
9168 #endif
9169     return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
9170     }
9171 
9172 #ifdef SUPPORT_UNICODE
9173   if (common->utf)
9174     {
9175     GETCHAR(c, cc);
9176     }
9177   else
9178 #endif
9179     c = *cc;
9180 
9181   SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));
9182 
9183   if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
9184     add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
9185 
9186   oc = char_othercase(common, c);
9187   read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);
9188 
9189   SLJIT_ASSERT(!is_powerof2(c ^ oc));
9190 
9191   if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
9192     {
9193     OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, oc);
9194     SELECT(SLJIT_EQUAL, TMP1, SLJIT_IMM, c, TMP1);
9195     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9196     }
9197   else
9198     {
9199     jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
9200     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
9201     JUMPHERE(jump[0]);
9202     }
9203   return cc + length;
9204 
9205   case OP_NOT:
9206   case OP_NOTI:
9207   if (check_str_ptr)
9208     detect_partial_match(common, backtracks);
9209 
9210   length = 1;
9211 #ifdef SUPPORT_UNICODE
9212   if (common->utf)
9213     {
9214 #if PCRE2_CODE_UNIT_WIDTH == 8
9215     c = *cc;
9216     if (c < 128 && !common->invalid_utf)
9217       {
9218       OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
9219       if (type == OP_NOT || !char_has_othercase(common, cc))
9220         add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9221       else
9222         {
9223         /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
9224         OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
9225         add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
9226         }
9227       /* Skip the variable-length character. */
9228       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9229       jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
9230       OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
9231       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
9232       JUMPHERE(jump[0]);
9233       return cc + 1;
9234       }
9235     else
9236 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
9237       {
9238       GETCHARLEN(c, cc, length);
9239       }
9240     }
9241   else
9242 #endif /* SUPPORT_UNICODE */
9243     c = *cc;
9244 
9245   if (type == OP_NOT || !char_has_othercase(common, cc))
9246     {
9247     read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
9248     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9249     }
9250   else
9251     {
9252     oc = char_othercase(common, c);
9253     read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
9254     bit = c ^ oc;
9255     if (is_powerof2(bit))
9256       {
9257       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
9258       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
9259       }
9260     else
9261       {
9262       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9263       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
9264       }
9265     }
9266   return cc + length;
9267 
9268   case OP_CLASS:
9269   case OP_NCLASS:
9270   if (check_str_ptr)
9271     detect_partial_match(common, backtracks);
9272 
9273 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
9274   bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
9275   if (type == OP_NCLASS)
9276     read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
9277   else
9278     read_char(common, 0, bit, NULL, 0);
9279 #else
9280   if (type == OP_NCLASS)
9281     read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
9282   else
9283     read_char(common, 0, 255, NULL, 0);
9284 #endif
9285 
9286   if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
9287     return cc + 32 / sizeof(PCRE2_UCHAR);
9288 
9289 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
9290   jump[0] = NULL;
9291   if (common->utf)
9292     {
9293     jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
9294     if (type == OP_CLASS)
9295       {
9296       add_jump(compiler, backtracks, jump[0]);
9297       jump[0] = NULL;
9298       }
9299     }
9300 #elif PCRE2_CODE_UNIT_WIDTH != 8
9301   jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
9302   if (type == OP_CLASS)
9303     {
9304     add_jump(compiler, backtracks, jump[0]);
9305     jump[0] = NULL;
9306     }
9307 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
9308 
9309   OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
9310   OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
9311   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
9312   OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
9313   OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
9314   add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
9315 
9316 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
9317   if (jump[0] != NULL)
9318     JUMPHERE(jump[0]);
9319 #endif
9320   return cc + 32 / sizeof(PCRE2_UCHAR);
9321 
9322 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
9323   case OP_XCLASS:
9324   if (check_str_ptr)
9325     detect_partial_match(common, backtracks);
9326   compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
9327   return cc + GET(cc, 0) - 1;
9328 #endif
9329   }
9330 SLJIT_UNREACHABLE();
9331 return cc;
9332 }
9333 
compile_charn_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,jump_list ** backtracks)9334 static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
9335 {
9336 /* This function consumes at least one input character. */
9337 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
9338 DEFINE_COMPILER;
9339 PCRE2_SPTR ccbegin = cc;
9340 compare_context context;
9341 int size;
9342 
9343 context.length = 0;
9344 do
9345   {
9346   if (cc >= ccend)
9347     break;
9348 
9349   if (*cc == OP_CHAR)
9350     {
9351     size = 1;
9352 #ifdef SUPPORT_UNICODE
9353     if (common->utf && HAS_EXTRALEN(cc[1]))
9354       size += GET_EXTRALEN(cc[1]);
9355 #endif
9356     }
9357   else if (*cc == OP_CHARI)
9358     {
9359     size = 1;
9360 #ifdef SUPPORT_UNICODE
9361     if (common->utf)
9362       {
9363       if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
9364         size = 0;
9365       else if (HAS_EXTRALEN(cc[1]))
9366         size += GET_EXTRALEN(cc[1]);
9367       }
9368     else
9369 #endif
9370     if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
9371       size = 0;
9372     }
9373   else
9374     size = 0;
9375 
9376   cc += 1 + size;
9377   context.length += IN_UCHARS(size);
9378   }
9379 while (size > 0 && context.length <= 128);
9380 
9381 cc = ccbegin;
9382 if (context.length > 0)
9383   {
9384   /* We have a fixed-length byte sequence. */
9385   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
9386   add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
9387 
9388   context.sourcereg = -1;
9389 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
9390   context.ucharptr = 0;
9391 #endif
9392   do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
9393   return cc;
9394   }
9395 
9396 /* A non-fixed length character will be checked if length == 0. */
9397 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
9398 }
9399 
9400 /* Forward definitions. */
9401 static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
9402 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
9403 
/* Allocates a backtrack record of `size` bytes with sljit_alloc_memory(),
clears it, stores `ccstart` as its pattern position and pushes it on top of
the parent's backtrack list (the previous top is kept in ->prev).

The identifiers `compiler`, `parent` and `backtrack` must be in scope where
the macro is expanded; `backtrack` receives the new record. If the compiler
reports an error after the allocation, the enclosing function returns `error`
before the record is touched. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Same as PUSH_BACKTRACK, for use inside functions returning void: on a
compiler error the enclosing function simply returns. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Views the in-scope `backtrack` pointer as a pointer to the given record
type. `type` is a type name, so it cannot be parenthesized here. */
#define BACKTRACK_AS(type) ((type *)backtrack)
9431 
/* Emits code that picks the capture group used by an OP_DNREF / OP_DNREFI
item (a reference by a name that is shared by several groups).

The candidates are read from the name table: GET2(cc, 1) is the index of the
first entry and GET2(cc, 1 + IMM2_SIZE) the number of entries. For each
candidate, TMP2 is loaded with the local address of its OVECTOR pair, and the
generated code stops searching at the first candidate whose start slot
differs from OVECTOR(1). The last candidate is not compared that way: if
backtracks is not NULL and common->unset_backref is not set, a start slot
equal to OVECTOR(1) jumps to backtracks instead.

On exit from the generated code TMP2 addresses the chosen OVECTOR pair and
TMP1 holds the value of OVECTOR(1). */
static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int remaining = GET2(cc, 1 + IMM2_SIZE);
PCRE2_SPTR entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
unsigned int ovec_index;
jump_list *found = NULL;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* Every candidate except the last one: leave the search as soon as a start
slot differs from OVECTOR(1). */
for (remaining--; remaining > 0; remaining--)
  {
  ovec_index = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovec_index));
  add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index), TMP1, 0));
  entry += common->name_entry_size;
  }

/* Last candidate: it is selected unconditionally, optionally failing when
its start slot still equals OVECTOR(1). */
ovec_index = GET2(entry, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovec_index));
if (backtracks != NULL && !common->unset_backref)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index), TMP1, 0));

set_jumps(found, LABEL());
}
9461 
/* Emits code that matches the subject at STR_PTR against the text captured
by a back reference.

cc          points to the reference opcode. For OP_REF / OP_REFI the group
            number is read from the pattern. For any other opcode (OP_DNREF /
            OP_DNREFI) the generated code expects TMP2 to address the OVECTOR
            pair of the selected group, as left by compile_dnref_search().
backtracks  list receiving the jumps taken when the reference does not match.
withchecks  when TRUE, emit the "group is unset" check (numbered references
            only, and only if common->unset_backref is not set) and detect an
            empty captured string.
emptyfail   when an empty captured string is detected, TRUE makes it jump to
            backtracks, FALSE lets it succeed without consuming anything.

In partial matching modes, running out of subject while the compared part
still matches goes through check_partial() before backtracking.

The caseful comparison routine is used for OP_REF and OP_DNREF, the caseless
one for OP_REFI and OP_DNREFI. */
static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
int offset = 0;
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;
#if defined SUPPORT_UNICODE
struct sljit_label *loop;
struct sljit_label *caseless_loop;
jump_list *no_match = NULL;
int source_reg = COUNT_MATCH;
int source_end_reg = ARGUMENTS;
int char1_reg = STACK_LIMIT;
#endif /* SUPPORT_UNICODE */

/* TMP1 = start of the captured string. */
if (ref)
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->unset_backref)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }
else
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);

#if defined SUPPORT_UNICODE
/* NOTE(review): this branch is entered only for OP_REFI, so the !ref arm
below is currently unreachable and OP_DNREFI in UTF mode uses the code unit
comparison further down. Confirm whether that is intended before widening
the condition (it relies on common->iref_ptr being allocated). */
if (common->utf && *cc == OP_REFI)
  {
  SLJIT_ASSERT(common->iref_ptr != 0);

  /* TMP2 = end of the captured string. */
  if (ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  if (withchecks && emptyfail)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));

  /* Three registers are borrowed for the loop; their values are saved in
  the iref_ptr local area and restored on every exit path. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0);

  OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
  OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);

  loop = LABEL();
  /* The whole captured string has been compared: success. */
  jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
  /* The subject ended first. */
  partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

  /* Read original character. It must be a valid UTF character. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);

  read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);

  OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);

  /* Read second character. */
  read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);

  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);

  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));

  /* TMP2 = address of ucd_records[TMP2]: the index is scaled by 12 (4 + 8). */
  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));

  /* Compare the other case of the subject character. */
  OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

  /* A zero caseset means there is nothing more to try. Otherwise walk the
  ucd_caseless_sets entries starting at that index, as long as they are less
  than the reference character. */
  add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));

  caseless_loop = LABEL();
  OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
  OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, TMP1, 0, char1_reg, 0);
  JUMPTO(SLJIT_EQUAL, loop);
  JUMPTO(SLJIT_LESS, caseless_loop);

  /* Mismatch: restore the borrowed registers and backtrack. In complete
  matching mode running out of subject is handled the same way. */
  set_jumps(no_match, LABEL());
  if (common->mode == PCRE2_JIT_COMPLETE)
    JUMPHERE(partial);

  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    /* Partial matching modes: the subject ended inside the reference. */
    JUMPHERE(partial);
    OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
    OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
    OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);

    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    }

  /* Success: restore the borrowed registers and continue. */
  JUMPHERE(jump);
  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  return;
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* Only the caseless opcodes may use the caseless comparison. Testing for
  OP_REF alone would send the caseful OP_DNREF to the caseless routine. */
  jump_list **compare_calls = (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp;

  /* TMP2 = length of the captured string (end - start). */
  if (ref)
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  if (withchecks)
    jump = JUMP(SLJIT_ZERO);

  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, partial);

  add_jump(compiler, compare_calls, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    nopartial = JUMP(SLJIT_JUMP);
    JUMPHERE(partial);
    /* TMP2 -= STR_END - STR_PTR */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, compare_calls, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(partial);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }

/* Empty captured string (only detected when withchecks is set). */
if (jump != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, jump);
  else
    JUMPHERE(jump);
  }
}
9626 
compile_ref_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9627 static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9628 {
9629 DEFINE_COMPILER;
9630 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9631 backtrack_common *backtrack;
9632 PCRE2_UCHAR type;
9633 int offset = 0;
9634 struct sljit_label *label;
9635 struct sljit_jump *zerolength;
9636 struct sljit_jump *jump = NULL;
9637 PCRE2_SPTR ccbegin = cc;
9638 int min = 0, max = 0;
9639 BOOL minimize;
9640 
9641 PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
9642 
9643 if (ref)
9644   offset = GET2(cc, 1) << 1;
9645 else
9646   cc += IMM2_SIZE;
9647 type = cc[1 + IMM2_SIZE];
9648 
9649 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
9650 minimize = (type & 0x1) != 0;
9651 switch(type)
9652   {
9653   case OP_CRSTAR:
9654   case OP_CRMINSTAR:
9655   min = 0;
9656   max = 0;
9657   cc += 1 + IMM2_SIZE + 1;
9658   break;
9659   case OP_CRPLUS:
9660   case OP_CRMINPLUS:
9661   min = 1;
9662   max = 0;
9663   cc += 1 + IMM2_SIZE + 1;
9664   break;
9665   case OP_CRQUERY:
9666   case OP_CRMINQUERY:
9667   min = 0;
9668   max = 1;
9669   cc += 1 + IMM2_SIZE + 1;
9670   break;
9671   case OP_CRRANGE:
9672   case OP_CRMINRANGE:
9673   min = GET2(cc, 1 + IMM2_SIZE + 1);
9674   max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
9675   cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
9676   break;
9677   default:
9678   SLJIT_UNREACHABLE();
9679   break;
9680   }
9681 
9682 if (!minimize)
9683   {
9684   if (min == 0)
9685     {
9686     allocate_stack(common, 2);
9687     if (ref)
9688       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9689     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9690     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9691     /* Temporary release of STR_PTR. */
9692     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9693     /* Handles both invalid and empty cases. Since the minimum repeat,
9694     is zero the invalid case is basically the same as an empty case. */
9695     if (ref)
9696       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9697     else
9698       {
9699       compile_dnref_search(common, ccbegin, NULL);
9700       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9701       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9702       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9703       }
9704     /* Restore if not zero length. */
9705     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9706     }
9707   else
9708     {
9709     allocate_stack(common, 1);
9710     if (ref)
9711       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9712     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9713 
9714     if (ref)
9715       {
9716       if (!common->unset_backref)
9717         add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9718       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9719       }
9720     else
9721       {
9722       compile_dnref_search(common, ccbegin, &backtrack->own_backtracks);
9723       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9724       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9725       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9726       }
9727     }
9728 
9729   if (min > 1 || max > 1)
9730     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
9731 
9732   label = LABEL();
9733   if (!ref)
9734     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
9735   compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, FALSE, FALSE);
9736 
9737   if (min > 1 || max > 1)
9738     {
9739     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
9740     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9741     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
9742     if (min > 1)
9743       CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
9744     if (max > 1)
9745       {
9746       jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
9747       allocate_stack(common, 1);
9748       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9749       JUMPTO(SLJIT_JUMP, label);
9750       JUMPHERE(jump);
9751       }
9752     }
9753 
9754   if (max == 0)
9755     {
9756     /* Includes min > 1 case as well. */
9757     allocate_stack(common, 1);
9758     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9759     JUMPTO(SLJIT_JUMP, label);
9760     }
9761 
9762   JUMPHERE(zerolength);
9763   BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9764 
9765   count_match(common);
9766   return cc;
9767   }
9768 
9769 allocate_stack(common, ref ? 2 : 3);
9770 if (ref)
9771   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9772 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9773 if (type != OP_CRMINSTAR)
9774   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9775 
9776 if (min == 0)
9777   {
9778   /* Handles both invalid and empty cases. Since the minimum repeat,
9779   is zero the invalid case is basically the same as an empty case. */
9780   if (ref)
9781     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9782   else
9783     {
9784     compile_dnref_search(common, ccbegin, NULL);
9785     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9786     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9787     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9788     }
9789   /* Length is non-zero, we can match real repeats. */
9790   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9791   jump = JUMP(SLJIT_JUMP);
9792   }
9793 else
9794   {
9795   if (ref)
9796     {
9797     if (!common->unset_backref)
9798       add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9799     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9800     }
9801   else
9802     {
9803     compile_dnref_search(common, ccbegin, &backtrack->own_backtracks);
9804     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9805     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9806     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9807     }
9808   }
9809 
9810 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9811 if (max > 0)
9812   add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
9813 
9814 if (!ref)
9815   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9816 compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, TRUE, TRUE);
9817 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9818 
9819 if (min > 1)
9820   {
9821   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9822   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9823   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9824   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
9825   }
9826 else if (max > 0)
9827   OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
9828 
9829 if (jump != NULL)
9830   JUMPHERE(jump);
9831 JUMPHERE(zerolength);
9832 
9833 count_match(common);
9834 return cc;
9835 }
9836 
compile_recurse_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9837 static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9838 {
9839 DEFINE_COMPILER;
9840 backtrack_common *backtrack;
9841 recurse_entry *entry = common->entries;
9842 recurse_entry *prev = NULL;
9843 sljit_sw start = GET(cc, 1);
9844 PCRE2_SPTR start_cc;
9845 BOOL needs_control_head;
9846 
9847 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
9848 
9849 /* Inlining simple patterns. */
9850 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
9851   {
9852   start_cc = common->start + start;
9853   compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
9854   BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
9855   return cc + 1 + LINK_SIZE;
9856   }
9857 
9858 while (entry != NULL)
9859   {
9860   if (entry->start == start)
9861     break;
9862   prev = entry;
9863   entry = entry->next;
9864   }
9865 
9866 if (entry == NULL)
9867   {
9868   entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
9869   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9870     return NULL;
9871   entry->next = NULL;
9872   entry->entry_label = NULL;
9873   entry->backtrack_label = NULL;
9874   entry->entry_calls = NULL;
9875   entry->backtrack_calls = NULL;
9876   entry->start = start;
9877 
9878   if (prev != NULL)
9879     prev->next = entry;
9880   else
9881     common->entries = entry;
9882   }
9883 
9884 BACKTRACK_AS(recurse_backtrack)->entry = entry;
9885 
9886 if (entry->entry_label == NULL)
9887   add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));
9888 else
9889   JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
9890 /* Leave if the match is failed. */
9891 add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
9892 BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
9893 return cc + 1 + LINK_SIZE;
9894 }
9895 
do_callout_jit(struct jit_arguments * arguments,pcre2_callout_block * callout_block,PCRE2_SPTR * jit_ovector)9896 static sljit_s32 SLJIT_FUNC do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
9897 {
9898 PCRE2_SPTR begin;
9899 PCRE2_SIZE *ovector;
9900 sljit_u32 oveccount, capture_top;
9901 
9902 if (arguments->callout == NULL)
9903   return 0;
9904 
9905 SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);
9906 
9907 begin = arguments->begin;
9908 ovector = (PCRE2_SIZE*)(callout_block + 1);
9909 oveccount = callout_block->capture_top;
9910 
9911 SLJIT_ASSERT(oveccount >= 1);
9912 
9913 callout_block->version = 2;
9914 callout_block->callout_flags = 0;
9915 
9916 /* Offsets in subject. */
9917 callout_block->subject_length = arguments->end - arguments->begin;
9918 callout_block->start_match = jit_ovector[0] - begin;
9919 callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;
9920 callout_block->subject = begin;
9921 
9922 /* Convert and copy the JIT offset vector to the ovector array. */
9923 callout_block->capture_top = 1;
9924 callout_block->offset_vector = ovector;
9925 
9926 ovector[0] = PCRE2_UNSET;
9927 ovector[1] = PCRE2_UNSET;
9928 ovector += 2;
9929 jit_ovector += 2;
9930 capture_top = 1;
9931 
9932 /* Convert pointers to sizes. */
9933 while (--oveccount != 0)
9934   {
9935   capture_top++;
9936 
9937   ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin);
9938   ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin);
9939 
9940   if (ovector[0] != PCRE2_UNSET)
9941     callout_block->capture_top = capture_top;
9942 
9943   ovector += 2;
9944   jit_ovector += 2;
9945   }
9946 
9947 return (arguments->callout)(callout_block, arguments->callout_data);
9948 }
9949 
/* Byte offset of field `arg` inside pcre2_callout_block. */
#define CALLOUT_ARG_OFFSET(arg) SLJIT_OFFSETOF(pcre2_callout_block, arg)
9952 
compile_callout_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9953 static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9954 {
9955 DEFINE_COMPILER;
9956 backtrack_common *backtrack;
9957 sljit_s32 mov_opcode;
9958 unsigned int callout_length = (*cc == OP_CALLOUT)
9959     ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
9960 sljit_sw value1;
9961 sljit_sw value2;
9962 sljit_sw value3;
9963 sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * SSIZE_OF(sw);
9964 
9965 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9966 
9967 callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);
9968 
9969 allocate_stack(common, callout_arg_size);
9970 
9971 SLJIT_ASSERT(common->capture_last_ptr != 0);
9972 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
9973 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9974 value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
9975 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
9976 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
9977 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);
9978 
9979 /* These pointer sized fields temporarly stores internal variables. */
9980 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
9981 
9982 if (common->mark_ptr != 0)
9983   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
9984 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
9985 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
9986 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
9987 
9988 if (*cc == OP_CALLOUT)
9989   {
9990   value1 = 0;
9991   value2 = 0;
9992   value3 = 0;
9993   }
9994 else
9995   {
9996   value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
9997   value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
9998   value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
9999   }
10000 
10001 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
10002 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
10003 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
10004 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
10005 
10006 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
10007 
10008 /* Needed to save important temporary registers. */
10009 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
10010 /* SLJIT_R0 = arguments */
10011 OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
10012 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
10013 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout_jit));
10014 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
10015 free_stack(common, callout_arg_size);
10016 
10017 /* Check return value. */
10018 OP2U(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
10019 add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_SIG_GREATER));
10020 if (common->abort_label == NULL)
10021   add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
10022 else
10023   JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label);
10024 return cc + callout_length;
10025 }
10026 
10027 #undef CALLOUT_ARG_SIZE
10028 #undef CALLOUT_ARG_OFFSET
10029 
/* Emits the code for OP_REVERSE / OP_VREVERSE: STR_PTR is moved backwards in
the subject, never before arguments->begin (kept in TMP2).

OP_REVERSE has a single count; failing to move back that far jumps to the
parent's own backtrack list. OP_VREVERSE has a minimum and a maximum count and
gets its own vreverse_backtrack entry, whose backtrack list receives the
failure jumps. For the variable form STR_PTR after the mandatory part is
stored in STACK(3), STR_PTR after moving back up to (max - min) further
positions (stopping at the subject start) is stored in STACK(2), and the
matching path label of the backtrack entry is set at the end.

In UTF mode the pointer is moved with move_back() in a counted loop;
otherwise a plain subtraction of IN_UCHARS(count) is used.

Arguments:
  common   compiler state
  cc       points to OP_REVERSE or OP_VREVERSE
  parent   backtrack node of the enclosing alternative (its top must be NULL)

Returns:   cc advanced past the reverse item */
static PCRE2_SPTR compile_reverse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack = NULL;
jump_list **failed;
unsigned int min_len, max_len;
BOOL variable;
#ifdef SUPPORT_UNICODE
struct sljit_jump *at_start;
struct sljit_label *loop;
#endif

SLJIT_ASSERT(parent->top == NULL);

if (*cc != OP_REVERSE)
  {
  SLJIT_ASSERT(*cc == OP_VREVERSE);
  PUSH_BACKTRACK(sizeof(vreverse_backtrack), cc, NULL);

  failed = &backtrack->own_backtracks;
  min_len = GET2(cc, 1);
  max_len = GET2(cc, 1 + IMM2_SIZE);
  cc += 1 + 2 * IMM2_SIZE;

  SLJIT_ASSERT(min_len < max_len);
  }
else
  {
  failed = &parent->own_backtracks;
  min_len = GET2(cc, 1);
  max_len = min_len;
  cc += 1 + IMM2_SIZE;

  SLJIT_ASSERT(min_len > 0);
  }

variable = (min_len < max_len);

/* TMP2 = start of the subject. */
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

#ifdef SUPPORT_UNICODE
if (common->utf)
  {
  if (min_len > 0)
    {
    /* Mandatory part: TMP3 counts the remaining steps; reaching the subject
    start before the count is exhausted is a failure. */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, min_len);
    loop = LABEL();
    add_jump(compiler, failed, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
    move_back(common, failed, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);
    }

  if (variable)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);

    /* Optional part: reaching the subject start just ends the loop. */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, max_len - min_len);
    loop = LABEL();
    at_start = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
    move_back(common, failed, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);

    JUMPHERE(at_start);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
    }
  }
else
#endif
  {
  if (min_len > 0)
    {
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(min_len));
    add_jump(compiler, failed, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
    }

  if (variable)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);

    /* Step back by the optional amount, then use the subject start instead
    if the result would lie before it. */
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max_len - min_len));
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_PTR, 0, TMP2, 0);
    SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);

    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
    }
  }

check_start_used_ptr(common);

if (variable)
  BACKTRACK_AS(vreverse_backtrack)->matchingpath = LABEL();

return cc;
}
10129 
assert_needs_str_ptr_saving(PCRE2_SPTR cc)10130 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
10131 {
10132 while (TRUE)
10133   {
10134   switch (*cc)
10135     {
10136     case OP_CALLOUT_STR:
10137     cc += GET(cc, 1 + 2*LINK_SIZE);
10138     break;
10139 
10140     case OP_NOT_WORD_BOUNDARY:
10141     case OP_WORD_BOUNDARY:
10142     case OP_CIRC:
10143     case OP_CIRCM:
10144     case OP_DOLL:
10145     case OP_DOLLM:
10146     case OP_CALLOUT:
10147     case OP_ALT:
10148     case OP_NOT_UCP_WORD_BOUNDARY:
10149     case OP_UCP_WORD_BOUNDARY:
10150     cc += PRIV(OP_lengths)[*cc];
10151     break;
10152 
10153     case OP_KET:
10154     return FALSE;
10155 
10156     default:
10157     return TRUE;
10158     }
10159   }
10160 }
10161 
/* Emits the matching path of an assertion: OP_ASSERT, OP_ASSERT_NOT,
OP_ASSERTBACK or OP_ASSERTBACK_NOT, optionally preceded by OP_BRAZERO or
OP_BRAMINZERO (never when 'conditional' is set).

Every alternative is compiled with a temporary backtrack_common
(altbacktrack); its backtracking path is emitted right after its matching
path. For positive assertions a matching alternative jumps to a local list
(tmp) that is resolved at "Assert is successful"; for negative assertions it
jumps directly to the failure target.

The then-trap, accept, quit and positive-assertion state in 'common' is saved
on entry and restored on every exit, including the error exits.

Arguments:
  common       compiler state
  cc           points to the (optional BRAZERO/BRAMINZERO and) assert opcode
  backtrack    the assert backtrack entry; framesize, private_data_ptr and,
               for BRAZERO, matchingpath are filled in here
  conditional  when TRUE the failure jumps are collected in
               backtrack->condfailed instead of the own backtrack list

Returns:   pointer past the closing item of the assertion, or NULL if the
           sljit compiler reports an error */
static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
{
DEFINE_COMPILER;
int framesize;
int extrasize;
BOOL local_quit_available = FALSE;
BOOL needs_control_head;
/* Number of additional stack slots (0 or 3) used when a lookbehind contains a
variable length reverse. This is a count used in stack offset arithmetic, so
it is an int and not a BOOL. */
int end_block_size = 0;
BOOL has_vreverse;
int private_data_ptr;
backtrack_common altbacktrack;
PCRE2_SPTR ccbegin;
PCRE2_UCHAR opcode;
PCRE2_UCHAR bra = OP_BRA;
jump_list *tmp = NULL;
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.own_backtracks;
jump_list **found;
/* Saving previous accept variables. */
BOOL save_local_quit_available = common->local_quit_available;
BOOL save_in_positive_assertion = common->in_positive_assertion;
then_trap_backtrack *save_then_trap = common->then_trap;
struct sljit_label *save_quit_label = common->quit_label;
struct sljit_label *save_accept_label = common->accept_label;
jump_list *save_quit = common->quit;
jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
jump_list *save_accept = common->accept;
struct sljit_jump *jump;
struct sljit_jump *brajump = NULL;

/* Assert captures then. */
common->then_trap = NULL;

if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  SLJIT_ASSERT(!conditional);
  bra = *cc;
  cc++;
  }

private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
backtrack->framesize = framesize;
backtrack->private_data_ptr = private_data_ptr;
opcode = *cc;
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* Where a matching alternative jumps to: a local list for positive
assertions, the failure target for negative ones. */
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
ccbegin = cc;
cc += GET(cc, 1);

if (bra == OP_BRAMINZERO)
  {
  /* This is a braminzero backtrack path. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if ((opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NOT) && find_vreverse(ccbegin))
  end_block_size = 3;

if (framesize < 0)
  {
  /* No frame is needed: only STR_PTR (when required), the end block and the
  control head are stored. */
  extrasize = 1;
  if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
    extrasize = 0;

  extrasize += end_block_size;

  if (needs_control_head)
    extrasize++;

  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  if (extrasize > 0)
    allocate_stack(common, extrasize);

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);

  if (extrasize > 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    SLJIT_ASSERT(extrasize == end_block_size + 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);
    }
  }
else
  {
  /* A frame is needed: STR_PTR, the end block, the optional control head and
  the previous private_data_ptr value are stored, followed by the frame. */
  extrasize = (needs_control_head ? 3 : 2) + end_block_size;

  OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
  allocate_stack(common, framesize + extrasize);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 2), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    }
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);

  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
  }

if (end_block_size > 0)
  {
  /* Save STR_END in slot 1 and make the current position the new end while
  the lookbehind is running. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);
  }

memset(&altbacktrack, 0, sizeof(backtrack_common));
if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
  {
  /* Control verbs cannot escape from these asserts. */
  local_quit_available = TRUE;
  common->local_quit_available = TRUE;
  common->quit_label = NULL;
  common->quit = NULL;
  }

common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
common->positive_assertion_quit = NULL;

/* One iteration per alternative. */
while (1)
  {
  common->accept_label = NULL;
  common->accept = NULL;
  altbacktrack.top = NULL;
  altbacktrack.own_backtracks = NULL;

  /* Later alternatives restart from the saved STR_PTR. */
  if (*ccbegin == OP_ALT && extrasize > 0)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  altbacktrack.cc = ccbegin;
  ccbegin += 1 + LINK_SIZE;

  has_vreverse = (*ccbegin == OP_VREVERSE);
  if (*ccbegin == OP_REVERSE || has_vreverse)
    ccbegin = compile_reverse_matchingpath(common, ccbegin, &altbacktrack);

  compile_matchingpath(common, ccbegin, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved state before reporting the error. */
    if (local_quit_available)
      {
      common->local_quit_available = save_local_quit_available;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->in_positive_assertion = save_in_positive_assertion;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assertion_quit = save_positive_assertion_quit;
    common->accept = save_accept;
    return NULL;
    }

  if (has_vreverse)
    {
    /* The alternative must end exactly at STR_END; if STR_PTR is still
    below it, backtrack into the alternative. */
    SLJIT_ASSERT(altbacktrack.top != NULL);
    add_jump(compiler, &altbacktrack.top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    }

  common->accept_label = LABEL();
  if (common->accept != NULL)
    set_jumps(common->accept, common->accept_label);

  /* Reset stack. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    else if (extrasize > 0)
      free_stack(common, extrasize);

    if (end_block_size > 0)
      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
    }
  else
    {
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));

      if (end_block_size > 0)
        OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 2));

      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
      }
    else
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

      if (end_block_size > 0)
        OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize + 1));

      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
      }
    }

  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (conditional)
      {
      if (extrasize > 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-end_block_size - (needs_control_head ? 2 : 1)));
      }
    else if (bra == OP_BRAZERO)
      {
      if (framesize < 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
        }
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (framesize >= 0)
      {
      /* For OP_BRA and OP_BRAMINZERO. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
      }
    }
  add_jump(compiler, found, JUMP(SLJIT_JUMP));

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved state before reporting the error. */
    if (local_quit_available)
      {
      common->local_quit_available = save_local_quit_available;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->in_positive_assertion = save_in_positive_assertion;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assertion_quit = save_positive_assertion_quit;
    common->accept = save_accept;
    return NULL;
    }
  set_jumps(altbacktrack.own_backtracks, LABEL());

  if (*cc != OP_ALT)
    break;

  ccbegin = cc;
  cc += GET(cc, 1);
  }

if (local_quit_available)
  {
  SLJIT_ASSERT(common->positive_assertion_quit == NULL);
  /* Makes the check less complicated below. */
  common->positive_assertion_quit = common->quit;
  }

/* None of them matched. */
if (common->positive_assertion_quit != NULL)
  {
  jump = JUMP(SLJIT_JUMP);
  set_jumps(common->positive_assertion_quit, LABEL());
  SLJIT_ASSERT(framesize != no_stack);
  if (framesize < 0)
    OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
    }
  JUMPHERE(jump);
  }

if (end_block_size > 0)
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(1));

if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1));

if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
  {
  /* Assert is failed. */
  if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  if (framesize < 0)
    {
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      if (extrasize >= 2)
        free_stack(common, extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  /* For BRAZERO a failed assert continues on the matching path (label set
  below); otherwise the jump goes to the failure target. */
  jump = JUMP(SLJIT_JUMP);
  if (bra != OP_BRAZERO)
    add_jump(compiler, target, jump);

  /* Assert is successful. */
  set_jumps(tmp, LABEL());
  if (framesize < 0)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));

    /* Keep the STR_PTR on the top of the stack. */
    if (bra == OP_BRAZERO)
      {
      /* This allocation is always successful. */
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      if (extrasize >= 2)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else if (bra == OP_BRAMINZERO)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    }
  else
    {
    if (bra == OP_BRA)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
      }
    else
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + end_block_size + 2) * sizeof(sljit_sw));

      if (extrasize == 2 + end_block_size)
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
        if (bra == OP_BRAMINZERO)
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
        }
      else
        {
        SLJIT_ASSERT(extrasize == 3 + end_block_size);
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
        }
      }
    }

  if (bra == OP_BRAZERO)
    {
    backtrack->matchingpath = LABEL();
    SET_LABEL(jump, backtrack->matchingpath);
    }
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    if (framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
      }
    set_jumps(backtrack->common.own_backtracks, LABEL());
    }
  }
else
  {
  /* AssertNot is successful. */
  if (framesize < 0)
    {
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

    if (bra != OP_BRA)
      {
      if (extrasize >= 2)
        free_stack(common, extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra != OP_BRA)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }

  if (bra == OP_BRAZERO)
    backtrack->matchingpath = LABEL();
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    }

  if (bra != OP_BRA)
    {
    SLJIT_ASSERT(found == &backtrack->common.own_backtracks);
    set_jumps(backtrack->common.own_backtracks, LABEL());
    backtrack->common.own_backtracks = NULL;
    }
  }

/* Restore the state saved on entry. */
if (local_quit_available)
  {
  common->local_quit_available = save_local_quit_available;
  common->quit_label = save_quit_label;
  common->quit = save_quit;
  }
common->in_positive_assertion = save_in_positive_assertion;
common->then_trap = save_then_trap;
common->accept_label = save_accept_label;
common->positive_assertion_quit = save_positive_assertion_quit;
common->accept = save_accept;
return cc + 1 + LINK_SIZE;
}
10632 
match_once_common(compiler_common * common,PCRE2_UCHAR ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)10633 static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
10634 {
10635 DEFINE_COMPILER;
10636 int stacksize;
10637 
10638 if (framesize < 0)
10639   {
10640   if (framesize == no_frame)
10641     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10642   else
10643     {
10644     stacksize = needs_control_head ? 1 : 0;
10645     if (ket != OP_KET || has_alternatives)
10646       stacksize++;
10647 
10648     if (stacksize > 0)
10649       free_stack(common, stacksize);
10650     }
10651 
10652   if (needs_control_head)
10653     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
10654 
10655   /* TMP2 which is set here used by OP_KETRMAX below. */
10656   if (ket == OP_KETRMAX)
10657     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10658   else if (ket == OP_KETRMIN)
10659     {
10660     /* Move the STR_PTR to the private_data_ptr. */
10661     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10662     }
10663   }
10664 else
10665   {
10666   stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
10667   OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
10668   if (needs_control_head)
10669     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10670 
10671   if (ket == OP_KETRMAX)
10672     {
10673     /* TMP2 which is set here used by OP_KETRMAX below. */
10674     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10675     }
10676   }
10677 if (needs_control_head)
10678   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10679 }
10680 
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)10681 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
10682 {
10683 DEFINE_COMPILER;
10684 
10685 if (common->capture_last_ptr != 0)
10686   {
10687   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
10688   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10689   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10690   stacksize++;
10691   }
10692 if (common->optimized_cbracket[offset >> 1] == 0)
10693   {
10694   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
10695   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
10696   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10697   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10698   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10699   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10700   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10701   stacksize += 2;
10702   }
10703 return stacksize;
10704 }
10705 
do_script_run(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10706 static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10707 {
10708   if (PRIV(script_run)(ptr, endptr, FALSE))
10709     return endptr;
10710   return NULL;
10711 }
10712 
10713 #ifdef SUPPORT_UNICODE
10714 
do_script_run_utf(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10715 static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10716 {
10717   if (PRIV(script_run)(ptr, endptr, TRUE))
10718     return endptr;
10719   return NULL;
10720 }
10721 
10722 #endif /* SUPPORT_UNICODE */
10723 
/* Emits a call to the script run helper. The first argument (TMP1) is loaded
from private_data_ptr and the second is the current STR_PTR. The returned
pointer becomes the new STR_PTR; a zero result jumps to the backtrack list of
parent->top if there is one, otherwise to the parent's own backtrack list.

Arguments:
  common            compiler state
  private_data_ptr  local slot holding the first argument of the helper
  parent            backtrack node that receives the failure jump */
static void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
{
DEFINE_COMPILER;
jump_list **failed = (parent->top != NULL)
    ? &parent->top->simple_backtracks : &parent->own_backtracks;
#ifdef SUPPORT_UNICODE
sljit_sw helper = common->utf
    ? SLJIT_FUNC_ADDR(do_script_run_utf) : SLJIT_FUNC_ADDR(do_script_run);
#else
sljit_sw helper = SLJIT_FUNC_ADDR(do_script_run);
#endif

/* The helper receives its arguments in R0 and R1. */
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, helper);

OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
add_jump(compiler, failed, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
}
10741 
10742 /*
10743   Handling bracketed expressions is probably the most complex part.
10744 
10745   Stack layout naming characters:
10746     S - Push the current STR_PTR
10747     0 - Push a 0 (NULL)
10748     A - Push the current STR_PTR. Needed for restoring the STR_PTR
10749         before the next alternative. Not pushed if there are no alternatives.
10750     M - Any values pushed by the current alternative. Can be empty, or anything.
10751     C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
10752     L - Push the previous local (pointed by localptr) to the stack
   () - optional values stored on the stack
  ()* - optional, can be stored multiple times
10755 
10756   The following list shows the regular expression templates, their PCRE byte codes
10757   and stack layout supported by pcre-sljit.
10758 
10759   (?:)                     OP_BRA     | OP_KET                A M
10760   ()                       OP_CBRA    | OP_KET                C M
10761   (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
10762                            OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
10763   (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
10764                            OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
10765   ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
10766                            OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
10767   ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
10768                            OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
10769   (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
10770   (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
10771   ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
10772   ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
10773   (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
10774            OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
10775   (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
10776            OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
10777   ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
10778            OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
10779   ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
10780            OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*
10781 
10782 
10783   Stack layout naming characters:
10784     A - Push the alternative index (starting from 0) on the stack.
10785         Not pushed if there are no alternatives.
10786     M - Any values pushed by the current alternative. Can be empty, or anything.
10787 
10788   The next list shows the possible content of a bracket:
10789   (|)     OP_*BRA    | OP_ALT ...         M A
10790   (?()|)  OP_*COND   | OP_ALT             M A
10791   (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
10792                                           Or nothing, if trace is unnecessary
10793 */
10794 
/* Emits the matching-path code of one bracketed group and returns the
   position in the byte code where compilation continues.

   The bracket opcodes tested explicitly in the body are OP_CBRA, OP_SCBRA,
   OP_SBRA, OP_COND, OP_SCOND, OP_ONCE, OP_ASSERT_NA, OP_ASSERTBACK_NA and
   OP_SCRIPT_RUN; any other bracket opcode takes the generic paths. The group
   may be preceded by OP_BRAZERO or OP_BRAMINZERO, and its closing opcode must
   be OP_KET, OP_KETRMAX or OP_KETRMIN (asserted below).

   Arguments:
     common   the shared compiler state
     cc       points to OP_BRAZERO / OP_BRAMINZERO or to the bracket opcode
     parent   the parent backtrack record; in the OP_BRAMINZERO case it is
              cast to braminzero_backtrack to reach its matchingpath label.
              NOTE(review): PUSH_BACKTRACK presumably also uses it when
              creating the new record - confirm against the macro.

   Returns:
     the address after the closing ket (cc + 1 + LINK_SIZE) plus repeat_length,
     or NULL when sljit_get_compiler_error() reports an error.

   The values pushed to the stack follow the layout table in the comment
   that precedes this function. */
compile_bracket_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)10795 static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10796 {
10797 DEFINE_COMPILER;
10798 backtrack_common *backtrack;
10799 PCRE2_UCHAR opcode;
10800 int private_data_ptr = 0;
10801 int offset = 0;
10802 int i, stacksize;
10803 int repeat_ptr = 0, repeat_length = 0;
10804 int repeat_type = 0, repeat_count = 0;
10805 PCRE2_SPTR ccbegin;
10806 PCRE2_SPTR matchingpath;
10807 PCRE2_SPTR slot;
10808 PCRE2_UCHAR bra = OP_BRA;
10809 PCRE2_UCHAR ket;
10810 assert_backtrack *assert;
10811 BOOL has_alternatives;
10812 BOOL needs_control_head = FALSE;
10813 BOOL has_vreverse = FALSE;
10814 struct sljit_jump *jump;
10815 struct sljit_jump *skip;
10816 struct sljit_label *rmax_label = NULL;
10817 struct sljit_jump *braminzero = NULL;
10818
10819 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
10820
      /* A leading OP_BRAZERO / OP_BRAMINZERO is remembered in bra and skipped.
         The assignment to opcode inside the block is repeated unconditionally
         right after it. */
10821 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
10822   {
10823   bra = *cc;
10824   cc++;
10825   opcode = *cc;
10826   }
10827
10828 opcode = *cc;
10829 ccbegin = cc;
      /* matchingpath is temporarily used to read the opcode stored
         1 + LINK_SIZE code units before bracketend(cc); it becomes ket. */
10830 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
10831 ket = *matchingpath;
      /* An OP_KET with non-zero private data describes a counted repeat. Four
         consecutive private data entries hold the counter location, a length,
         the repeat type and the repeat count. OP_UPTO and OP_MINUPTO are then
         compiled like OP_KETRMAX and OP_KETRMIN. */
10832 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
10833   {
10834   repeat_ptr = PRIVATE_DATA(matchingpath);
10835   repeat_length = PRIVATE_DATA(matchingpath + 1);
10836   repeat_type = PRIVATE_DATA(matchingpath + 2);
10837   repeat_count = PRIVATE_DATA(matchingpath + 3);
10838   SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
10839   if (repeat_type == OP_UPTO)
10840     ket = OP_KETRMAX;
10841   if (repeat_type == OP_MINUPTO)
10842     ket = OP_KETRMIN;
10843   }
10844
      /* From here matchingpath points after the bracket opcode and its link,
         and cc is advanced by the link value, so *cc is either OP_ALT or the
         closing ket. */
10845 matchingpath = ccbegin + 1 + LINK_SIZE;
10846 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
10847 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
10848 cc += GET(cc, 1);
10849
10850 has_alternatives = *cc == OP_ALT;
      /* For conditional groups has_alternatives is FALSE when the condition
         opcode is in the OP_RREF .. OP_TRUE range or is OP_FAIL, and TRUE for
         every other condition. */
10851 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
10852   {
10853   SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
10854     compile_time_checks_must_be_grouped_together);
10855   has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
10856   }
10857
      /* An OP_COND that ends with a repeating ket is compiled as OP_SCOND. */
10858 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
10859   opcode = OP_SCOND;
10860
10861 if (opcode == OP_CBRA || opcode == OP_SCBRA)
10862   {
10863   /* Capturing brackets have a pre-allocated space. After this block
         offset holds twice the capture number; private_data_ptr is either
         OVECTOR_PRIV(number) or, for optimized brackets, OVECTOR(offset). */
10864   offset = GET2(ccbegin, 1 + LINK_SIZE);
10865   if (common->optimized_cbracket[offset] == 0)
10866     {
10867     private_data_ptr = OVECTOR_PRIV(offset);
10868     offset <<= 1;
10869     }
10870   else
10871     {
10872     offset <<= 1;
10873     private_data_ptr = OVECTOR(offset);
10874     }
10875   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10876   matchingpath += IMM2_SIZE;
10877   }
10878 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10879   {
10880   /* Other brackets simply allocate the next entry. */
10881   private_data_ptr = PRIVATE_DATA(ccbegin);
10882   SLJIT_ASSERT(private_data_ptr != 0);
10883   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10884   if (opcode == OP_ONCE)
10885     BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
10886   }
10887
10888 /* Instructions before the first alternative. The stack words are counted
         first, allocated with a single call, and then written in the same
         order: a zero word for repeating kets, then STR_PTR for OP_BRAZERO. */
10889 stacksize = 0;
10890 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10891   stacksize++;
10892 if (bra == OP_BRAZERO)
10893   stacksize++;
10894
10895 if (stacksize > 0)
10896   allocate_stack(common, stacksize);
10897
10898 stacksize = 0;
10899 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10900   {
10901   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10902   stacksize++;
10903   }
10904
10905 if (bra == OP_BRAZERO)
10906   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10907
10908 if (bra == OP_BRAMINZERO)
10909   {
10910   /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
10911   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10912   if (ket != OP_KETRMIN)
10913     {
        /* Non-repeating case: the word is released and a zero value takes
           the braminzero jump, which is resolved near the end of this
           function. */
10914     free_stack(common, 1);
10915     braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10916     }
10917   else if (opcode == OP_ONCE || opcode >= OP_SBRA)
10918     {
10919     jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10920     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10921     /* Nothing stored during the first run. */
10922     skip = JUMP(SLJIT_JUMP);
10923     JUMPHERE(jump);
10924     /* Checking zero-length iteration. */
10925     if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10926       {
10927       /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
10928       braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10929       }
10930     else
10931       {
10932       /* Except when the whole stack frame must be saved. */
10933       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10934       braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
10935       }
10936     JUMPHERE(skip);
10937     }
10938   else
10939     {
        /* Remaining opcodes: a zero value in STACK(0) makes the code load
           STR_PTR from STACK(1) instead. */
10940     jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10941     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10942     JUMPHERE(jump);
10943     }
10944   }
10945
      /* Counted repeat: the counter word at repeat_ptr starts at repeat_count.
         For OP_EXACT the loop label is placed directly after this store. */
10946 if (repeat_type != 0)
10947   {
10948   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
10949   if (repeat_type == OP_EXACT)
10950     rmax_label = LABEL();
10951   }
10952
10953 if (ket == OP_KETRMIN)
10954   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10955
10956 if (ket == OP_KETRMAX)
10957   {
10958   rmax_label = LABEL();
        /* For opcodes in the [OP_BRA, OP_SBRA) range without a counted
           repeat, the loop head is also the alternative_matchingpath. */
10959   if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
10960     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
10961   }
10962
10963 /* Handling capturing brackets and alternatives. */
10964 if (opcode == OP_ONCE)
10965   {
10966   stacksize = 0;
        /* TMP2 carries the current control head across allocate_stack(). */
10967   if (needs_control_head)
10968     {
10969     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10970     stacksize++;
10971     }
10972
10973   if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10974     {
10975     /* Neither capturing brackets nor recursions are found in the block. */
10976     if (ket == OP_KETRMIN)
10977       {
10978       stacksize += 2;
10979       if (!needs_control_head)
10980         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10981       }
10982     else
10983       {
10984       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10985         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
10986       if (ket == OP_KETRMAX || has_alternatives)
10987         stacksize++;
10988       }
10989
10990     if (stacksize > 0)
10991       allocate_stack(common, stacksize);
10992
10993     stacksize = 0;
10994     if (needs_control_head)
10995       {
10996       stacksize++;
10997       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10998       }
10999
11000     if (ket == OP_KETRMIN)
11001       {
          /* TMP2 is free again once the control head has been stored, so it
             is reloaded with the previous private data value, which is
             pushed after STR_PTR. */
11002       if (needs_control_head)
11003         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11004       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
11005       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
11006         OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
11007       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
11008       }
11009     else if (ket == OP_KETRMAX || has_alternatives)
11010       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
11011     }
11012   else
11013     {
        /* A frame is required: room is allocated for the optional control
           head, the optional STR_PTR, the previous private data value and
           the frame itself (framesize + 1 words). */
11014     if (ket != OP_KET || has_alternatives)
11015       stacksize++;
11016
11017     stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
11018     allocate_stack(common, stacksize);
11019
11020     if (needs_control_head)
11021       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11022
        /* TMP1 = previous private data value, TMP2 = STACK_TOP plus the
           size of the area just allocated; TMP2 becomes the new private
           data value. */
11023     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11024     OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11025
11026     stacksize = needs_control_head ? 1 : 0;
11027     if (ket != OP_KET || has_alternatives)
11028       {
11029       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
11030       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
11031       stacksize++;
11032       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
11033       }
11034     else
11035       {
11036       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
11037       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
11038       }
11039     init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
11040     }
11041   }
11042 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
11043   {
11044   /* Saving the previous values. */
11045   if (common->optimized_cbracket[offset >> 1] != 0)
11046     {
        /* The private data slot is the OVECTOR entry itself here, so two
           consecutive words are saved and the first one is set to STR_PTR. */
11047     SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
11048     allocate_stack(common, 2);
11049     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11050     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
11051     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
11052     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
11053     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
11054     }
11055   else
11056     {
11057     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11058     allocate_stack(common, 1);
11059     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
11060     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11061     }
11062   }
11063 else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))
11064   {
        /* Two private data words are saved to STACK(0) and STACK(1) and then
           overwritten with STR_PTR and STR_END; STR_END is set to STR_PTR
           afterwards. Four stack words are allocated although only two are
           written here.
           NOTE(review): the other two are presumably used by
           compile_reverse_matchingpath - confirm. */
11065   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11066   allocate_stack(common, 4);
11067   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
11068   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
11069   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);
11070   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11071   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
11072   OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);
11073
11074   has_vreverse = (*matchingpath == OP_VREVERSE);
11075   if (*matchingpath == OP_REVERSE || has_vreverse)
11076     matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);
11077   }
11078 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
11079   {
11080   /* Saving the previous value. */
11081   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11082   allocate_stack(common, 1);
11083   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
11084   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11085
11086   if (*matchingpath == OP_REVERSE)
11087     matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);
11088   }
11089 else if (has_alternatives)
11090   {
11091   /* Pushing the starting string pointer. */
11092   allocate_stack(common, 1);
11093   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11094   }
11095
11096 /* Generating code for the first alternative. */
11097 if (opcode == OP_COND || opcode == OP_SCOND)
11098   {
11099   if (*matchingpath == OP_CREF)
11100     {
        /* The condition fails when the referenced OVECTOR entry is equal to
           OVECTOR(1). */
11101     SLJIT_ASSERT(has_alternatives);
11102     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
11103       CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
11104     matchingpath += 1 + IMM2_SIZE;
11105     }
11106   else if (*matchingpath == OP_DNCREF)
11107     {
11108     SLJIT_ASSERT(has_alternatives);
11109
        /* i is the number of name table entries to test, slot the first of
           them. Each entry's OVECTOR value minus OVECTOR(1) is OR-ed into
           TMP2, and the condition fails when the result is zero. STR_PTR is
           used as a scratch register inside the loop, so it is preserved in
           TMP3. */
11110     i = GET2(matchingpath, 1 + IMM2_SIZE);
11111     slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
11112     OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
11113     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
11114     OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
11115     slot += common->name_entry_size;
11116     i--;
11117     while (i-- > 0)
11118       {
11119       OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
11120       OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
11121       slot += common->name_entry_size;
11122       }
11123     OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
11124     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
11125     matchingpath += 1 + 2 * IMM2_SIZE;
11126     }
11127   else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
11128     {
11129     /* Never has other case. */
11130     BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
11131     SLJIT_ASSERT(!has_alternatives);
11132
        /* These conditions are decided while compiling. In this branch
           stacksize is reused as the result: non-zero compiles the first
           branch, zero compiles the "else" branch (or nothing when there is
           no OP_ALT). */
11133     if (*matchingpath == OP_TRUE)
11134       {
11135       stacksize = 1;
11136       matchingpath++;
11137       }
11138     else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
11139       stacksize = 0;
11140     else if (*matchingpath == OP_RREF)
11141       {
11142       stacksize = GET2(matchingpath, 1);
11143       if (common->currententry == NULL)
11144         stacksize = 0;
11145       else if (stacksize == RREF_ANY)
11146         stacksize = 1;
11147       else if (common->currententry->start == 0)
11148         stacksize = stacksize == 0;
11149       else
11150         stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
11151
11152       if (stacksize != 0)
11153         matchingpath += 1 + IMM2_SIZE;
11154       }
11155     else
11156       {
          /* The remaining opcode of the range (OP_DNRREF, see the compile
             time assert above): the name table entries are searched for the
             number read from common->currententry->start; stacksize stays
             non-zero only when one of them matches. */
11157       if (common->currententry == NULL || common->currententry->start == 0)
11158         stacksize = 0;
11159       else
11160         {
11161         stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
11162         slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
11163         i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
11164         while (stacksize > 0)
11165           {
11166           if ((int)GET2(slot, 0) == i)
11167             break;
11168           slot += common->name_entry_size;
11169           stacksize--;
11170           }
11171         }
11172
11173       if (stacksize != 0)
11174         matchingpath += 1 + 2 * IMM2_SIZE;
11175       }
11176
11177       /* The stacksize == 0 is a common "else" case. */
11178       if (stacksize == 0)
11179         {
11180         if (*cc == OP_ALT)
11181           {
11182           matchingpath = cc + 1 + LINK_SIZE;
11183           cc += GET(cc, 1);
11184           }
11185         else
11186           matchingpath = cc;
11187         }
11188     }
11189   else
11190     {
11191     SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
11192     /* Similar code as PUSH_BACKTRACK macro. */
11193     assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
11194     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11195       return NULL;
11196     memset(assert, 0, sizeof(assert_backtrack));
11197     assert->common.cc = matchingpath;
11198     BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
11199     matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
11200     }
11201   }
11202
      /* Compile the byte code between matchingpath and cc. */
11203 compile_matchingpath(common, matchingpath, cc, backtrack);
11204 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11205   return NULL;
11206
      /* Opcode specific code emitted right after the compiled body. */
11207 switch (opcode)
11208   {
11209   case OP_ASSERTBACK_NA:
          /* With OP_VREVERSE, STR_PTR below STR_END is a failure. STR_END is
             then reloaded from the second private data word, where it was
             stored before the body. */
11210     if (has_vreverse)
11211       {
11212       SLJIT_ASSERT(backtrack->top != NULL && PRIVATE_DATA(ccbegin + 1));
11213       add_jump(compiler, &backtrack->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
11214       }
11215
11216     if (PRIVATE_DATA(ccbegin + 1))
11217       OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
11218     break;
11219   case OP_ASSERT_NA:
          /* STR_PTR is reloaded from the private data word, which was set to
             STR_PTR before the body. */
11220     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11221     break;
11222   case OP_ONCE:
11223     match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
11224     break;
11225   case OP_SCRIPT_RUN:
11226     match_script_run_common(common, private_data_ptr, backtrack);
11227     break;
11228   }
11229
      /* Values pushed after the body. As before, the words are counted,
         allocated with one call and then written in the same order: the
         decremented OP_MINUPTO counter, a STR_PTR / zero word, the capture
         data and the alternative index. */
11230 stacksize = 0;
11231 if (repeat_type == OP_MINUPTO)
11232   {
11233   /* We need to preserve the counter. TMP2 will be used below. */
11234   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
11235   stacksize++;
11236   }
11237 if (ket != OP_KET || bra != OP_BRA)
11238   stacksize++;
11239 if (offset != 0)
11240   {
11241   if (common->capture_last_ptr != 0)
11242     stacksize++;
11243   if (common->optimized_cbracket[offset >> 1] == 0)
11244     stacksize += 2;
11245   }
11246 if (has_alternatives && opcode != OP_ONCE)
11247   stacksize++;
11248
11249 if (stacksize > 0)
11250   allocate_stack(common, stacksize);
11251
11252 stacksize = 0;
11253 if (repeat_type == OP_MINUPTO)
11254   {
11255   /* TMP2 was set above. */
11256   OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
11257   stacksize++;
11258   }
11259
11260 if (ket != OP_KET || bra != OP_BRA)
11261   {
        /* Repeating kets store the current STR_PTR; the OP_KET case (which
           is reached here only when bra is not OP_BRA) stores zero. */
11262   if (ket != OP_KET)
11263     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
11264   else
11265     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
11266   stacksize++;
11267   }
11268
11269 if (offset != 0)
11270   stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
11271
11272 /* Skip and count the other alternatives. i becomes 1 plus the number of
         OP_ALT items skipped here. */
11273 i = 1;
11274 while (*cc == OP_ALT)
11275   {
11276   cc += GET(cc, 1);
11277   i++;
11278   }
11279
11280 if (has_alternatives)
11281   {
11282   if (opcode != OP_ONCE)
11283     {
          /* When i <= 3 a zero is stored for the first alternative;
             otherwise a mov_addr instruction is emitted and remembered in
             u.matching_mov_addr. */
11284     if (i <= 3)
11285       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
11286     else
11287       BACKTRACK_AS(bracket_backtrack)->u.matching_mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
11288     }
11289   if (ket != OP_KETRMAX)
11290     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
11291   }
11292
11293 /* Must be after the matchingpath label. */
11294 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
11295   {
11296   SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
11297   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11298   }
11299
11300 if (ket == OP_KETRMAX)
11301   {
11302   if (repeat_type != 0)
11303     {
          /* Counted greedy repeat: decrement the counter and loop back
             while it is non-zero. */
11304     if (has_alternatives)
11305       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
11306     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
11307     JUMPTO(SLJIT_NOT_ZERO, rmax_label);
11308     /* Drop STR_PTR for greedy plus quantifier. */
11309     if (opcode != OP_ONCE)
11310       free_stack(common, 1);
11311     }
11312   else if (opcode < OP_BRA || opcode >= OP_SBRA)
11313     {
11314     if (has_alternatives)
11315       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
11316
11317     /* Checking zero-length iteration. */
11318     if (opcode != OP_ONCE)
11319       {
11320       /* This case includes opcodes such as OP_SCRIPT_RUN. */
11321       CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
11322       /* Drop STR_PTR for greedy plus quantifier. */
11323       if (bra != OP_BRAZERO)
11324         free_stack(common, 1);
11325       }
11326     else
11327       /* TMP2 must contain the starting STR_PTR. */
11328       CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
11329     }
11330   else
11331     JUMPTO(SLJIT_JUMP, rmax_label);
11332   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
11333   }
11334
11335 if (repeat_type == OP_EXACT)
11336   {
        /* Exact repeat: loop back to the label placed after the counter
           initialisation until the counter reaches zero. */
11337   count_match(common);
11338   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
11339   JUMPTO(SLJIT_NOT_ZERO, rmax_label);
11340   }
11341 else if (repeat_type == OP_UPTO)
11342   {
11343   /* We need to preserve the counter. */
11344   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
11345   allocate_stack(common, 1);
11346   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11347   }
11348
11349 if (bra == OP_BRAZERO)
11350   BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
11351
11352 if (bra == OP_BRAMINZERO)
11353   {
11354   /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
11355   JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
11356   if (braminzero != NULL)
11357     {
11358     JUMPHERE(braminzero);
11359     /* We need to release the end pointer to perform the
11360     backtrack for the zero-length iteration. When
11361     framesize is < 0, OP_ONCE will do the release itself. */
11362     if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
11363       {
11364       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11365       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
11366       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
11367       }
11368     else if (ket == OP_KETRMIN && opcode != OP_ONCE)
11369       free_stack(common, 1);
11370     }
11371   /* Continue to the normal backtrack. */
11372   }
11373
11374 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO || (has_alternatives && repeat_type != OP_EXACT))
11375   count_match(common);
11376
      /* Step over the closing ket and its link. */
11377 cc += 1 + LINK_SIZE;
11378
11379 if (opcode == OP_ONCE)
11380   {
11381   int data;
11382   int framesize = BACKTRACK_AS(bracket_backtrack)->u.framesize;
11383
11384   SLJIT_ASSERT(SHRT_MIN <= framesize && framesize < SHRT_MAX/2);
11385   /* We temporarily encode the needs_control_head in the lowest bit.
11386      The real value should be short enough for this operation to work
11387      without triggering Undefined Behaviour. */
11388   data = (int)((short)((unsigned short)framesize << 1) | (needs_control_head ? 1 : 0));
11389   BACKTRACK_AS(bracket_backtrack)->u.framesize = data;
11390   }
11391 return cc + repeat_length;
11392 }
11393 
compile_bracketpos_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11394 static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11395 {
11396 DEFINE_COMPILER;
11397 backtrack_common *backtrack;
11398 PCRE2_UCHAR opcode;
11399 int private_data_ptr;
11400 int cbraprivptr = 0;
11401 BOOL needs_control_head;
11402 int framesize;
11403 int stacksize;
11404 int offset = 0;
11405 BOOL zero = FALSE;
11406 PCRE2_SPTR ccbegin = NULL;
11407 int stack; /* Also contains the offset of control head. */
11408 struct sljit_label *loop = NULL;
11409 struct jump_list *emptymatch = NULL;
11410 
11411 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
11412 if (*cc == OP_BRAPOSZERO)
11413   {
11414   zero = TRUE;
11415   cc++;
11416   }
11417 
11418 opcode = *cc;
11419 private_data_ptr = PRIVATE_DATA(cc);
11420 SLJIT_ASSERT(private_data_ptr != 0);
11421 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
11422 switch(opcode)
11423   {
11424   case OP_BRAPOS:
11425   case OP_SBRAPOS:
11426   ccbegin = cc + 1 + LINK_SIZE;
11427   break;
11428 
11429   case OP_CBRAPOS:
11430   case OP_SCBRAPOS:
11431   offset = GET2(cc, 1 + LINK_SIZE);
11432   /* This case cannot be optimized in the same way as
11433   normal capturing brackets. */
11434   SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
11435   cbraprivptr = OVECTOR_PRIV(offset);
11436   offset <<= 1;
11437   ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
11438   break;
11439 
11440   default:
11441   SLJIT_UNREACHABLE();
11442   break;
11443   }
11444 
11445 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
11446 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
11447 if (framesize < 0)
11448   {
11449   if (offset != 0)
11450     {
11451     stacksize = 2;
11452     if (common->capture_last_ptr != 0)
11453       stacksize++;
11454     }
11455   else
11456     stacksize = 1;
11457 
11458   if (needs_control_head)
11459     stacksize++;
11460   if (!zero)
11461     stacksize++;
11462 
11463   BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
11464   allocate_stack(common, stacksize);
11465   if (framesize == no_frame)
11466     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
11467 
11468   stack = 0;
11469   if (offset != 0)
11470     {
11471     stack = 2;
11472     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
11473     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
11474     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
11475     if (common->capture_last_ptr != 0)
11476       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
11477     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
11478     if (needs_control_head)
11479       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11480     if (common->capture_last_ptr != 0)
11481       {
11482       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
11483       stack = 3;
11484       }
11485     }
11486   else
11487     {
11488     if (needs_control_head)
11489       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11490     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11491     stack = 1;
11492     }
11493 
11494   if (needs_control_head)
11495     stack++;
11496   if (!zero)
11497     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
11498   if (needs_control_head)
11499     {
11500     stack--;
11501     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
11502     }
11503   }
11504 else
11505   {
11506   stacksize = framesize + 1;
11507   if (!zero)
11508     stacksize++;
11509   if (needs_control_head)
11510     stacksize++;
11511   if (offset == 0)
11512     stacksize++;
11513   BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
11514 
11515   allocate_stack(common, stacksize);
11516   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11517   if (needs_control_head)
11518     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11519   OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11520 
11521   stack = 0;
11522   if (!zero)
11523     {
11524     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
11525     stack = 1;
11526     }
11527   if (needs_control_head)
11528     {
11529     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
11530     stack++;
11531     }
11532   if (offset == 0)
11533     {
11534     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
11535     stack++;
11536     }
11537   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
11538   init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
11539   stack -= 1 + (offset == 0);
11540   }
11541 
11542 if (offset != 0)
11543   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11544 
11545 loop = LABEL();
11546 while (*cc != OP_KETRPOS)
11547   {
11548   backtrack->top = NULL;
11549   backtrack->own_backtracks = NULL;
11550   cc += GET(cc, 1);
11551 
11552   compile_matchingpath(common, ccbegin, cc, backtrack);
11553   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11554     return NULL;
11555 
11556   if (framesize < 0)
11557     {
11558     if (framesize == no_frame)
11559       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11560 
11561     if (offset != 0)
11562       {
11563       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11564       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11565       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11566       if (common->capture_last_ptr != 0)
11567         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
11568       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11569       }
11570     else
11571       {
11572       if (opcode == OP_SBRAPOS)
11573         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11574       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11575       }
11576 
11577     /* Even if the match is empty, we need to reset the control head. */
11578     if (needs_control_head)
11579       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
11580 
11581     if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
11582       add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
11583 
11584     if (!zero)
11585       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
11586     }
11587   else
11588     {
11589     if (offset != 0)
11590       {
11591       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11592       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11593       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11594       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11595       if (common->capture_last_ptr != 0)
11596         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
11597       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11598       }
11599     else
11600       {
11601       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11602       OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11603       if (opcode == OP_SBRAPOS)
11604         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
11605       OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
11606       }
11607 
11608     /* Even if the match is empty, we need to reset the control head. */
11609     if (needs_control_head)
11610       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
11611 
11612     if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
11613       add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
11614 
11615     if (!zero)
11616       {
11617       if (framesize < 0)
11618         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
11619       else
11620         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
11621       }
11622     }
11623 
11624   JUMPTO(SLJIT_JUMP, loop);
11625   flush_stubs(common);
11626 
11627   compile_backtrackingpath(common, backtrack->top);
11628   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11629     return NULL;
11630   set_jumps(backtrack->own_backtracks, LABEL());
11631 
11632   if (framesize < 0)
11633     {
11634     if (offset != 0)
11635       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11636     else
11637       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11638     }
11639   else
11640     {
11641     if (offset != 0)
11642       {
11643       /* Last alternative. */
11644       if (*cc == OP_KETRPOS)
11645         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11646       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11647       }
11648     else
11649       {
11650       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11651       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
11652       }
11653     }
11654 
11655   if (*cc == OP_KETRPOS)
11656     break;
11657   ccbegin = cc + 1 + LINK_SIZE;
11658   }
11659 
11660 /* We don't have to restore the control head in case of a failed match. */
11661 
11662 backtrack->own_backtracks = NULL;
11663 if (!zero)
11664   {
11665   if (framesize < 0)
11666     add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
11667   else /* TMP2 is set to [private_data_ptr] above. */
11668     add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
11669   }
11670 
11671 /* None of them matched. */
11672 set_jumps(emptymatch, LABEL());
11673 count_match(common);
11674 return cc + 1 + LINK_SIZE;
11675 }
11676 
get_iterator_parameters(compiler_common * common,PCRE2_SPTR cc,PCRE2_UCHAR * opcode,PCRE2_UCHAR * type,sljit_u32 * max,sljit_u32 * exact,PCRE2_SPTR * end)11677 static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
11678 {
11679 int class_len;
11680 
11681 *opcode = *cc;
11682 *exact = 0;
11683 
11684 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
11685   {
11686   cc++;
11687   *type = OP_CHAR;
11688   }
11689 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
11690   {
11691   cc++;
11692   *type = OP_CHARI;
11693   *opcode -= OP_STARI - OP_STAR;
11694   }
11695 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
11696   {
11697   cc++;
11698   *type = OP_NOT;
11699   *opcode -= OP_NOTSTAR - OP_STAR;
11700   }
11701 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
11702   {
11703   cc++;
11704   *type = OP_NOTI;
11705   *opcode -= OP_NOTSTARI - OP_STAR;
11706   }
11707 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
11708   {
11709   cc++;
11710   *opcode -= OP_TYPESTAR - OP_STAR;
11711   *type = OP_END;
11712   }
11713 else
11714   {
11715   SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
11716   *type = *opcode;
11717   cc++;
11718   class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
11719   *opcode = cc[class_len - 1];
11720 
11721   if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
11722     {
11723     *opcode -= OP_CRSTAR - OP_STAR;
11724     *end = cc + class_len;
11725 
11726     if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
11727       {
11728       *exact = 1;
11729       *opcode -= OP_PLUS - OP_STAR;
11730       }
11731     }
11732   else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
11733     {
11734     *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
11735     *end = cc + class_len;
11736 
11737     if (*opcode == OP_POSPLUS)
11738       {
11739       *exact = 1;
11740       *opcode = OP_POSSTAR;
11741       }
11742     }
11743   else
11744     {
11745     SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
11746     *max = GET2(cc, (class_len + IMM2_SIZE));
11747     *exact = GET2(cc, class_len);
11748 
11749     if (*max == 0)
11750       {
11751       if (*opcode == OP_CRPOSRANGE)
11752         *opcode = OP_POSSTAR;
11753       else
11754         *opcode -= OP_CRRANGE - OP_STAR;
11755       }
11756     else
11757       {
11758       *max -= *exact;
11759       if (*max == 0)
11760         *opcode = OP_EXACT;
11761       else if (*max == 1)
11762         {
11763         if (*opcode == OP_CRPOSRANGE)
11764           *opcode = OP_POSQUERY;
11765         else
11766           *opcode -= OP_CRRANGE - OP_QUERY;
11767         }
11768       else
11769         {
11770         if (*opcode == OP_CRPOSRANGE)
11771           *opcode = OP_POSUPTO;
11772         else
11773           *opcode -= OP_CRRANGE - OP_UPTO;
11774         }
11775       }
11776     *end = cc + class_len + 2 * IMM2_SIZE;
11777     }
11778   return cc;
11779   }
11780 
11781 switch(*opcode)
11782   {
11783   case OP_EXACT:
11784   *exact = GET2(cc, 0);
11785   cc += IMM2_SIZE;
11786   break;
11787 
11788   case OP_PLUS:
11789   case OP_MINPLUS:
11790   *exact = 1;
11791   *opcode -= OP_PLUS - OP_STAR;
11792   break;
11793 
11794   case OP_POSPLUS:
11795   *exact = 1;
11796   *opcode = OP_POSSTAR;
11797   break;
11798 
11799   case OP_UPTO:
11800   case OP_MINUPTO:
11801   case OP_POSUPTO:
11802   *max = GET2(cc, 0);
11803   cc += IMM2_SIZE;
11804   break;
11805   }
11806 
11807 if (*type == OP_END)
11808   {
11809   *type = *cc;
11810   *end = next_opcode(common, cc);
11811   cc++;
11812   return cc;
11813   }
11814 
11815 *end = cc + 1;
11816 #ifdef SUPPORT_UNICODE
11817 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
11818 #endif
11819 return cc;
11820 }
11821 
/* Emits the matching path for a quantified single item (a character, a
negated character, a character type or a character class followed by a
repeat). The repeat is decoded by get_iterator_parameters(): the mandatory
part (exact) is generated first, then the optional part according to the
normalized opcode.

Arguments:
  common   compiler state
  cc       points at the repeat (or class) opcode
  parent   backtrack record that receives the new char_iterator_backtrack

Returns:   the address of the code following the item (this is advanced past
           a following OP_CHAR / OP_CHARI when the "charpos" optimization
           consumes it). PUSH_BACKTRACK is given NULL as its error value.
*/
static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode;
PCRE2_UCHAR type;
sljit_u32 max = 0, exact;
/* Packed value: the low three bits are the early fail type, the remaining
bits are the slot offset (both are unpacked below). */
sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
sljit_s32 early_fail_type;
BOOL charpos_enabled;
PCRE2_UCHAR charpos_char;
unsigned int charpos_othercasebit;
PCRE2_SPTR end;
jump_list *no_match = NULL;
jump_list *no_char1_match = NULL;
struct sljit_jump *jump = NULL;
struct sljit_label *label;
int private_data_ptr = PRIVATE_DATA(cc);
/* The iterator keeps two machine words of state. Without private data they
are the two topmost slots of the backtrack stack (STACK_TOP relative),
otherwise two consecutive words at private_data_ptr (SLJIT_SP relative). */
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);
int tmp_base, tmp_offset;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
BOOL use_tmp;
#endif

PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);

early_fail_type = (early_fail_ptr & 0x7);
early_fail_ptr >>= 3;

/* During recursion, these optimizations are disabled. */
if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL)
  {
  early_fail_ptr = 0;
  early_fail_type = type_skip;
  }

SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
  || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));

/* type_fail: backtrack at once when STR_PTR is not beyond the position
stored in the early fail slot. */
if (early_fail_type == type_fail)
  add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);

/* Scratch location for counters and saved positions: the TMP3 register,
except for OP_EXTUNI which uses the POSSESSIVE0 slot instead.
NOTE(review): presumably the OP_EXTUNI matcher does not preserve TMP3 -
confirm against compile_char1_matchingpath. */
if (type != OP_EXTUNI)
  {
  tmp_base = TMP3;
  tmp_offset = 0;
  }
else
  {
  tmp_base = SLJIT_MEM1(SLJIT_SP);
  tmp_offset = POSSESSIVE0;
  }

/* Handle fixed part first. */
if (exact > 1)
  {
  SLJIT_ASSERT(early_fail_ptr == 0);

  if (common->mode == PCRE2_JIT_COMPLETE
#ifdef SUPPORT_UNICODE
      && !common->utf
#endif
      && type != OP_ANYNL && type != OP_EXTUNI)
    {
    /* One range check for the whole fixed part (STR_PTR + exact must not
    exceed STR_END), then a counted loop. NOTE(review): the final FALSE
    argument presumably turns off the per character end-of-subject check. */
    OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
    add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);
    }
  else
    {
    /* Same counted loop, but without the single range check up front. */
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);
    }
  }
else if (exact == 1)
  compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);

if (early_fail_type == type_fail_range)
  {
  /* Range end first, followed by range start. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw));
  /* Backtrack when (STR_PTR - start) <= (end - start). */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
  OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
  add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));

  /* Both words of the range are reset to the current position. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw), STR_PTR, 0);
  }

/* Optional part of the repeat. */
switch(opcode)
  {
  case OP_STAR:
  case OP_UPTO:
  SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR);

  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    SLJIT_ASSERT(private_data_ptr == 0);
    SLJIT_ASSERT(early_fail_ptr == 0);

    /* The starting position and a zero word are pushed first; after that
    one more position is pushed for every successful iteration. */
    allocate_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

    if (opcode == OP_UPTO)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);

    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
    if (opcode == OP_UPTO)
      {
      /* Leave the loop once the remaining count reaches zero. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
      OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
      jump = JUMP(SLJIT_ZERO);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
      }

    /* We cannot use TMP3 because of allocate_stack. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, label);
    if (jump != NULL)
      JUMPHERE(jump);
    BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
    break;
    }
#ifdef SUPPORT_UNICODE
  else if (type == OP_ALLANY && !common->invalid_utf)
#else
  else if (type == OP_ALLANY)
#endif
    {
    if (opcode == OP_STAR)
      {
      /* No loop is generated: STR_PTR is moved straight to STR_END.
      Slot 0 holds the new position (STR_END), slot 1 the old STR_PTR. */
      if (private_data_ptr == 0)
        allocate_stack(common, 2);

      OP1(SLJIT_MOV, base, offset0, STR_END, 0);
      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);

      OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
      process_partial_match(common);

      if (early_fail_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
      break;
      }
#ifdef SUPPORT_UNICODE
    else if (!common->utf)
#else
    else
#endif
      {
      /* OP_UPTO without UTF: advance by max code units in one step. */
      if (private_data_ptr == 0)
        allocate_stack(common, 2);

      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));

      if (common->mode == PCRE2_JIT_COMPLETE)
        {
        /* Clamp: STR_PTR = (STR_PTR > STR_END) ? STR_END : STR_PTR. */
        OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
        SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
        }
      else
        {
        /* The code emitted by process_partial_match is executed only when
        STR_PTR moved beyond STR_END. */
        jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
        process_partial_match(common);
        JUMPHERE(jump);
        }

      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);

      if (early_fail_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
      break;
      }
    /* With UTF enabled, OP_UPTO on OP_ALLANY takes neither branch above and
    continues with the generic code below. */
    }

  charpos_enabled = FALSE;
  charpos_char = 0;
  charpos_othercasebit = 0;

  /* "charpos" optimization: when the repeat is not itself a plain character
  repeat and the next item is OP_CHAR / OP_CHARI, that following character
  is searched for while iterating (see the two search loops below). */
  if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
    {
#ifdef SUPPORT_UNICODE
    charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
#else
    charpos_enabled = TRUE;
#endif
    if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
      {
      /* A caseless character is usable only if char_get_othercase_bit()
      returns a non-zero bit for it. */
      charpos_othercasebit = char_get_othercase_bit(common, end + 1);
      if (charpos_othercasebit == 0)
        charpos_enabled = FALSE;
      }

    if (charpos_enabled)
      {
      charpos_char = end[1];
      /* Consume the OP_CHAR opcode. */
      end += 2;
#if PCRE2_CODE_UNIT_WIDTH == 8
      SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
      SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
      /* Bit 0x100 set: the low eight bits are moved up by eight. */
      if ((charpos_othercasebit & 0x100) != 0)
        charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
#endif
      if (charpos_othercasebit != 0)
        charpos_char |= charpos_othercasebit;

      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
      }
    }

  if (charpos_enabled)
    {
    /* max + 1 because the counter is decremented before each iteration of
    the first search loop. */
    if (opcode == OP_UPTO)
      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);

    /* Search the first instance of charpos_char. */
    jump = JUMP(SLJIT_JUMP);
    label = LABEL();
    if (opcode == OP_UPTO)
      {
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
      add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_ZERO));
      }
    compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE);
    if (early_fail_ptr != 0)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
    /* The initial jump lands here, so the current character is tested before
    the first iteration consumes anything. */
    JUMPHERE(jump);

    detect_partial_match(common, &backtrack->own_backtracks);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (charpos_othercasebit != 0)
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);

    /* First instance found: both state slots start at this position. */
    if (private_data_ptr == 0)
      allocate_stack(common, 2);
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);

    if (opcode == OP_UPTO)
      {
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
      add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
      }

    /* Search the last instance of charpos_char. */
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
    if (early_fail_ptr != 0)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
    detect_partial_match(common, &no_match);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (charpos_othercasebit != 0)
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);

    if (opcode == OP_STAR)
      {
      /* Slot 0 is updated only at positions holding charpos_char. */
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, label);
      }
    else
      {
      /* Same update, but the loop also ends when the counter runs out. */
      jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      JUMPHERE(jump);
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
      JUMPTO(SLJIT_NOT_ZERO, label);
      }

    /* Continue one code unit after the last recorded instance, i.e. the
    consumed OP_CHAR is treated as matched there. */
    set_jumps(no_match, LABEL());
    OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1));
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    }
  else
    {
    /* Generic loop: slot 1 receives the starting position, slot 0 the
    position reached when the loop stops. */
    if (private_data_ptr == 0)
      allocate_stack(common, 2);

    OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    /* In UTF mode the last good position is tracked explicitly, in TMP3
    when that register is free, otherwise directly in slot 0. */
    use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR);
    SLJIT_ASSERT(!use_tmp || tmp_base == TMP3);

    if (common->utf)
      OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
#endif
    if (opcode == OP_UPTO)
      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);

    detect_partial_match(common, &no_match);
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf)
      OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
#endif

    if (opcode == OP_UPTO)
      {
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
      add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
      }

    detect_partial_match_to(common, label);
    /* NOTE(review): presumably a failing compile_char1_matchingpath leaves
    STR_PTR one code unit past the rejected character; this add lets the
    end-of-subject exit share the subtract emitted below - confirm. */
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

    set_jumps(no_char1_match, LABEL());
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf)
      {
      /* Restore the last position saved after a complete character. */
      set_jumps(no_match, LABEL());
      if (use_tmp)
        {
        OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
        OP1(SLJIT_MOV, base, offset0, TMP3, 0);
        }
      else
        OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      }
    else
#endif
      {
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      set_jumps(no_match, LABEL());
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      }

    if (early_fail_ptr != 0)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
    }

  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  break;

  /* OP_MINSTAR, OP_MINUPTO, OP_QUERY and OP_MINQUERY only record state here
  (OP_QUERY also tries one character). NOTE(review): further iterations are
  presumably generated by the backtracking path, which re-enters at the
  matchingpath label - confirm. */
  case OP_MINSTAR:
  if (private_data_ptr == 0)
    allocate_stack(common, 1);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  if (early_fail_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
  break;

  case OP_MINUPTO:
  SLJIT_ASSERT(early_fail_ptr == 0);
  if (private_data_ptr == 0)
    allocate_stack(common, 2);
  /* Slot 0: current position, slot 1: counter initialised to max + 1. */
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  break;

  case OP_QUERY:
  case OP_MINQUERY:
  SLJIT_ASSERT(early_fail_ptr == 0);
  if (private_data_ptr == 0)
    allocate_stack(common, 1);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  if (opcode == OP_QUERY)
    compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  break;

  /* Nothing beyond the fixed part generated above. */
  case OP_EXACT:
  break;

  /* The OP_POS* cases allocate no backtrack stack slots and do not set the
  matchingpath label; only the scratch location is used. */
  case OP_POSSTAR:
#if defined SUPPORT_UNICODE
  if (type == OP_ALLANY && !common->invalid_utf)
#else
  if (type == OP_ALLANY)
#endif
    {
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    process_partial_match(common);
    if (early_fail_ptr != 0)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
    break;
    }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (type == OP_EXTUNI || common->utf)
    {
    /* The scratch location remembers the position after the last complete
    match; STR_PTR is restored from it when the loop stops. */
    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
    detect_partial_match(common, &no_match);
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
    detect_partial_match_to(common, label);

    set_jumps(no_match, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
    if (early_fail_ptr != 0)
      {
      /* TMP3 and STR_PTR hold the same value at this point. */
      if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0);
      else
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
      }
    break;
    }
#endif

  /* Same add / subtract exit structure as the generic OP_STAR loop above. */
  detect_partial_match(common, &no_match);
  label = LABEL();
  compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
  detect_partial_match_to(common, label);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  set_jumps(no_char1_match, LABEL());
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  set_jumps(no_match, LABEL());
  if (early_fail_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
  break;

  case OP_POSUPTO:
  SLJIT_ASSERT(early_fail_ptr == 0);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (common->utf)
    {
    /* POSSESSIVE1 holds the last good position, the scratch location holds
    the remaining count. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);

    detect_partial_match(common, &no_match);
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
    add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
    detect_partial_match_to(common, label);

    set_jumps(no_match, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
    break;
    }
#endif

  if (type == OP_ALLANY)
    {
    /* Advance by max code units at once, as for OP_UPTO above. */
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));

    if (common->mode == PCRE2_JIT_COMPLETE)
      {
      OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
      SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
      }
    else
      {
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
      process_partial_match(common);
      JUMPHERE(jump);
      }
    break;
    }

  OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);

  detect_partial_match(common, &no_match);
  label = LABEL();
  compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
  OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
  add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
  detect_partial_match_to(common, label);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  set_jumps(no_char1_match, LABEL());
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  set_jumps(no_match, LABEL());
  break;

  case OP_POSQUERY:
  SLJIT_ASSERT(early_fail_ptr == 0);
  /* Save the position, try one character, and restore the saved position
  if that attempt jumps to no_match (the second store is skipped then). */
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  set_jumps(no_match, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

count_match(common);
return end;
}
12333 
compile_fail_accept_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)12334 static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
12335 {
12336 DEFINE_COMPILER;
12337 backtrack_common *backtrack;
12338 
12339 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
12340 
12341 if (*cc == OP_FAIL)
12342   {
12343   add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));
12344   return cc + 1;
12345   }
12346 
12347 if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
12348   add_jump(compiler, &common->restart_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
12349 
12350 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
12351   {
12352   /* No need to check notempty conditions. */
12353   if (common->accept_label == NULL)
12354     add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
12355   else
12356     JUMPTO(SLJIT_JUMP, common->accept_label);
12357   return cc + 1;
12358   }
12359 
12360 if (common->accept_label == NULL)
12361   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
12362 else
12363   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
12364 
12365 if (HAS_VIRTUAL_REGISTERS)
12366   {
12367   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12368   OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
12369   }
12370 else
12371   OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options));
12372 
12373 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
12374 add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_NOT_ZERO));
12375 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
12376 if (common->accept_label == NULL)
12377   add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
12378 else
12379   JUMPTO(SLJIT_ZERO, common->accept_label);
12380 
12381 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
12382 if (common->accept_label == NULL)
12383   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
12384 else
12385   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
12386 add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));
12387 return cc + 1;
12388 }
12389 
compile_close_matchingpath(compiler_common * common,PCRE2_SPTR cc)12390 static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
12391 {
12392 DEFINE_COMPILER;
12393 int offset = GET2(cc, 1);
12394 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
12395 
12396 /* Data will be discarded anyway... */
12397 if (common->currententry != NULL)
12398   return cc + 1 + IMM2_SIZE;
12399 
12400 if (!optimized_cbracket)
12401   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
12402 offset <<= 1;
12403 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
12404 if (!optimized_cbracket)
12405   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12406 return cc + 1 + IMM2_SIZE;
12407 }
12408 
compile_control_verb_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)12409 static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
12410 {
12411 DEFINE_COMPILER;
12412 backtrack_common *backtrack;
12413 PCRE2_UCHAR opcode = *cc;
12414 PCRE2_SPTR ccend = cc + 1;
12415 
12416 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
12417     opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
12418   ccend += 2 + cc[1];
12419 
12420 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
12421 
12422 if (opcode == OP_SKIP)
12423   {
12424   allocate_stack(common, 1);
12425   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
12426   return ccend;
12427   }
12428 
12429 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
12430   {
12431   if (HAS_VIRTUAL_REGISTERS)
12432     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12433   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
12434   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
12435   OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
12436   }
12437 
12438 return ccend;
12439 }
12440 
/* One-element opcode buffer holding OP_THEN_TRAP. The then_trap_backtrack
records created in this file point their common.cc at this buffer instead of
at an opcode inside the compiled pattern. */
static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
12442 
compile_then_trap_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)12443 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
12444 {
12445 DEFINE_COMPILER;
12446 backtrack_common *backtrack;
12447 BOOL needs_control_head;
12448 int size;
12449 
12450 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
12451 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
12452 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
12453 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
12454 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
12455 
12456 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
12457 size = 3 + (size < 0 ? 0 : size);
12458 
12459 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12460 allocate_stack(common, size);
12461 if (size > 3)
12462   OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
12463 else
12464   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
12465 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
12466 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
12467 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
12468 
12469 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
12470 if (size >= 0)
12471   init_frame(common, cc, ccend, size - 1, 0);
12472 }
12473 
/* Compiles the matching path for the opcodes in the range [cc, ccend).

Each opcode is dispatched to the compile_*_matchingpath helper responsible
for it, or handled inline for a few simple opcodes. Helpers return the address
of the next opcode to process; when a helper returns NULL this function
returns immediately.

Arguments:
  common   the compiler state
  cc       first opcode to compile
  ccend    end of the range; must point at OP_END or at an opcode between
             OP_ALT and OP_KETRPOS (asserted below)
  parent   backtrack record that receives the backtrack items pushed here

Helpers that take a jump list for failures are given
parent->top->simple_backtracks when parent->top is set, and
parent->own_backtracks otherwise. */

static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
BOOL has_then_trap = FALSE;
then_trap_backtrack *save_then_trap = NULL;

SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));

/* A then trap is installed when the pattern has a THEN verb and
then_offsets[] is non-zero at the offset of cc. The previous trap is
remembered so it can be restored once the range has been compiled. */
if (common->has_then && common->then_offsets[cc - common->start] != 0)
  {
  SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
  has_then_trap = TRUE;
  save_then_trap = common->then_trap;
  /* Tail item on backtrack. */
  compile_then_trap_matchingpath(common, cc, ccend, parent);
  }

while (cc < ccend)
  {
  switch(*cc)
    {
    /* Simple (zero-width) assertions. */
    case OP_SOD:
    case OP_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_NOT_UCP_WORD_BOUNDARY:
    case OP_UCP_WORD_BOUNDARY:
    cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
    break;

    /* Opcodes compiled by compile_char1_matchingpath. */
    case OP_NOT_DIGIT:
    case OP_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
    case OP_ANYBYTE:
    case OP_NOTPROP:
    case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE:
    case OP_HSPACE:
    case OP_NOT_VSPACE:
    case OP_VSPACE:
    case OP_EXTUNI:
    case OP_NOT:
    case OP_NOTI:
    cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
    break;

    /* OVECTOR(0) is replaced by STR_PTR; the old value is saved in a newly
    allocated stack slot. */
    case OP_SET_SOM:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    cc++;
    break;

    /* Literal characters: compile_charn_matchingpath is only used in
    PCRE2_JIT_COMPLETE mode; other modes compile one opcode at a time. */
    case OP_CHAR:
    case OP_CHARI:
    if (common->mode == PCRE2_JIT_COMPLETE)
      cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
    break;

    /* Repeated single items. */
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    cc = compile_iterator_matchingpath(common, cc, parent);
    break;

    /* A class is treated as an iterator when the opcode that follows its
    32-byte data is in the OP_CRSTAR..OP_CRPOSRANGE range. */
    case OP_CLASS:
    case OP_NCLASS:
    if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
      cc = compile_iterator_matchingpath(common, cc, parent);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    /* Same test as above; GET(cc, 1) gives the distance to the opcode that
    follows the extended class. */
    case OP_XCLASS:
    if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
      cc = compile_iterator_matchingpath(common, cc, parent);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
    break;
#endif

    /* References: a following OP_CR* opcode selects the iterator variant. */
    case OP_REF:
    case OP_REFI:
    if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    else
      {
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE);
      cc += 1 + IMM2_SIZE;
      }
    break;

    /* Same as above for OP_DNREF/OP_DNREFI, which carry two IMM2 arguments
    and need compile_dnref_search() before the reference is matched. */
    case OP_DNREF:
    case OP_DNREFI:
    if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    else
      {
      compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE);
      cc += 1 + 2 * IMM2_SIZE;
      }
    break;

    case OP_RECURSE:
    cc = compile_recurse_matchingpath(common, cc, parent);
    break;

    case OP_CALLOUT:
    case OP_CALLOUT_STR:
    cc = compile_callout_matchingpath(common, cc, parent);
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
    cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
    break;

    /* The bracket that follows OP_BRAMINZERO is skipped on the matching path
    (cc is moved to its end). STR_PTR is saved on the stack; when the bracket
    ends with OP_KETRMIN an additional zero slot is stored as well. */
    case OP_BRAMINZERO:
    PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
    cc = bracketend(cc + 1);
    if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
      {
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else
      {
      allocate_stack(common, 2);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
      }
    BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
    count_match(common);
    break;

    /* Bracket-like opcodes. */
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    cc = compile_bracket_matchingpath(common, cc, parent);
    break;

    /* OP_BRAZERO is compiled as a bracket when the opcode after it is above
    OP_ASSERTBACK_NOT, and as an assertion otherwise. */
    case OP_BRAZERO:
    if (cc[1] > OP_ASSERTBACK_NOT)
      cc = compile_bracket_matchingpath(common, cc, parent);
    else
      {
      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
      }
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    cc = compile_bracketpos_matchingpath(common, cc, parent);
    break;

    /* The previous mark pointer is saved on the stack, then cc + 2 is stored
    both in common->mark_ptr and in jit_arguments.mark_ptr. When
    common->has_skip_arg is set, five slots are used instead of one and a
    type_mark entry (previous control head, type, cc + 2, STR_PTR) is linked
    into the control head chain. */
    case OP_MARK:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    SLJIT_ASSERT(common->mark_ptr != 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
    allocate_stack(common, common->has_skip_arg ? 5 : 1);
    if (HAS_VIRTUAL_REGISTERS)
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
    if (common->has_skip_arg)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
      }
    cc += 1 + 2 + cc[1];
    break;

    /* Backtracking control verbs. */
    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    case OP_THEN:
    case OP_THEN_ARG:
    case OP_COMMIT:
    case OP_COMMIT_ARG:
    cc = compile_control_verb_matchingpath(common, cc, parent);
    break;

    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    cc = compile_fail_accept_matchingpath(common, cc, parent);
    break;

    case OP_CLOSE:
    cc = compile_close_matchingpath(common, cc);
    break;

    /* The bracket after OP_SKIPZERO is skipped without emitting any code. */
    case OP_SKIPZERO:
    cc = bracketend(cc + 1);
    break;

    default:
    SLJIT_UNREACHABLE();
    return;
    }
  /* A NULL result from a helper stops the compilation of this range. */
  if (cc == NULL)
    return;
  }

if (has_then_trap)
  {
  /* Head item on backtrack. */
  PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
  BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
  BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
  /* Restore the trap that was active before this range was entered. */
  common->then_trap = save_then_trap;
  }
SLJIT_ASSERT(cc == ccend);
}
12790 
/* The helper macros of the matching-path compilers are removed here; the
code below uses the two macros defined next instead. */
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS

/* Calls compile_backtrackingpath() for the given backtrack item and returns
from the enclosing function if the sljit compiler reports an error. The
expansion uses the names "common" and "compiler" from the enclosing scope and
a bare "return;", so it can only be used inside functions returning void. */
#define COMPILE_BACKTRACKINGPATH(current) \
  do \
    { \
    compile_backtrackingpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Casts the variable named "current" in the enclosing scope to a pointer to
the given backtrack structure type. */
#define CURRENT_AS(type) ((type *)current)
12805 
/* Emits the backtracking path of a single-item iterator.

The iterator is decoded with get_iterator_parameters(), which supplies the
repeat opcode, the item type, the limits and the address of the repeated item.
The code emitted for each repeat kind either jumps back to the matchingpath
label stored in the char_iterator_backtrack record, or falls through after
releasing the stack slots used by the iterator. The function ends by binding
current->own_backtracks to the label that follows the emitted code.

Arguments:
  common   the compiler state
  current  the backtrack record of the iterator (a char_iterator_backtrack) */

static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
PCRE2_SPTR cc = current->cc;
PCRE2_UCHAR opcode;
PCRE2_UCHAR type;
sljit_u32 max = 0, exact;
struct sljit_label *label = NULL;
struct sljit_jump *jump = NULL;
jump_list *jumplist = NULL;
PCRE2_SPTR end;
int private_data_ptr = PRIVATE_DATA(cc);
/* The two iterator values are kept in the stack slots STACK(0) and STACK(1)
when the opcode has no private data, and in two consecutive words at
private_data_ptr (relative to SLJIT_SP) otherwise. */
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);

switch(opcode)
  {
  case OP_STAR:
  case OP_UPTO:
  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    /* One saved STR_PTR is popped per backtrack; a non-zero value resumes
    the matching path, zero falls through. */
    SLJIT_ASSERT(private_data_ptr == 0);
    set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
    }
  else
    {
    if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
      {
      /* The first value is the position to retry from and the second one
      (kept in TMP2) is the limit that ends the retries. The loop steps
      STR_PTR back and resumes the matching path only when the character
      before STR_PTR (after OR-ing othercasebit, if any) equals
      u.charpos.chr. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      OP1(SLJIT_MOV, TMP2, 0, base, offset1);
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
      label = LABEL();
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
      move_back(common, NULL, TRUE);
      CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
      }
    else
      {
      /* Step back by one item with move_back(), store the new position and
      retry, unless the position is already at or below the limit held in
      the second value. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
      move_back(common, NULL, TRUE);
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
      }
    /* Reached when the limit has been hit: release both stack slots if the
    values were kept on the stack. */
    JUMPHERE(jump);
    if (private_data_ptr == 0)
      free_stack(common, 2);
    }
  break;

  case OP_MINSTAR:
  /* Try to match one more item from the saved position; on success store
  the new position and resume the matching path, on failure release the
  slot. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  set_jumps(jumplist, LABEL());
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  case OP_MINUPTO:
  /* Same as OP_MINSTAR, with a counter in the second value: it is
  decremented first, and reaching zero ends the retries. */
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));

  OP1(SLJIT_MOV, base, offset1, TMP1, 0);
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);

  set_jumps(jumplist, LABEL());
  if (private_data_ptr == 0)
    free_stack(common, 2);
  break;

  case OP_QUERY:
  /* The saved value is loaded and replaced by zero, so each entry point
  resumes the matching path at most once with a non-zero position. The
  u.backtracks jumps enter at the second load sequence. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  jump = JUMP(SLJIT_JUMP);
  set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  JUMPHERE(jump);
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  case OP_MINQUERY:
  /* The saved value is loaded and replaced by zero. A non-zero value is
  used as the position for matching the item once; a zero value, or a failed
  match, releases the slot. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  set_jumps(jumplist, LABEL());
  JUMPHERE(jump);
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  /* No backtracking code is emitted for these repeat kinds. */
  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSQUERY:
  case OP_POSUPTO:
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

set_jumps(current->own_backtracks, LABEL());
}
12933 
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)12934 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12935 {
12936 DEFINE_COMPILER;
12937 PCRE2_SPTR cc = current->cc;
12938 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
12939 PCRE2_UCHAR type;
12940 
12941 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
12942 
12943 if ((type & 0x1) == 0)
12944   {
12945   /* Maximize case. */
12946   set_jumps(current->own_backtracks, LABEL());
12947   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12948   free_stack(common, 1);
12949   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12950   return;
12951   }
12952 
12953 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12954 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12955 set_jumps(current->own_backtracks, LABEL());
12956 free_stack(common, ref ? 2 : 3);
12957 }
12958 
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)12959 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12960 {
12961 DEFINE_COMPILER;
12962 recurse_entry *entry;
12963 
12964 if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
12965   {
12966   entry = CURRENT_AS(recurse_backtrack)->entry;
12967   if (entry->backtrack_label == NULL)
12968     add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
12969   else
12970     JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
12971   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
12972   }
12973 else
12974   compile_backtrackingpath(common, current->top);
12975 
12976 set_jumps(current->own_backtracks, LABEL());
12977 }
12978 
/* Emits the backtracking path of an assertion, optionally prefixed by
OP_BRAZERO (OP_BRAMINZERO is not allowed here).

With an OP_BRAZERO prefix, STACK(0) holds a value that is loaded into STR_PTR
first: a non-zero value resumes the matching path of the assert_backtrack
record after the slot has been overwritten with zero, so the retry happens
only once.

Without a frame (framesize < 0), and for negative assertions with the prefix,
nothing else is needed. For OP_ASSERT/OP_ASSERTBACK with a frame, the frame is
reverted through common->revertframes, STACK_TOP is moved past it and the
private data slot is restored from the value found at STACK(-2). */

static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
assert_backtrack *assert_info = CURRENT_AS(assert_backtrack);
PCRE2_SPTR cc = current->cc;
BOOL is_brazero;
struct sljit_jump *skip_retry = NULL;

SLJIT_ASSERT(*cc != OP_BRAMINZERO);
is_brazero = (*cc == OP_BRAZERO);

if (is_brazero)
  {
  /* Step over the prefix so that cc points at the assertion opcode. */
  cc++;
  SLJIT_ASSERT(current->own_backtracks == NULL);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

if (assert_info->framesize < 0)
  {
  set_jumps(current->own_backtracks, LABEL());

  if (!is_brazero)
    return;

  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, assert_info->matchingpath);
  free_stack(common, 1);
  return;
  }

if (is_brazero)
  {
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, assert_info->matchingpath);
    free_stack(common, 1);
    return;
    }

  /* A zero value skips everything up to the end of this function. */
  free_stack(common, 1);
  skip_retry = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
  {
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert_info->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert_info->framesize - 1) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert_info->private_data_ptr, TMP1, 0);
  }

set_jumps(current->own_backtracks, LABEL());

if (!is_brazero)
  return;

/* We know there is enough place on the stack. */
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
JUMPTO(SLJIT_JUMP, assert_info->matchingpath);
JUMPHERE(skip_retry);
}
13047 
compile_bracket_backtrackingpath(compiler_common * common,struct backtrack_common * current)13048 static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13049 {
13050 DEFINE_COMPILER;
13051 int opcode, stacksize, alt_count, alt_max;
13052 int offset = 0;
13053 int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
13054 int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
13055 PCRE2_SPTR cc = current->cc;
13056 PCRE2_SPTR ccbegin;
13057 PCRE2_SPTR ccprev;
13058 PCRE2_UCHAR bra = OP_BRA;
13059 PCRE2_UCHAR ket;
13060 assert_backtrack *assert;
13061 BOOL has_alternatives;
13062 BOOL needs_control_head = FALSE;
13063 BOOL has_vreverse;
13064 struct sljit_jump *brazero = NULL;
13065 struct sljit_jump *next_alt = NULL;
13066 struct sljit_jump *once = NULL;
13067 struct sljit_jump *cond = NULL;
13068 struct sljit_label *rmin_label = NULL;
13069 struct sljit_label *exact_label = NULL;
13070 struct sljit_jump *mov_addr = NULL;
13071 
13072 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
13073   {
13074   bra = *cc;
13075   cc++;
13076   }
13077 
13078 opcode = *cc;
13079 ccbegin = bracketend(cc) - 1 - LINK_SIZE;
13080 ket = *ccbegin;
13081 if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
13082   {
13083   repeat_ptr = PRIVATE_DATA(ccbegin);
13084   repeat_type = PRIVATE_DATA(ccbegin + 2);
13085   repeat_count = PRIVATE_DATA(ccbegin + 3);
13086   SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
13087   if (repeat_type == OP_UPTO)
13088     ket = OP_KETRMAX;
13089   if (repeat_type == OP_MINUPTO)
13090     ket = OP_KETRMIN;
13091   }
13092 ccbegin = cc;
13093 cc += GET(cc, 1);
13094 has_alternatives = *cc == OP_ALT;
13095 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
13096   has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
13097 if (opcode == OP_CBRA || opcode == OP_SCBRA)
13098   offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
13099 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
13100   opcode = OP_SCOND;
13101 
13102 alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
13103 
13104 /* Decoding the needs_control_head in framesize. */
13105 if (opcode == OP_ONCE)
13106   {
13107   needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
13108   CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
13109   }
13110 
13111 if (ket != OP_KET && repeat_type != 0)
13112   {
13113   /* TMP1 is used in OP_KETRMIN below. */
13114   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13115   free_stack(common, 1);
13116   if (repeat_type == OP_UPTO)
13117     OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
13118   else
13119     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
13120   }
13121 
13122 if (ket == OP_KETRMAX)
13123   {
13124   if (bra == OP_BRAZERO)
13125     {
13126     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13127     free_stack(common, 1);
13128     brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13129     }
13130   }
13131 else if (ket == OP_KETRMIN)
13132   {
13133   if (bra != OP_BRAMINZERO)
13134     {
13135     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13136     if (repeat_type != 0)
13137       {
13138       /* TMP1 was set a few lines above. */
13139       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13140       /* Drop STR_PTR for non-greedy plus quantifier. */
13141       if (opcode != OP_ONCE)
13142         free_stack(common, 1);
13143       }
13144     else if (opcode >= OP_SBRA || opcode == OP_ONCE)
13145       {
13146       /* Checking zero-length iteration. */
13147       if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
13148         CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13149       else
13150         {
13151         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13152         CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13153         }
13154       /* Drop STR_PTR for non-greedy plus quantifier. */
13155       if (opcode != OP_ONCE)
13156         free_stack(common, 1);
13157       }
13158     else
13159       JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13160     }
13161   rmin_label = LABEL();
13162   if (repeat_type != 0)
13163     OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
13164   }
13165 else if (bra == OP_BRAZERO)
13166   {
13167   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13168   free_stack(common, 1);
13169   brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13170   }
13171 else if (repeat_type == OP_EXACT)
13172   {
13173   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
13174   exact_label = LABEL();
13175   }
13176 
13177 if (offset != 0)
13178   {
13179   if (common->capture_last_ptr != 0)
13180     {
13181     SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
13182     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13183     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13184     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
13185     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
13186     free_stack(common, 3);
13187     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
13188     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
13189     }
13190   else if (common->optimized_cbracket[offset >> 1] == 0)
13191     {
13192     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13193     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13194     free_stack(common, 2);
13195     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13196     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13197     }
13198   }
13199 
13200 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
13201   {
13202   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13203     {
13204     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13205     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13206     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
13207     }
13208   once = JUMP(SLJIT_JUMP);
13209   }
13210 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
13211   {
13212   if (has_alternatives)
13213     {
13214     /* Always exactly one alternative. */
13215     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13216     free_stack(common, 1);
13217 
13218     alt_max = 2;
13219     next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13220     }
13221   }
13222 else if (has_alternatives)
13223   {
13224   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13225   free_stack(common, 1);
13226 
13227   if (alt_max > 3)
13228     {
13229     sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
13230 
13231     SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_mov_addr);
13232     sljit_set_label(CURRENT_AS(bracket_backtrack)->u.matching_mov_addr, LABEL());
13233     sljit_emit_op0(compiler, SLJIT_ENDBR);
13234     }
13235   else
13236     next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13237   }
13238 
13239 COMPILE_BACKTRACKINGPATH(current->top);
13240 if (current->own_backtracks)
13241   set_jumps(current->own_backtracks, LABEL());
13242 
13243 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
13244   {
13245   /* Conditional block always has at most one alternative. */
13246   if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
13247     {
13248     SLJIT_ASSERT(has_alternatives);
13249     assert = CURRENT_AS(bracket_backtrack)->u.assert;
13250     if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
13251       {
13252       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
13253       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13254       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
13255       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
13256       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
13257       }
13258     cond = JUMP(SLJIT_JUMP);
13259     set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
13260     }
13261   else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
13262     {
13263     SLJIT_ASSERT(has_alternatives);
13264     cond = JUMP(SLJIT_JUMP);
13265     set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
13266     }
13267   else
13268     SLJIT_ASSERT(!has_alternatives);
13269   }
13270 
13271 if (has_alternatives)
13272   {
13273   alt_count = 1;
13274   do
13275     {
13276     current->top = NULL;
13277     current->own_backtracks = NULL;
13278     current->simple_backtracks = NULL;
13279     /* Conditional blocks always have an additional alternative, even if it is empty. */
13280     if (*cc == OP_ALT)
13281       {
13282       ccprev = cc + 1 + LINK_SIZE;
13283       cc += GET(cc, 1);
13284 
13285       has_vreverse = FALSE;
13286       if (opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NA)
13287         {
13288         SLJIT_ASSERT(private_data_ptr != 0);
13289         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13290 
13291         has_vreverse = (*ccprev == OP_VREVERSE);
13292         if (*ccprev == OP_REVERSE || has_vreverse)
13293           ccprev = compile_reverse_matchingpath(common, ccprev, current);
13294         }
13295       else if (opcode != OP_COND && opcode != OP_SCOND)
13296         {
13297         if (opcode != OP_ONCE)
13298           {
13299           if (private_data_ptr != 0)
13300             OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13301           else
13302             OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13303           }
13304         else
13305           OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
13306         }
13307 
13308       compile_matchingpath(common, ccprev, cc, current);
13309       if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13310         return;
13311 
13312       switch (opcode)
13313         {
13314         case OP_ASSERTBACK_NA:
13315           if (has_vreverse)
13316             {
13317             SLJIT_ASSERT(current->top != NULL && PRIVATE_DATA(ccbegin + 1));
13318             add_jump(compiler, &current->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
13319             }
13320 
13321           if (PRIVATE_DATA(ccbegin + 1))
13322             OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
13323           break;
13324         case OP_ASSERT_NA:
13325           OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13326           break;
13327         case OP_SCRIPT_RUN:
13328           match_script_run_common(common, private_data_ptr, current);
13329           break;
13330         }
13331       }
13332 
13333     /* Instructions after the current alternative is successfully matched. */
13334     /* There is a similar code in compile_bracket_matchingpath. */
13335     if (opcode == OP_ONCE)
13336       match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
13337 
13338     stacksize = 0;
13339     if (repeat_type == OP_MINUPTO)
13340       {
13341       /* We need to preserve the counter. TMP2 will be used below. */
13342       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
13343       stacksize++;
13344       }
13345     if (ket != OP_KET || bra != OP_BRA)
13346       stacksize++;
13347     if (offset != 0)
13348       {
13349       if (common->capture_last_ptr != 0)
13350         stacksize++;
13351       if (common->optimized_cbracket[offset >> 1] == 0)
13352         stacksize += 2;
13353       }
13354     if (opcode != OP_ONCE)
13355       stacksize++;
13356 
13357     if (stacksize > 0)
13358       allocate_stack(common, stacksize);
13359 
13360     stacksize = 0;
13361     if (repeat_type == OP_MINUPTO)
13362       {
13363       /* TMP2 was set above. */
13364       OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
13365       stacksize++;
13366       }
13367 
13368     if (ket != OP_KET || bra != OP_BRA)
13369       {
13370       if (ket != OP_KET)
13371         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
13372       else
13373         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
13374       stacksize++;
13375       }
13376 
13377     if (offset != 0)
13378       stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
13379 
13380     if (opcode != OP_ONCE)
13381       {
13382       if (alt_max <= 3)
13383         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
13384       else
13385         mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
13386       }
13387 
13388     if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
13389       {
13390       /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
13391       SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
13392       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
13393       }
13394 
13395     JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
13396 
13397     if (opcode != OP_ONCE)
13398       {
13399       if (alt_max <= 3)
13400         {
13401         JUMPHERE(next_alt);
13402         alt_count++;
13403         if (alt_count < alt_max)
13404           {
13405           SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
13406           next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
13407           }
13408         }
13409       else
13410         {
13411         sljit_set_label(mov_addr, LABEL());
13412         sljit_emit_op0(compiler, SLJIT_ENDBR);
13413         }
13414       }
13415 
13416     COMPILE_BACKTRACKINGPATH(current->top);
13417     if (current->own_backtracks)
13418       set_jumps(current->own_backtracks, LABEL());
13419     SLJIT_ASSERT(!current->simple_backtracks);
13420     }
13421   while (*cc == OP_ALT);
13422 
13423   if (cond != NULL)
13424     {
13425     SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
13426     assert = CURRENT_AS(bracket_backtrack)->u.assert;
13427     if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
13428       {
13429       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
13430       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13431       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
13432       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
13433       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
13434       }
13435     JUMPHERE(cond);
13436     }
13437 
13438   /* Free the STR_PTR. */
13439   if (private_data_ptr == 0)
13440     free_stack(common, 1);
13441   }
13442 
13443 if (offset != 0)
13444   {
13445   /* Using both tmp register is better for instruction scheduling. */
13446   if (common->optimized_cbracket[offset >> 1] != 0)
13447     {
13448     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13449     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13450     free_stack(common, 2);
13451     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13452     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13453     }
13454   else
13455     {
13456     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13457     free_stack(common, 1);
13458     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13459     }
13460   }
13461 else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))
13462   {
13463   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13464   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13465   OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
13466   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13467   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP2, 0);
13468   free_stack(common, 4);
13469   }
13470 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
13471   {
13472   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
13473   free_stack(common, 1);
13474   }
13475 else if (opcode == OP_ONCE)
13476   {
13477   cc = ccbegin + GET(ccbegin, 1);
13478   stacksize = needs_control_head ? 1 : 0;
13479 
13480   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13481     {
13482     /* Reset head and drop saved frame. */
13483     stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
13484     }
13485   else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
13486     {
13487     /* The STR_PTR must be released. */
13488     stacksize++;
13489     }
13490 
13491   if (stacksize > 0)
13492     free_stack(common, stacksize);
13493 
13494   JUMPHERE(once);
13495   /* Restore previous private_data_ptr */
13496   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13497     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
13498   else if (ket == OP_KETRMIN)
13499     {
13500     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13501     /* See the comment below. */
13502     free_stack(common, 2);
13503     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13504     }
13505   }
13506 
13507 if (repeat_type == OP_EXACT)
13508   {
13509   OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
13510   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
13511   CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
13512   }
13513 else if (ket == OP_KETRMAX)
13514   {
13515   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13516   if (bra != OP_BRAZERO)
13517     free_stack(common, 1);
13518 
13519   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13520   if (bra == OP_BRAZERO)
13521     {
13522     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13523     JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
13524     JUMPHERE(brazero);
13525     free_stack(common, 1);
13526     }
13527   }
13528 else if (ket == OP_KETRMIN)
13529   {
13530   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13531 
13532   /* OP_ONCE removes everything in case of a backtrack, so we don't
13533   need to explicitly release the STR_PTR. The extra release would
13534   affect badly the free_stack(2) above. */
13535   if (opcode != OP_ONCE)
13536     free_stack(common, 1);
13537   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
13538   if (opcode == OP_ONCE)
13539     free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
13540   else if (bra == OP_BRAMINZERO)
13541     free_stack(common, 1);
13542   }
13543 else if (bra == OP_BRAZERO)
13544   {
13545   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13546   JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
13547   JUMPHERE(brazero);
13548   }
13549 }
13550 
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)13551 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13552 {
13553 DEFINE_COMPILER;
13554 int offset;
13555 struct sljit_jump *jump;
13556 PCRE2_SPTR cc;
13557 
13558 /* No retry on backtrack, just drop everything. */
13559 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
13560   {
13561   cc = current->cc;
13562 
13563   if (*cc == OP_BRAPOSZERO)
13564     cc++;
13565 
13566   if (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS)
13567     {
13568     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
13569     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13570     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13571     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13572     if (common->capture_last_ptr != 0)
13573       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
13574     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13575     if (common->capture_last_ptr != 0)
13576       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
13577     }
13578   set_jumps(current->own_backtracks, LABEL());
13579   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
13580   return;
13581   }
13582 
13583 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
13584 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13585 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
13586 
13587 if (current->own_backtracks)
13588   {
13589   jump = JUMP(SLJIT_JUMP);
13590   set_jumps(current->own_backtracks, LABEL());
13591   /* Drop the stack frame. */
13592   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
13593   JUMPHERE(jump);
13594   }
13595 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
13596 }
13597 
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)13598 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13599 {
13600 assert_backtrack backtrack;
13601 
13602 current->top = NULL;
13603 current->own_backtracks = NULL;
13604 current->simple_backtracks = NULL;
13605 if (current->cc[1] > OP_ASSERTBACK_NOT)
13606   {
13607   /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
13608   compile_bracket_matchingpath(common, current->cc, current);
13609   compile_bracket_backtrackingpath(common, current->top);
13610   }
13611 else
13612   {
13613   memset(&backtrack, 0, sizeof(backtrack));
13614   backtrack.common.cc = current->cc;
13615   backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
13616   /* Manual call of compile_assert_matchingpath. */
13617   compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
13618   }
13619 SLJIT_ASSERT(!current->simple_backtracks && !current->own_backtracks);
13620 }
13621 
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)13622 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13623 {
13624 DEFINE_COMPILER;
13625 PCRE2_UCHAR opcode = *current->cc;
13626 struct sljit_label *loop;
13627 struct sljit_jump *jump;
13628 
13629 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
13630   {
13631   if (common->then_trap != NULL)
13632     {
13633     SLJIT_ASSERT(common->control_head_ptr != 0);
13634 
13635     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13636     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
13637     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
13638     jump = JUMP(SLJIT_JUMP);
13639 
13640     loop = LABEL();
13641     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13642     JUMPHERE(jump);
13643     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
13644     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
13645     add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
13646     return;
13647     }
13648   else if (!common->local_quit_available && common->in_positive_assertion)
13649     {
13650     add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
13651     return;
13652     }
13653   }
13654 
13655 if (common->local_quit_available)
13656   {
13657   /* Abort match with a fail. */
13658   if (common->quit_label == NULL)
13659     add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
13660   else
13661     JUMPTO(SLJIT_JUMP, common->quit_label);
13662   return;
13663   }
13664 
13665 if (opcode == OP_SKIP_ARG)
13666   {
13667   SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13668   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13669   OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
13670   sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_search_mark));
13671 
13672   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
13673   add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
13674   return;
13675   }
13676 
13677 if (opcode == OP_SKIP)
13678   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13679 else
13680   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
13681 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
13682 }
13683 
compile_vreverse_backtrackingpath(compiler_common * common,struct backtrack_common * current)13684 static SLJIT_INLINE void compile_vreverse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13685 {
13686 DEFINE_COMPILER;
13687 struct sljit_jump *jump;
13688 struct sljit_label *label;
13689 
13690 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
13691 jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(3));
13692 skip_valid_char(common);
13693 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
13694 JUMPTO(SLJIT_JUMP, CURRENT_AS(vreverse_backtrack)->matchingpath);
13695 
13696 label = LABEL();
13697 sljit_set_label(jump, label);
13698 set_jumps(current->own_backtracks, label);
13699 }
13700 
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)13701 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13702 {
13703 DEFINE_COMPILER;
13704 struct sljit_jump *jump;
13705 int size;
13706 
13707 if (CURRENT_AS(then_trap_backtrack)->then_trap)
13708   {
13709   common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
13710   return;
13711   }
13712 
13713 size = CURRENT_AS(then_trap_backtrack)->framesize;
13714 size = 3 + (size < 0 ? 0 : size);
13715 
13716 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
13717 free_stack(common, size);
13718 jump = JUMP(SLJIT_JUMP);
13719 
13720 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
13721 /* STACK_TOP is set by THEN. */
13722 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
13723   {
13724   add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13725   OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
13726   }
13727 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13728 free_stack(common, 3);
13729 
13730 JUMPHERE(jump);
13731 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
13732 }
13733 
/* Emits the backtracking paths for a chain of backtrack records.

Arguments:
  common      the JIT compiler state
  current     the first record to process; the chain is followed through
              the prev field until it ends

For every record the pending simple_backtracks jumps are bound to the current
position, then the opcode at current->cc selects the code to emit. The value
of common->then_trap on entry is put back before returning, because some
handlers replace it while the chain is processed.
*/

static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *then_trap_on_entry = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->simple_backtracks != NULL)
    set_jumps(current->simple_backtracks, LABEL());

  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Pop the saved word back into OVECTOR(0). */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Repeats of a single character. */
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    /* The same, with the I suffix. */
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    /* The OP_NOT variants. */
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    /* The OP_NOT variants with the I suffix. */
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    /* The OP_TYPE variants. */
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    /* Character classes. */
    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF:
    case OP_REFI:
    case OP_DNREF:
    case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode after OP_BRAZERO decides between assert and bracket. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    if (common->has_skip_arg)
      {
      /* Five words are popped: word 4 is written to mark_ptr and word 0
      to control_head_ptr. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      /* A single word is popped and written to mark_ptr. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN:
    case OP_THEN_ARG:
    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT:
    case OP_COMMIT_ARG:
    /* Without a local quit the return register is loaded with
    PCRE2_ERROR_NOMATCH before leaving through the quit label / list. */
    if (!common->local_quit_available)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
    if (common->quit_label != NULL)
      JUMPTO(SLJIT_JUMP, common->quit_label);
    else
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    break;

    case OP_CALLOUT:
    case OP_CALLOUT_STR:
    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    /* Nothing to undo, only bind the pending jumps. */
    set_jumps(current->own_backtracks, LABEL());
    break;

    case OP_VREVERSE:
    compile_vreverse_backtrackingpath(common, current);
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  }

common->then_trap = then_trap_on_entry;
}
13928 
compile_recurse(compiler_common * common)13929 static SLJIT_INLINE void compile_recurse(compiler_common *common)
13930 {
13931 DEFINE_COMPILER;
13932 PCRE2_SPTR cc = common->start + common->currententry->start;
13933 PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
13934 PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
13935 uint32_t recurse_flags = 0;
13936 int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &recurse_flags);
13937 int alt_count, alt_max, local_size;
13938 backtrack_common altbacktrack;
13939 jump_list *match = NULL;
13940 struct sljit_jump *next_alt = NULL;
13941 struct sljit_jump *accept_exit = NULL;
13942 struct sljit_label *quit;
13943 struct sljit_jump *mov_addr = NULL;
13944 
13945 /* Recurse captures then. */
13946 common->then_trap = NULL;
13947 
13948 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
13949 
13950 alt_max = no_alternatives(cc);
13951 alt_count = 0;
13952 
13953 /* Matching path. */
13954 SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
13955 common->currententry->entry_label = LABEL();
13956 set_jumps(common->currententry->entry_calls, common->currententry->entry_label);
13957 
13958 sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP2, 0);
13959 count_match(common);
13960 
13961 local_size = (alt_max > 1) ? 2 : 1;
13962 
13963 /* (Reversed) stack layout:
13964    [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */
13965 
13966 allocate_stack(common, private_data_size + local_size);
13967 /* Save return address. */
13968 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);
13969 
13970 copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, recurse_flags);
13971 
13972 /* This variable is saved and restored all time when we enter or exit from a recursive context. */
13973 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
13974 
13975 if (recurse_flags & recurse_flag_control_head_found)
13976   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13977 
13978 if (alt_max > 1)
13979   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
13980 
13981 memset(&altbacktrack, 0, sizeof(backtrack_common));
13982 common->quit_label = NULL;
13983 common->accept_label = NULL;
13984 common->quit = NULL;
13985 common->accept = NULL;
13986 altbacktrack.cc = ccbegin;
13987 cc += GET(cc, 1);
13988 while (1)
13989   {
13990   altbacktrack.top = NULL;
13991   altbacktrack.own_backtracks = NULL;
13992 
13993   if (altbacktrack.cc != ccbegin)
13994     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13995 
13996   compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
13997   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13998     return;
13999 
14000   allocate_stack(common, (alt_max > 1 || (recurse_flags & recurse_flag_accept_found)) ? 2 : 1);
14001   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
14002 
14003   if (alt_max > 1 || (recurse_flags & recurse_flag_accept_found))
14004     {
14005     if (alt_max > 3)
14006       mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
14007     else
14008       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
14009     }
14010 
14011   add_jump(compiler, &match, JUMP(SLJIT_JUMP));
14012 
14013   if (alt_count == 0)
14014     {
14015     /* Backtracking path entry. */
14016     SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
14017     common->currententry->backtrack_label = LABEL();
14018     set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);
14019 
14020     sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP1, 0);
14021 
14022     if (recurse_flags & recurse_flag_accept_found)
14023       accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
14024 
14025     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
14026     /* Save return address. */
14027     OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);
14028 
14029     copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);
14030 
14031     if (alt_max > 1)
14032       {
14033       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
14034       free_stack(common, 2);
14035 
14036       if (alt_max > 3)
14037         {
14038         sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
14039         sljit_set_label(mov_addr, LABEL());
14040         sljit_emit_op0(compiler, SLJIT_ENDBR);
14041         }
14042       else
14043         next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
14044       }
14045     else
14046       free_stack(common, (recurse_flags & recurse_flag_accept_found) ? 2 : 1);
14047     }
14048   else if (alt_max > 3)
14049     {
14050     sljit_set_label(mov_addr, LABEL());
14051     sljit_emit_op0(compiler, SLJIT_ENDBR);
14052     }
14053   else
14054     {
14055     JUMPHERE(next_alt);
14056     if (alt_count + 1 < alt_max)
14057       {
14058       SLJIT_ASSERT(alt_count == 1 && alt_max == 3);
14059       next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
14060       }
14061     }
14062 
14063   alt_count++;
14064 
14065   compile_backtrackingpath(common, altbacktrack.top);
14066   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14067     return;
14068   set_jumps(altbacktrack.own_backtracks, LABEL());
14069 
14070   if (*cc != OP_ALT)
14071     break;
14072 
14073   altbacktrack.cc = cc + 1 + LINK_SIZE;
14074   cc += GET(cc, 1);
14075   }
14076 
14077 /* No alternative is matched. */
14078 
14079 quit = LABEL();
14080 
14081 copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, recurse_flags);
14082 
14083 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
14084 free_stack(common, private_data_size + local_size);
14085 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
14086 OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
14087 
14088 if (common->quit != NULL)
14089   {
14090   SLJIT_ASSERT(recurse_flags & recurse_flag_quit_found);
14091 
14092   set_jumps(common->quit, LABEL());
14093   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
14094   copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, recurse_flags);
14095   JUMPTO(SLJIT_JUMP, quit);
14096   }
14097 
14098 if (recurse_flags & recurse_flag_accept_found)
14099   {
14100   JUMPHERE(accept_exit);
14101   free_stack(common, 2);
14102 
14103   /* Save return address. */
14104   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);
14105 
14106   copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, recurse_flags);
14107 
14108   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
14109   free_stack(common, private_data_size + local_size);
14110   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
14111   OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
14112   }
14113 
14114 if (common->accept != NULL)
14115   {
14116   SLJIT_ASSERT(recurse_flags & recurse_flag_accept_found);
14117 
14118   set_jumps(common->accept, LABEL());
14119 
14120   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
14121   OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
14122 
14123   allocate_stack(common, 2);
14124   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
14125   }
14126 
14127 set_jumps(match, LABEL());
14128 
14129 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
14130 
14131 copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);
14132 
14133 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
14134 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
14135 OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
14136 }
14137 
14138 #undef COMPILE_BACKTRACKINGPATH
14139 #undef CURRENT_AS
14140 
/* Option bits that configure code generation instead of selecting a matching
mode. jit_compile() reads them from its mode argument and then masks them out,
so that the remaining value can be compared with the PCRE2_JIT_* mode values. */
14141 #define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS \
14142   (PCRE2_JIT_INVALID_UTF)
14143 
/* Translate one compiled pattern into machine code for a single matching mode
and record the result in the pattern's executable_functions descriptor.

Arguments:
  code          a compiled pattern
  mode          PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_SOFT or
                PCRE2_JIT_PARTIAL_HARD, optionally or-ed with
                PCRE2_JIT_INVALID_UTF

Returns:        0: success
                PCRE2_ERROR_INTERNAL: unrecognised newline convention
                PCRE2_ERROR_NOMEMORY: every other failure path in this function
*/
jit_compile(pcre2_code * code,sljit_u32 mode)14144 static int jit_compile(pcre2_code *code, sljit_u32 mode)
14145 {
14146 pcre2_real_code *re = (pcre2_real_code *)code;
14147 struct sljit_compiler *compiler;
14148 backtrack_common rootbacktrack;
14149 compiler_common common_data;
14150 compiler_common *common = &common_data;
14151 const sljit_u8 *tables = re->tables;
14152 void *allocator_data = &re->memctl;
14153 int private_data_size;
14154 PCRE2_SPTR ccend;
14155 executable_functions *functions;
14156 void *executable_func;
14157 sljit_uw executable_size;
14158 sljit_uw total_length;
14159 struct sljit_label *mainloop_label = NULL;
14160 struct sljit_label *continue_match_label;
14161 struct sljit_label *empty_match_found_label = NULL;
14162 struct sljit_label *empty_match_backtrack_label = NULL;
14163 struct sljit_label *reset_match_label;
14164 struct sljit_label *quit_label;
14165 struct sljit_jump *jump;
14166 struct sljit_jump *minlength_check_failed = NULL;
14167 struct sljit_jump *empty_match = NULL;
14168 struct sljit_jump *end_anchor_failed = NULL;
14169 jump_list *reqcu_not_found = NULL;
14170
14171 SLJIT_ASSERT(tables);
14172
      /* TMP3, ARGUMENTS and RETURN_ADDR must either all be virtual registers or
      all be real ones, in agreement with HAS_VIRTUAL_REGISTERS. */
14173 #if HAS_VIRTUAL_REGISTERS == 1
14174 SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) < 0);
14175 #elif HAS_VIRTUAL_REGISTERS == 0
14176 SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) >= 0);
14177 #else
14178 #error "Invalid value for HAS_VIRTUAL_REGISTERS"
14179 #endif
14180
      /* Start from a zeroed state: every field of common that is not assigned
      below stays 0 / NULL. */
14181 memset(&rootbacktrack, 0, sizeof(backtrack_common));
14182 memset(common, 0, sizeof(compiler_common));
14183 common->re = re;
      /* The name table directly follows the pcre2_real_code header, and the
      compiled pattern follows the name_count entries of the name table. */
14184 common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
14185 rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
14186
      /* Record the invalid-UTF request, then strip the configuration bits so that
      mode holds only the matching mode that is compared below. */
14187 #ifdef SUPPORT_UNICODE
14188 common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
14189 #endif /* SUPPORT_UNICODE */
14190 mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;
14191
14192 common->start = rootbacktrack.cc;
14193 common->read_only_data_head = NULL;
14194 common->fcc = tables + fcc_offset;
14195 common->lcc = (sljit_sw)(tables + lcc_offset);
14196 common->mode = mode;
14197 common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
14198 common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
      /* Translate the newline convention. The CRLF pair is stored as
      (CHAR_CR << 8) | CHAR_NL; ANY and ANYCRLF store the same pair and are
      distinguished through nltype. */
14199 common->nltype = NLTYPE_FIXED;
14200 switch(re->newline_convention)
14201   {
14202   case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
14203   case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
14204   case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
14205   case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
14206   case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
14207   case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
14208   default: return PCRE2_ERROR_INTERNAL;
14209   }
      /* Default newline bounds; they are tightened below when UTF is enabled. */
14210 common->nlmax = READ_CHAR_MAX;
14211 common->nlmin = 0;
14212 if (re->bsr_convention == PCRE2_BSR_UNICODE)
14213   common->bsr_nltype = NLTYPE_ANY;
14214 else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
14215   common->bsr_nltype = NLTYPE_ANYCRLF;
14216 else
14217   {
        /* No explicit BSR convention in the pattern: use the build-time default. */
14218 #ifdef BSR_ANYCRLF
14219   common->bsr_nltype = NLTYPE_ANYCRLF;
14220 #else
14221   common->bsr_nltype = NLTYPE_ANY;
14222 #endif
14223   }
14224 common->bsr_nlmax = READ_CHAR_MAX;
14225 common->bsr_nlmin = 0;
14226 common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
14227 common->ctypes = (sljit_sw)(tables + ctypes_offset);
14228 common->name_count = re->name_count;
14229 common->name_entry_size = re->name_entry_size;
14230 common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
14231 common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
14232 #ifdef SUPPORT_UNICODE
14233 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
14234 common->utf = (re->overall_options & PCRE2_UTF) != 0;
14235 common->ucp = (re->overall_options & PCRE2_UCP) != 0;
14236 if (common->utf)
14237   {
14238   if (common->nltype == NLTYPE_ANY)
14239     common->nlmax = 0x2029;
14240   else if (common->nltype == NLTYPE_ANYCRLF)
14241     common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
14242   else
14243     {
14244     /* We only care about the first newline character. */
14245     common->nlmax = common->newline & 0xff;
14246     }
14247
14248   if (common->nltype == NLTYPE_FIXED)
14249     common->nlmin = common->newline & 0xff;
14250   else
14251     common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
14252
14253   if (common->bsr_nltype == NLTYPE_ANY)
14254     common->bsr_nlmax = 0x2029;
14255   else
14256     common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
14257   common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
14258   }
14259 else
        /* The invalid-UTF request is dropped when the pattern is not UTF. */
14260   common->invalid_utf = FALSE;
14261 #endif /* SUPPORT_UNICODE */
14262 ccend = bracketend(common->start);
14263
14264 /* Calculate the local space size on the stack. */
      /* Local slots are laid out after the LIMIT_MATCH word. optimized_cbracket
      holds one byte for each bracket number 0..top_bracket. */
14265 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
14266 common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
14267 if (!common->optimized_cbracket)
14268   return PCRE2_ERROR_NOMEMORY;
14269 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
14270 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
14271 #else
14272 memset(common->optimized_cbracket, 1, re->top_bracket + 1);
14273 #endif
14274
14275 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
14276 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
14277 common->capture_last_ptr = common->ovector_start;
14278 common->ovector_start += sizeof(sljit_sw);
14279 #endif
      /* From here on every failure path has to release the buffers allocated so
      far before returning. */
14280 if (!check_opcode_types(common, common->start, ccend))
14281   {
14282   SLJIT_FREE(common->optimized_cbracket, allocator_data);
14283   return PCRE2_ERROR_NOMEMORY;
14284   }
14285
14286 /* Checking flags and updating ovector_start. */
      /* Each optional slot below takes the current ovector_start as its offset
      and then advances ovector_start by one word. */
14287 if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
14288   {
14289   common->req_char_ptr = common->ovector_start;
14290   common->ovector_start += sizeof(sljit_sw);
14291   }
14292 if (mode != PCRE2_JIT_COMPLETE)
14293   {
14294   common->start_used_ptr = common->ovector_start;
14295   common->ovector_start += sizeof(sljit_sw);
14296   if (mode == PCRE2_JIT_PARTIAL_SOFT)
14297     {
14298     common->hit_start = common->ovector_start;
14299     common->ovector_start += sizeof(sljit_sw);
14300     }
14301   }
14302 if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
14303   {
14304   common->match_end_ptr = common->ovector_start;
14305   common->ovector_start += sizeof(sljit_sw);
14306   }
14307 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
14308 common->control_head_ptr = 1;
14309 #endif
      /* A non-zero control_head_ptr at this point only marks that the slot is
      wanted; it is replaced with the real slot offset. */
14310 if (common->control_head_ptr != 0)
14311   {
14312   common->control_head_ptr = common->ovector_start;
14313   common->ovector_start += sizeof(sljit_sw);
14314   }
14315 if (common->has_set_som)
14316   {
14317   /* Saving the real start pointer is necessary. */
14318   common->start_ptr = common->ovector_start;
14319   common->ovector_start += sizeof(sljit_sw);
14320   }
14321
14322 /* Aligning ovector to even number of sljit words. */
14323 if ((common->ovector_start & sizeof(sljit_sw)) != 0)
14324   common->ovector_start += sizeof(sljit_sw);
14325
      /* Without a dedicated slot, start_ptr aliases OVECTOR(0). */
14326 if (common->start_ptr == 0)
14327   common->start_ptr = OVECTOR(0);
14328
14329 /* Capturing brackets cannot be optimized if callouts are allowed. */
14330 if (common->capture_last_ptr != 0)
14331   memset(common->optimized_cbracket, 0, re->top_bracket + 1);
14332
14333 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
      /* cbra_ptr follows the (top_bracket + 1) * 2 ovector words. */
14334 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
14335
      /* One sljit_s32 per code unit of the pattern, plus one extra byte per code
      unit when has_then is set; that tail is used as then_offsets below. Only
      the sljit_s32 part is cleared here. */
14336 total_length = ccend - common->start;
14337 common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
14338 if (!common->private_data_ptrs)
14339   {
14340   SLJIT_FREE(common->optimized_cbracket, allocator_data);
14341   return PCRE2_ERROR_NOMEMORY;
14342   }
14343 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
14344
      /* One further word per bracket follows cbra_ptr; the private data assigned
      by the calls below is added on top of this size. */
14345 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
14346
      /* Early-fail detection is skipped for anchored patterns, when start
      optimizations are disabled, and when has_skip_in_assert_back is set. */
14347 if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
14348   detect_early_fail(common, common->start, &private_data_size, 0, 0);
14349
14350 set_private_data_ptrs(common, &private_data_size, ccend);
14351
14352 SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
14353
      /* Give up when the required local area is larger than 65536. */
14354 if (private_data_size > 65536)
14355   {
14356   SLJIT_FREE(common->private_data_ptrs, allocator_data);
14357   SLJIT_FREE(common->optimized_cbracket, allocator_data);
14358   return PCRE2_ERROR_NOMEMORY;
14359   }
14360
14361 if (common->has_then)
14362   {
        /* then_offsets is the byte array placed right after the sljit_s32
        entries of the private_data_ptrs allocation. */
14363   common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
14364   memset(common->then_offsets, 0, total_length);
14365   set_then_offsets(common, common->start, NULL);
14366   }
14367
14368 compiler = sljit_create_compiler(allocator_data);
14369 if (!compiler)
14370   {
14371   SLJIT_FREE(common->optimized_cbracket, allocator_data);
14372   SLJIT_FREE(common->private_data_ptrs, allocator_data);
14373   return PCRE2_ERROR_NOMEMORY;
14374   }
14375 common->compiler = compiler;
14376
14377 /* Main pcre2_jit_exec entry. */
      /* private_data_size, a whole number of words, is passed as the last
      argument of sljit_emit_enter(). */
14378 SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0);
14379 sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5, 5, SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS, 0, private_data_size);
14380
14381 /* Register init. */
14382 reset_ovector(common, (re->top_bracket + 1) * 2);
14383 if (common->req_char_ptr != 0)
14384   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
14385
      /* Load the fields of jit_arguments: subject start and end, the bounds of
      the JIT stack, and the match limit, which is stored incremented by one
      in LIMIT_MATCH. */
14386 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
14387 OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
14388 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
14389 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
14390 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
14391 OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
14392 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
14393 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
14394 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
14395 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
14396
14397 if (common->early_fail_start_ptr < common->early_fail_end_ptr)
14398   reset_early_fail(common);
14399
      /* Initial values of the optional slots: hit_start = -1, mark and control
      head = 0. */
14400 if (mode == PCRE2_JIT_PARTIAL_SOFT)
14401   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
14402 if (common->mark_ptr != 0)
14403   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
14404 if (common->control_head_ptr != 0)
14405   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
14406
14407 /* Main part of the matching */
14408 if ((re->overall_options & PCRE2_ANCHORED) == 0)
14409   {
14410   mainloop_label = mainloop_entry(common);
14411   continue_match_label = LABEL();
14412   /* Forward search if possible. */
14413   if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
14414     {
          /* At most one forward search is selected; when
          fast_forward_first_n_chars() succeeds none of the others is tried. */
14415     if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
14416       ;
14417     else if ((re->flags & PCRE2_FIRSTSET) != 0)
14418       fast_forward_first_char(common);
14419     else if ((re->flags & PCRE2_STARTLINE) != 0)
14420       fast_forward_newline(common);
14421     else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
14422       fast_forward_start_bits(common);
14423     }
14424   }
14425 else
14426   continue_match_label = LABEL();
14427
      /* Fail with PCRE2_ERROR_NOMATCH when STR_PTR advanced by minlength is past
      STR_END. The jump is bound to abort_label further down. */
14428 if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
14429   {
14430   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14431   OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
14432   minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
14433   }
14434 if (common->req_char_ptr != 0)
14435   reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
14436
14437 /* Store the current STR_PTR in OVECTOR(0). */
14438 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
14439 /* Copy the limit of allowed recursions. */
14440 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
14441 if (common->capture_last_ptr != 0)
14442   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
14443 if (common->fast_forward_bc_ptr != NULL)
14444   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);
14445
14446 if (common->start_ptr != OVECTOR(0))
14447   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
14448
14449 /* Copy the beginning of the string. */
14450 if (mode == PCRE2_JIT_PARTIAL_SOFT)
14451   {
        /* start_used_ptr is only written while hit_start still holds -1. */
14452   jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
14453   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
14454   JUMPHERE(jump);
14455   }
14456 else if (mode == PCRE2_JIT_PARTIAL_HARD)
14457   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
14458
      /* Emit the matching path of the whole pattern. On a compiler error release
      the compiler, both analysis buffers and the read-only data list. */
14459 compile_matchingpath(common, common->start, ccend, &rootbacktrack);
14460 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14461   {
14462   sljit_free_compiler(compiler);
14463   SLJIT_FREE(common->optimized_cbracket, allocator_data);
14464   SLJIT_FREE(common->private_data_ptrs, allocator_data);
14465   PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14466   return PCRE2_ERROR_NOMEMORY;
14467   }
14468
      /* With PCRE2_ENDANCHORED, a match that does not end at STR_END skips the
      success code; the jump is bound below, after the return sequence. */
14469 if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
14470   end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
14471
      /* A match that ends where it started (STR_PTR equals OVECTOR(0)) is
      diverted to the option checks emitted after flush_stubs() below. */
14472 if (common->might_be_empty)
14473   {
14474   empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
14475   empty_match_found_label = LABEL();
14476   }
14477
14478 common->accept_label = LABEL();
14479 if (common->accept != NULL)
14480   set_jumps(common->accept, common->accept_label);
14481
14482 /* This means we have a match. Update the ovector. */
14483 copy_ovector(common, re->top_bracket + 1);
      /* quit and abort share a single label placed right before the return
      sequence. */
14484 common->quit_label = common->abort_label = LABEL();
14485 if (common->quit != NULL)
14486   set_jumps(common->quit, common->quit_label);
14487 if (common->abort != NULL)
14488   set_jumps(common->abort, common->abort_label);
14489 if (minlength_check_failed != NULL)
14490   SET_LABEL(minlength_check_failed, common->abort_label);
14491
14492 sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
14493 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
14494
      /* failed_match jumps set PCRE2_ERROR_NOMATCH and leave through
      abort_label. */
14495 if (common->failed_match != NULL)
14496   {
14497   SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
14498   set_jumps(common->failed_match, LABEL());
14499   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14500   JUMPTO(SLJIT_JUMP, common->abort_label);
14501   }
14502
14503 if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
14504   JUMPHERE(end_anchor_failed);
14505
14506 if (mode != PCRE2_JIT_COMPLETE)
14507   {
14508   common->partialmatchlabel = LABEL();
14509   set_jumps(common->partialmatch, common->partialmatchlabel);
14510   return_with_partial_match(common, common->quit_label);
14511   }
14512
14513 if (common->might_be_empty)
14514   empty_match_backtrack_label = LABEL();
      /* Emit the backtracking path; the error cleanup is the same as after
      compile_matchingpath() above. */
14515 compile_backtrackingpath(common, rootbacktrack.top);
14516 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14517   {
14518   sljit_free_compiler(compiler);
14519   SLJIT_FREE(common->optimized_cbracket, allocator_data);
14520   SLJIT_FREE(common->private_data_ptrs, allocator_data);
14521   PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14522   return PCRE2_ERROR_NOMEMORY;
14523   }
14524
14525 SLJIT_ASSERT(rootbacktrack.prev == NULL);
14526 reset_match_label = LABEL();
14527
14528 if (mode == PCRE2_JIT_PARTIAL_SOFT)
14529   {
14530   /* Update hit_start only the first time. */
14531   jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
14532   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
14533   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
14534   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
14535   JUMPHERE(jump);
14536   }
14537
14538 /* Check we have remaining characters. */
      /* When a match end slot exists, its value is kept in TMP1 for the
      comparisons below. */
14539 if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
14540   {
14541   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
14542   }
14543
      /* Reload STR_PTR from the fast-forward slot if there is one, otherwise
      from start_ptr. */
14544 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
14545     (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);
14546
14547 if ((re->overall_options & PCRE2_ANCHORED) == 0)
14548   {
14549   if (common->ff_newline_shortcut != NULL)
14550     {
14551     /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
14552     if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
14553       {
14554       if (common->match_end_ptr != 0)
14555         {
              /* STR_END is replaced by the match end for the shortcut and
              restored from TMP3 when the jump is not taken. */
14556         OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
14557         OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
14558         CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
14559         OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
14560         }
14561       else
14562         CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
14563       }
14564     }
14565   else
          /* Input remains before the end: go round the main loop again. */
14566     CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
14567   }
14568
14569 /* No more remaining characters. */
14570 if (reqcu_not_found != NULL)
14571   set_jumps(reqcu_not_found, LABEL());
14572
      /* In soft partial mode a hit_start other than -1 leads to the partial
      match return instead of PCRE2_ERROR_NOMATCH. */
14573 if (mode == PCRE2_JIT_PARTIAL_SOFT)
14574   CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
14575
14576 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14577 JUMPTO(SLJIT_JUMP, common->quit_label);
14578
14579 flush_stubs(common);
14580
14581 if (common->might_be_empty)
14582   {
        /* Empty match found. Backtrack if PCRE2_NOTEMPTY is set; accept if
        PCRE2_NOTEMPTY_ATSTART is clear; otherwise accept only when STR_PTR
        differs from the str field of the arguments. */
14583   JUMPHERE(empty_match);
14584   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
14585   OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
14586   OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
14587   JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
14588   OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
14589   JUMPTO(SLJIT_ZERO, empty_match_found_label);
14590   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
14591   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
14592   JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
14593   }
14594
      /* Compile the recursion entries collected so far. quit_label is saved here
      and put back after the loop. */
14595 common->fast_forward_bc_ptr = NULL;
14596 common->early_fail_start_ptr = 0;
14597 common->early_fail_end_ptr = 0;
14598 common->currententry = common->entries;
14599 common->local_quit_available = TRUE;
14600 quit_label = common->quit_label;
14601 if (common->currententry != NULL)
14602   {
14603   /* A free bit for each private data. */
14604   common->recurse_bitset_size = ((private_data_size / SSIZE_OF(sw)) + 7) >> 3;
14605   SLJIT_ASSERT(common->recurse_bitset_size > 0);
14606   common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);;
14607
14608   if (common->recurse_bitset != NULL)
14609     {
14610     do
14611       {
14612       /* Might add new entries. */
14613       compile_recurse(common);
14614       if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14615         break;
14616       flush_stubs(common);
14617       common->currententry = common->currententry->next;
14618       }
14619     while (common->currententry != NULL);
14620
14621     SLJIT_FREE(common->recurse_bitset, allocator_data);
14622     }
14623
        /* currententry is still set only when the bitset allocation failed or
        the loop stopped on a compiler error. */
14624   if (common->currententry != NULL)
14625     {
14626     /* The common->recurse_bitset has been freed. */
14627     SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL);
14628
14629     sljit_free_compiler(compiler);
14630     SLJIT_FREE(common->optimized_cbracket, allocator_data);
14631     SLJIT_FREE(common->private_data_ptrs, allocator_data);
14632     PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14633     return PCRE2_ERROR_NOMEMORY;
14634     }
14635   }
14636 common->local_quit_available = FALSE;
14637 common->quit_label = quit_label;
14638
14639 /* Allocating stack, returns with PCRE2_ERROR_JIT_STACKLIMIT if fails. */
14640 /* This is a (really) rare case. */
14641 set_jumps(common->stackalloc, LABEL());
14642 /* RETURN_ADDR is not a saved register. */
14643 sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14644
14645 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
14646
      /* Call sljit_stack_resize(arguments->stack, STACK_LIMIT - STACK_GROWTH_RATE).
      STR_PTR is saved in LOCALS1 because it is the second argument register;
      TMP2 is kept in STACK_LIMIT across the call. */
14647 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
14648 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
14649 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
14650 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
14651 OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
14652
14653 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(sljit_stack_resize));
14654
      /* A zero result is the failure case. Otherwise the result becomes the new
      STACK_LIMIT, TMP2 and STR_PTR are restored, and control returns to the
      address saved in LOCALS0. */
14655 jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
14656 OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
14657 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
14658 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14659 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
14660 OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
14661
14662 /* Allocation failed. */
14663 JUMPHERE(jump);
14664 /* We break the return address cache here, but this is a really rare case. */
14665 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
14666 JUMPTO(SLJIT_JUMP, common->quit_label);
14667
14668 /* Call limit reached. */
14669 set_jumps(common->calllimit, LABEL());
14670 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
14671 JUMPTO(SLJIT_JUMP, common->quit_label);
14672
      /* Emit each shared helper routine whose jump list is not empty; the
      recorded jumps are bound to the start of the routine. */
14673 if (common->revertframes != NULL)
14674   {
14675   set_jumps(common->revertframes, LABEL());
14676   do_revertframes(common);
14677   }
14678 if (common->wordboundary != NULL)
14679   {
14680   set_jumps(common->wordboundary, LABEL());
14681   check_wordboundary(common, FALSE);
14682   }
14683 if (common->ucp_wordboundary != NULL)
14684   {
14685   set_jumps(common->ucp_wordboundary, LABEL());
14686   check_wordboundary(common, TRUE);
14687   }
14688 if (common->anynewline != NULL)
14689   {
14690   set_jumps(common->anynewline, LABEL());
14691   check_anynewline(common);
14692   }
14693 if (common->hspace != NULL)
14694   {
14695   set_jumps(common->hspace, LABEL());
14696   check_hspace(common);
14697   }
14698 if (common->vspace != NULL)
14699   {
14700   set_jumps(common->vspace, LABEL());
14701   check_vspace(common);
14702   }
14703 if (common->casefulcmp != NULL)
14704   {
14705   set_jumps(common->casefulcmp, LABEL());
14706   do_casefulcmp(common);
14707   }
14708 if (common->caselesscmp != NULL)
14709   {
14710   set_jumps(common->caselesscmp, LABEL());
14711   do_caselesscmp(common);
14712   }
14713 if (common->reset_match != NULL || common->restart_match != NULL)
14714   {
        /* restart_match first reloads STR_PTR from start_ptr and then falls
        into the reset_match code. */
14715   if (common->restart_match != NULL)
14716     {
14717     set_jumps(common->restart_match, LABEL());
14718     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
14719     }
14720
14721   set_jumps(common->reset_match, LABEL());
14722   do_reset_match(common, (re->top_bracket + 1) * 2);
14723   /* The value of restart_match is in TMP1. */
14724   CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
14725   OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
14726   JUMPTO(SLJIT_JUMP, reset_match_label);
14727   }
14728 #ifdef SUPPORT_UNICODE
14729 #if PCRE2_CODE_UNIT_WIDTH == 8
14730 if (common->utfreadchar != NULL)
14731   {
14732   set_jumps(common->utfreadchar, LABEL());
14733   do_utfreadchar(common);
14734   }
14735 if (common->utfreadtype8 != NULL)
14736   {
14737   set_jumps(common->utfreadtype8, LABEL());
14738   do_utfreadtype8(common);
14739   }
14740 if (common->utfpeakcharback != NULL)
14741   {
14742   set_jumps(common->utfpeakcharback, LABEL());
14743   do_utfpeakcharback(common);
14744   }
14745 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
14746 #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
14747 if (common->utfreadchar_invalid != NULL)
14748   {
14749   set_jumps(common->utfreadchar_invalid, LABEL());
14750   do_utfreadchar_invalid(common);
14751   }
14752 if (common->utfreadnewline_invalid != NULL)
14753   {
14754   set_jumps(common->utfreadnewline_invalid, LABEL());
14755   do_utfreadnewline_invalid(common);
14756   }
14757 if (common->utfmoveback_invalid)
14758   {
14759   set_jumps(common->utfmoveback_invalid, LABEL());
14760   do_utfmoveback_invalid(common);
14761   }
14762 if (common->utfpeakcharback_invalid)
14763   {
14764   set_jumps(common->utfpeakcharback_invalid, LABEL());
14765   do_utfpeakcharback_invalid(common);
14766   }
14767 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
14768 if (common->getucd != NULL)
14769   {
14770   set_jumps(common->getucd, LABEL());
14771   do_getucd(common);
14772   }
14773 if (common->getucdtype != NULL)
14774   {
14775   set_jumps(common->getucdtype, LABEL());
14776   do_getucdtype(common);
14777   }
14778 #endif /* SUPPORT_UNICODE */
14779
      /* All code has been emitted, so the analysis buffers are released before
      the final code generation. */
14780 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14781 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14782
14783 executable_func = sljit_generate_code(compiler, 0, NULL);
14784 executable_size = sljit_get_generated_code_size(compiler);
14785 sljit_free_compiler(compiler);
14786
14787 if (executable_func == NULL)
14788   {
14789   PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14790   return PCRE2_ERROR_NOMEMORY;
14791   }
14792
14793 /* Reuse the function descriptor if possible. */
14794 if (re->executable_jit != NULL)
14795   functions = (executable_functions *)re->executable_jit;
14796 else
14797   {
14798   functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
14799   if (functions == NULL)
14800     {
14801     /* This case is highly unlikely since we just recently
14802     freed a lot of memory. Not impossible though. */
14803     sljit_free_code(executable_func, NULL);
14804     PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14805     return PCRE2_ERROR_NOMEMORY;
14806     }
14807   memset(functions, 0, sizeof(executable_functions));
14808   functions->top_bracket = re->top_bracket + 1;
14809   functions->limit_match = re->limit_match;
14810   re->executable_jit = functions;
14811   }
14812
14813 /* Turn mode into an index. */
      /* 0 = complete, 1 = partial soft, 2 = any other value. */
14814 if (mode == PCRE2_JIT_COMPLETE)
14815   mode = 0;
14816 else
14817   mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
14818
14819 SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
14820 functions->executable_funcs[mode] = executable_func;
14821 functions->read_only_data_heads[mode] = common->read_only_data_head;
14822 functions->executable_sizes[mode] = executable_size;
14823 return 0;
14824 }
14825 
14826 #endif
14827 
14828 /*************************************************
14829 *        JIT compile a Regular Expression        *
14830 *************************************************/
14831 
14832 /* This function uses JIT to convert a previously-compiled pattern into machine
14833 code.
14834 
14835 Arguments:
14836   code          a compiled pattern
14837   options       JIT option bits
14838 
14839 Returns:        0: success or (*NOJIT) was used
14840                <0: an error code
14841 */
14842 
/* The complete set of option bits accepted by pcre2_jit_compile(). If any
other bit is set in its options argument, the function returns
PCRE2_ERROR_JIT_BADOPTION. */
14843 #define PUBLIC_JIT_COMPILE_OPTIONS \
14844   (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_INVALID_UTF)
14845 
14846 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_compile(pcre2_code * code,uint32_t options)14847 pcre2_jit_compile(pcre2_code *code, uint32_t options)
14848 {
14849 pcre2_real_code *re = (pcre2_real_code *)code;
14850 #ifdef SUPPORT_JIT
14851 executable_functions *functions;
14852 static int executable_allocator_is_working = -1;
14853 #endif
14854 
14855 if (code == NULL)
14856   return PCRE2_ERROR_NULL;
14857 
14858 if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14859   return PCRE2_ERROR_JIT_BADOPTION;
14860 
14861 /* Support for invalid UTF was first introduced in JIT, with the option
14862 PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
14863 compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
14864 preferred feature, with the earlier option deprecated. However, for backward
14865 compatibility, if the earlier option is set, it forces the new option so that
14866 if JIT matching falls back to the interpreter, there is still support for
14867 invalid UTF. However, if this function has already been successfully called
14868 without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
14869 non-invalid-supporting JIT code was compiled), give an error.
14870 
14871 If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
14872 actions are needed:
14873 
14874   1. Remove the definition from pcre2.h.in and from the list in
14875      PUBLIC_JIT_COMPILE_OPTIONS above.
14876 
14877   2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
14878 
14879   3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
14880 
14881   4. Delete the following short block of code. The setting of "re" and
14882      "functions" can be moved into the JIT-only block below, but if that is
14883      done, (void)re and (void)functions will be needed in the non-JIT case, to
14884      avoid compiler warnings.
14885 */
14886 
14887 #ifdef SUPPORT_JIT
14888 functions = (executable_functions *)re->executable_jit;
14889 #endif
14890 
14891 if ((options & PCRE2_JIT_INVALID_UTF) != 0)
14892   {
14893   if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
14894     {
14895 #ifdef SUPPORT_JIT
14896     if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
14897 #endif
14898     re->overall_options |= PCRE2_MATCH_INVALID_UTF;
14899     }
14900   }
14901 
14902 /* The above tests are run with and without JIT support. This means that
14903 PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
14904 interpreter support) even in the absence of JIT. But now, if there is no JIT
14905 support, give an error return. */
14906 
14907 #ifndef SUPPORT_JIT
14908 return PCRE2_ERROR_JIT_BADOPTION;
14909 #else  /* SUPPORT_JIT */
14910 
14911 /* There is JIT support. Do the necessary. */
14912 
14913 if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14914 
14915 if (executable_allocator_is_working == -1)
14916   {
14917   /* Checks whether the executable allocator is working. This check
14918      might run multiple times in multi-threaded environments, but the
14919      result should not be affected by it. */
14920   void *ptr = SLJIT_MALLOC_EXEC(32, NULL);
14921   if (ptr != NULL)
14922     {
14923     SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL);
14924     executable_allocator_is_working = 1;
14925     }
14926   else executable_allocator_is_working = 0;
14927   }
14928 
14929 if (!executable_allocator_is_working)
14930   return PCRE2_ERROR_NOMEMORY;
14931 
14932 if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
14933   options |= PCRE2_JIT_INVALID_UTF;
14934 
14935 if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14936     || functions->executable_funcs[0] == NULL)) {
14937   uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14938   int result = jit_compile(code, options & ~excluded_options);
14939   if (result != 0)
14940     return result;
14941   }
14942 
14943 if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14944     || functions->executable_funcs[1] == NULL)) {
14945   uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14946   int result = jit_compile(code, options & ~excluded_options);
14947   if (result != 0)
14948     return result;
14949   }
14950 
14951 if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14952     || functions->executable_funcs[2] == NULL)) {
14953   uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14954   int result = jit_compile(code, options & ~excluded_options);
14955   if (result != 0)
14956     return result;
14957   }
14958 
14959 return 0;
14960 
14961 #endif  /* SUPPORT_JIT */
14962 }
14963 
14964 /* JIT compiler uses an all-in-one approach. This improves security,
14965    since the code generator functions are not exported. */
14966 
14967 #define INCLUDED_FROM_PCRE2_JIT_COMPILE
14968 
14969 #include "pcre2_jit_match.c"
14970 #include "pcre2_jit_misc.c"
14971 
14972 /* End of pcre2_jit_compile.c */
14973