1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9                     This module by Zoltan Herczeg
10      Original API code Copyright (c) 1997-2012 University of Cambridge
11           New API code Copyright (c) 2016-2021 University of Cambridge
12 
13 -----------------------------------------------------------------------------
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
16 
17     * Redistributions of source code must retain the above copyright notice,
18       this list of conditions and the following disclaimer.
19 
20     * Redistributions in binary form must reproduce the above copyright
21       notice, this list of conditions and the following disclaimer in the
22       documentation and/or other materials provided with the distribution.
23 
24     * Neither the name of the University of Cambridge nor the names of its
25       contributors may be used to endorse or promote products derived from
26       this software without specific prior written permission.
27 
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 POSSIBILITY OF SUCH DAMAGE.
39 -----------------------------------------------------------------------------
40 */
41 
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45 
46 #include "pcre2_internal.h"
47 
48 #ifdef SUPPORT_JIT
49 
50 /* All-in-one: Since we use the JIT compiler only from here,
51 we just include it. This way we don't need to touch the build
52 system files. */
53 
54 #define SLJIT_CONFIG_AUTO 1
55 #define SLJIT_CONFIG_STATIC 1
56 #define SLJIT_VERBOSE 0
57 
58 #ifdef PCRE2_DEBUG
59 #define SLJIT_DEBUG 1
60 #else
61 #define SLJIT_DEBUG 0
62 #endif
63 
64 #define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
65 #define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
66 
pcre2_jit_malloc(size_t size,void * allocator_data)67 static void * pcre2_jit_malloc(size_t size, void *allocator_data)
68 {
69 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
70 return allocator->malloc(size, allocator->memory_data);
71 }
72 
pcre2_jit_free(void * ptr,void * allocator_data)73 static void pcre2_jit_free(void *ptr, void *allocator_data)
74 {
75 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
76 allocator->free(ptr, allocator->memory_data);
77 }
78 
79 #include "sljit/sljitLir.c"
80 
81 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
82 #error Unsupported architecture
83 #endif
84 
85 /* Defines for debugging purposes. */
86 
87 /* 1 - Use unoptimized capturing brackets.
88    2 - Enable capture_last_ptr (includes option 1). */
89 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
90 
91 /* 1 - Always have a control head. */
92 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
93 
94 /* Allocate memory for the regex stack on the real machine stack.
95 Fast, but limited size. */
96 #define MACHINE_STACK_SIZE 32768
97 
98 /* Growth rate for stack allocated by the OS. Should be a multiple
99 of the page size. */
100 #define STACK_GROWTH_RATE 8192
101 
102 /* Enable to check that the allocation could destroy temporaries. */
103 #if defined SLJIT_DEBUG && SLJIT_DEBUG
104 #define DESTROY_REGISTERS 1
105 #endif
106 
107 /*
108 Short summary about the backtracking mechanism employed by the jit code generator:
109 
110 The code generator follows the recursive nature of the PERL compatible regular
111 expressions. The basic blocks of regular expressions are condition checkers
112 which execute different commands depending on the result of the condition check.
113 The relationship between the operators can be horizontal (concatenation) and
114 vertical (sub-expression) (See struct backtrack_common for more details).
115 
116   'ab' - 'a' and 'b' regexps are concatenated
117   'a+' - 'a' is the sub-expression of the '+' operator
118 
119 The condition checkers are boolean (true/false) checkers. Machine code is generated
120 for the checker itself and for the actions depending on the result of the checker.
121 The 'true' case is called the matching path (expected path), and the other is called
122 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
123 branches on the matching path.
124 
125  Greedy star operator (*) :
126    Matching path: match happens.
127    Backtrack path: match failed.
128  Non-greedy star operator (*?) :
129    Matching path: no need to perform a match.
130    Backtrack path: match is required.
131 
132 The following example shows how the code is generated for a capturing bracket
133 with two alternatives. Let A, B, C and D be arbitrary regular expressions, and
134 we have the following regular expression:
135 
136    A(B|C)D
137 
138 The generated code will be the following:
139 
140  A matching path
141  '(' matching path (pushing arguments to the stack)
142  B matching path
143  ')' matching path (pushing arguments to the stack)
144  D matching path
145  return with successful match
146 
147  D backtrack path
148  ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
149  B backtrack path
150  C expected path
151  jump to D matching path
152  C backtrack path
153  A backtrack path
154 
155  Notice that the order of the backtrack code paths is the opposite of the fast
156  code paths. In this way the topmost value on the stack always belongs
157  to the current backtrack code path. The backtrack path must check
158  whether there is a next alternative. If so, it needs to jump back to
159  the matching path eventually. Otherwise it needs to clear out its own stack
160  frame and continue the execution on the backtrack code paths.
161 */
162 
163 /*
164 Saved stack frames:
165 
166 Atomic blocks and asserts require reloading the values of private data
167 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
168 are not necessarily known at compile time, thus we need a dynamic restore
169 mechanism.
170 
171 The stack frames are stored in a chain list, and have the following format:
172 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
173 
174 Thus we can restore the private data to a particular point in the stack.
175 */
176 
177 typedef struct jit_arguments {
178   /* Pointers first. */
179   struct sljit_stack *stack;
180   PCRE2_SPTR str;
181   PCRE2_SPTR begin;
182   PCRE2_SPTR end;
183   pcre2_match_data *match_data;
184   PCRE2_SPTR startchar_ptr;
185   PCRE2_UCHAR *mark_ptr;
186   int (*callout)(pcre2_callout_block *, void *);
187   void *callout_data;
188   /* Everything else after. */
189   sljit_uw offset_limit;
190   sljit_u32 limit_match;
191   sljit_u32 oveccount;
192   sljit_u32 options;
193 } jit_arguments;
194 
195 #define JIT_NUMBER_OF_COMPILE_MODES 3
196 
197 typedef struct executable_functions {
198   void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
199   void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
200   sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
201   sljit_u32 top_bracket;
202   sljit_u32 limit_match;
203 } executable_functions;
204 
/* Node of a singly linked list; each node refers to one sljit jump. */
typedef struct jump_list {
  struct sljit_jump *jump;   /* The jump stored in this node. */
  struct jump_list *next;    /* Following node in the list. */
} jump_list;
209 
/* Node of a singly linked list of stubs; each node pairs a start jump with
a quit label. */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;    /* Following node in the list. */
} stub_list;
215 
/* Negative sentinel values: no_frame is -1 and no_stack is -2. */
enum frame_types { no_frame = -1, no_stack = -2 };
220 
/* Kinds of control entries: type_mark is 0, type_then_trap is 1. */
enum control_types { type_mark = 0, type_then_trap = 1 };
225 
/* Early fail kinds: type_skip is 0, type_fail is 1, type_fail_range is 2. */
enum early_fail_types { type_skip = 0, type_fail = 1, type_fail_range = 2 };
231 
232 typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
233 
234 /* The following structure is the key data type for the recursive
235 code generator. It is allocated by compile_matchingpath, and contains
236 the arguments for compile_backtrackingpath. Must be the first member
237 of its descendants. */
238 typedef struct backtrack_common {
239   /* Concatenation stack. */
240   struct backtrack_common *prev;
241   jump_list *nextbacktracks;
242   /* Internal stack (for component operators). */
243   struct backtrack_common *top;
244   jump_list *topbacktracks;
245   /* Opcode pointer. */
246   PCRE2_SPTR cc;
247 } backtrack_common;
248 
249 typedef struct assert_backtrack {
250   backtrack_common common;
251   jump_list *condfailed;
252   /* Less than 0 if a frame is not needed. */
253   int framesize;
254   /* Points to our private memory word on the stack. */
255   int private_data_ptr;
256   /* For iterators. */
257   struct sljit_label *matchingpath;
258 } assert_backtrack;
259 
260 typedef struct bracket_backtrack {
261   backtrack_common common;
262   /* Where to coninue if an alternative is successfully matched. */
263   struct sljit_label *alternative_matchingpath;
264   /* For rmin and rmax iterators. */
265   struct sljit_label *recursive_matchingpath;
266   /* For greedy ? operator. */
267   struct sljit_label *zero_matchingpath;
268   /* Contains the branches of a failed condition. */
269   union {
270     /* Both for OP_COND, OP_SCOND. */
271     jump_list *condfailed;
272     assert_backtrack *assert;
273     /* For OP_ONCE. Less than 0 if not needed. */
274     int framesize;
275     /* For brackets with >3 alternatives. */
276     struct sljit_put_label *matching_put_label;
277   } u;
278   /* Points to our private memory word on the stack. */
279   int private_data_ptr;
280 } bracket_backtrack;
281 
282 typedef struct bracketpos_backtrack {
283   backtrack_common common;
284   /* Points to our private memory word on the stack. */
285   int private_data_ptr;
286   /* Reverting stack is needed. */
287   int framesize;
288   /* Allocated stack size. */
289   int stacksize;
290 } bracketpos_backtrack;
291 
292 typedef struct braminzero_backtrack {
293   backtrack_common common;
294   struct sljit_label *matchingpath;
295 } braminzero_backtrack;
296 
297 typedef struct char_iterator_backtrack {
298   backtrack_common common;
299   /* Next iteration. */
300   struct sljit_label *matchingpath;
301   union {
302     jump_list *backtracks;
303     struct {
304       unsigned int othercasebit;
305       PCRE2_UCHAR chr;
306       BOOL enabled;
307     } charpos;
308   } u;
309 } char_iterator_backtrack;
310 
311 typedef struct ref_iterator_backtrack {
312   backtrack_common common;
313   /* Next iteration. */
314   struct sljit_label *matchingpath;
315 } ref_iterator_backtrack;
316 
317 typedef struct recurse_entry {
318   struct recurse_entry *next;
319   /* Contains the function entry label. */
320   struct sljit_label *entry_label;
321   /* Contains the function entry label. */
322   struct sljit_label *backtrack_label;
323   /* Collects the entry calls until the function is not created. */
324   jump_list *entry_calls;
325   /* Collects the backtrack calls until the function is not created. */
326   jump_list *backtrack_calls;
327   /* Points to the starting opcode. */
328   sljit_sw start;
329 } recurse_entry;
330 
331 typedef struct recurse_backtrack {
332   backtrack_common common;
333   /* Return to the matching path. */
334   struct sljit_label *matchingpath;
335   /* Recursive pattern. */
336   recurse_entry *entry;
337   /* Pattern is inlined. */
338   BOOL inlined_pattern;
339 } recurse_backtrack;
340 
341 #define OP_THEN_TRAP OP_TABLE_LENGTH
342 
343 typedef struct then_trap_backtrack {
344   backtrack_common common;
345   /* If then_trap is not NULL, this structure contains the real
346   then_trap for the backtracking path. */
347   struct then_trap_backtrack *then_trap;
348   /* Points to the starting opcode. */
349   sljit_sw start;
350   /* Exit point for the then opcodes of this alternative. */
351   jump_list *quit;
352   /* Frame size of the current alternative. */
353   int framesize;
354 } then_trap_backtrack;
355 
356 #define MAX_N_CHARS 12
357 #define MAX_DIFF_CHARS 5
358 
359 typedef struct fast_forward_char_data {
360   /* Number of characters in the chars array, 255 for any character. */
361   sljit_u8 count;
362   /* Number of last UTF-8 characters in the chars array. */
363   sljit_u8 last_count;
364   /* Available characters in the current position. */
365   PCRE2_UCHAR chars[MAX_DIFF_CHARS];
366 } fast_forward_char_data;
367 
368 #define MAX_CLASS_RANGE_SIZE 4
369 #define MAX_CLASS_CHARS_SIZE 3
370 
371 typedef struct compiler_common {
372   /* The sljit ceneric compiler. */
373   struct sljit_compiler *compiler;
374   /* Compiled regular expression. */
375   pcre2_real_code *re;
376   /* First byte code. */
377   PCRE2_SPTR start;
378   /* Maps private data offset to each opcode. */
379   sljit_s32 *private_data_ptrs;
380   /* Chain list of read-only data ptrs. */
381   void *read_only_data_head;
382   /* Tells whether the capturing bracket is optimized. */
383   sljit_u8 *optimized_cbracket;
384   /* Tells whether the starting offset is a target of then. */
385   sljit_u8 *then_offsets;
386   /* Current position where a THEN must jump. */
387   then_trap_backtrack *then_trap;
388   /* Starting offset of private data for capturing brackets. */
389   sljit_s32 cbra_ptr;
390   /* Output vector starting point. Must be divisible by 2. */
391   sljit_s32 ovector_start;
392   /* Points to the starting character of the current match. */
393   sljit_s32 start_ptr;
394   /* Last known position of the requested byte. */
395   sljit_s32 req_char_ptr;
396   /* Head of the last recursion. */
397   sljit_s32 recursive_head_ptr;
398   /* First inspected character for partial matching.
399      (Needed for avoiding zero length partial matches.) */
400   sljit_s32 start_used_ptr;
401   /* Starting pointer for partial soft matches. */
402   sljit_s32 hit_start;
403   /* Pointer of the match end position. */
404   sljit_s32 match_end_ptr;
405   /* Points to the marked string. */
406   sljit_s32 mark_ptr;
407   /* Recursive control verb management chain. */
408   sljit_s32 control_head_ptr;
409   /* Points to the last matched capture block index. */
410   sljit_s32 capture_last_ptr;
411   /* Fast forward skipping byte code pointer. */
412   PCRE2_SPTR fast_forward_bc_ptr;
413   /* Locals used by fast fail optimization. */
414   sljit_s32 early_fail_start_ptr;
415   sljit_s32 early_fail_end_ptr;
416   /* Variables used by recursive call generator. */
417   sljit_s32 recurse_bitset_size;
418   uint8_t *recurse_bitset;
419 
420   /* Flipped and lower case tables. */
421   const sljit_u8 *fcc;
422   sljit_sw lcc;
423   /* Mode can be PCRE2_JIT_COMPLETE and others. */
424   int mode;
425   /* TRUE, when empty match is accepted for partial matching. */
426   BOOL allow_empty_partial;
427   /* TRUE, when minlength is greater than 0. */
428   BOOL might_be_empty;
429   /* \K is found in the pattern. */
430   BOOL has_set_som;
431   /* (*SKIP:arg) is found in the pattern. */
432   BOOL has_skip_arg;
433   /* (*THEN) is found in the pattern. */
434   BOOL has_then;
435   /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
436   BOOL has_skip_in_assert_back;
437   /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
438   BOOL local_quit_available;
439   /* Currently in a positive assertion. */
440   BOOL in_positive_assertion;
441   /* Newline control. */
442   int nltype;
443   sljit_u32 nlmax;
444   sljit_u32 nlmin;
445   int newline;
446   int bsr_nltype;
447   sljit_u32 bsr_nlmax;
448   sljit_u32 bsr_nlmin;
449   /* Dollar endonly. */
450   int endonly;
451   /* Tables. */
452   sljit_sw ctypes;
453   /* Named capturing brackets. */
454   PCRE2_SPTR name_table;
455   sljit_sw name_count;
456   sljit_sw name_entry_size;
457 
458   /* Labels and jump lists. */
459   struct sljit_label *partialmatchlabel;
460   struct sljit_label *quit_label;
461   struct sljit_label *abort_label;
462   struct sljit_label *accept_label;
463   struct sljit_label *ff_newline_shortcut;
464   stub_list *stubs;
465   recurse_entry *entries;
466   recurse_entry *currententry;
467   jump_list *partialmatch;
468   jump_list *quit;
469   jump_list *positive_assertion_quit;
470   jump_list *abort;
471   jump_list *failed_match;
472   jump_list *accept;
473   jump_list *calllimit;
474   jump_list *stackalloc;
475   jump_list *revertframes;
476   jump_list *wordboundary;
477   jump_list *anynewline;
478   jump_list *hspace;
479   jump_list *vspace;
480   jump_list *casefulcmp;
481   jump_list *caselesscmp;
482   jump_list *reset_match;
483   BOOL unset_backref;
484   BOOL alt_circumflex;
485 #ifdef SUPPORT_UNICODE
486   BOOL utf;
487   BOOL invalid_utf;
488   BOOL ucp;
489   /* Points to saving area for iref. */
490   sljit_s32 iref_ptr;
491   jump_list *getucd;
492   jump_list *getucdtype;
493 #if PCRE2_CODE_UNIT_WIDTH == 8
494   jump_list *utfreadchar;
495   jump_list *utfreadtype8;
496   jump_list *utfpeakcharback;
497 #endif
498 #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
499   jump_list *utfreadchar_invalid;
500   jump_list *utfreadnewline_invalid;
501   jump_list *utfmoveback_invalid;
502   jump_list *utfpeakcharback_invalid;
503 #endif
504 #endif /* SUPPORT_UNICODE */
505 } compiler_common;
506 
/* Context used by byte_sequence_compare. When SLJIT_UNALIGNED is enabled it
also carries ucharptr and two identically laid out unions, c and oc, which
overlay an sljit_s32, an sljit_u16 and an array of code units whose element
type and length depend on PCRE2_CODE_UNIT_WIDTH. */

typedef struct compare_context {
  int length, sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } c, oc;
#endif
} compare_context;
540 
/* Drop the CMP macro that comes from sljit; this file defines its own CMP
shortcut further down. */
#undef CMP

/* Byte offset of the i-th element of the stack: i machine words
(sizeof(sljit_sw) each), computed as an int. */
#define STACK(i)      ((i) * (int)sizeof(sljit_sw))
546 
/* SHIFT_REG_IS_R3 gets defined when sljit names SLJIT_R3 as its preferred
shift register. A preference for SLJIT_R2 needs no action, and any other
preferred register stops the build. */
#ifdef SLJIT_PREF_SHIFT_REG
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
/* Nothing to do. */
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
#define SHIFT_REG_IS_R3
#else
#error "Unsupported shift register"
#endif
#endif

/* Register roles used by the code generator. TMP2 and TMP3 swap their
registers (SLJIT_R2 / SLJIT_R3) when SHIFT_REG_IS_R3 is defined. */
#define TMP1          SLJIT_R0
#ifdef SHIFT_REG_IS_R3
#define TMP2          SLJIT_R3
#define TMP3          SLJIT_R2
#else
#define TMP2          SLJIT_R2
#define TMP3          SLJIT_R3
#endif
#define STR_PTR       SLJIT_R1
#define STR_END       SLJIT_S0
#define STACK_TOP     SLJIT_S1
#define STACK_LIMIT   SLJIT_S2
#define COUNT_MATCH   SLJIT_S3
#define ARGUMENTS     SLJIT_S4
#define RETURN_ADDR   SLJIT_R4
572 
/* HAS_VIRTUAL_REGISTERS is 1 for the x86-32 sljit configuration
(SLJIT_CONFIG_X86_32 defined and non-zero) and 0 for every other one. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#define HAS_VIRTUAL_REGISTERS 1
#else
#define HAS_VIRTUAL_REGISTERS 0
#endif
578 
/* Layout of the local space, in units of sizeof(sljit_sw). */

/* Words 0 and 1: scratch locals for the current opcode. */
#define LOCALS0          (0 * sizeof(sljit_sw))
#define LOCALS1          (1 * sizeof(sljit_sw))
/* Words 2 and 3: locals of possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_sw))
#define POSSESSIVE1      (3 * sizeof(sljit_sw))
/* Word 4: maximum limit of recursions. */
#define LIMIT_MATCH      (4 * sizeof(sljit_sw))
/* The output vector lives on the stack and holds character pointers. It has
two groups: first the start / end character pointers, then the start
pointers of capturing groups whose end has not been reached yet. All four
macros below expect a variable named common to be in scope. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
596 
597 #if PCRE2_CODE_UNIT_WIDTH == 8
598 #define MOV_UCHAR  SLJIT_MOV_U8
599 #define IN_UCHARS(x) (x)
600 #elif PCRE2_CODE_UNIT_WIDTH == 16
601 #define MOV_UCHAR  SLJIT_MOV_U16
602 #define UCHAR_SHIFT (1)
603 #define IN_UCHARS(x) ((x) * 2)
604 #elif PCRE2_CODE_UNIT_WIDTH == 32
605 #define MOV_UCHAR  SLJIT_MOV_U32
606 #define UCHAR_SHIFT (2)
607 #define IN_UCHARS(x) ((x) * 4)
608 #else
609 #error Unsupported compiling mode
610 #endif
611 
/* Shortcuts for the sljit emitter calls. DEFINE_COMPILER declares a local
named compiler from common->compiler; every other macro here expands to an
sljit call that uses that local, so it has to be in scope. */
#define DEFINE_COMPILER struct sljit_compiler *compiler = common->compiler

/* Plain instruction emitters. */
#define OP1(op, dst, dstw, src, srcw) sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define OP2U(op, src1, src1w, src2, src2w) sljit_emit_op2u(compiler, (op), (src1), (src1w), (src2), (src2w))
#define OP_SRC(op, src, srcw) sljit_emit_op_src(compiler, (op), (src), (srcw))
#define OP_FLAGS(op, dst, dstw, type) sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define CMOV(type, dst_reg, src, srcw) sljit_emit_cmov(compiler, (type), (dst_reg), (src), (srcw))
#define GET_LOCAL_BASE(dst, dstw, offset) sljit_get_local_base(compiler, (dst), (dstw), (offset))

/* Labels and jumps. JUMPTO and CMPTO emit the jump / compare and bind it to
an existing label; JUMPHERE binds an existing jump to a freshly emitted
label. */
#define LABEL() sljit_emit_label(compiler)
#define JUMP(type) sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label) sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
#define JUMPHERE(jump) sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
643 
/* Numeric constants used with character reading: READ_CHAR_MAX is the
largest positive 32 bit signed value, INVALID_UTF_CHAR is -1 and
UNASSIGNED_UTF_CHAR is 888. */
#define READ_CHAR_MAX 0x7fffffff

#define INVALID_UTF_CHAR (-1)
#define UNASSIGNED_UTF_CHAR 888
648 
649 #if defined SUPPORT_UNICODE
650 #if PCRE2_CODE_UNIT_WIDTH == 8
651 
/* Forward UTF-8 reader with validation (8 bit code units).

Decodes the character starting at ptr into c and moves ptr past it. No code
unit at or after end is read, except that ptr[0] itself is read without a
bounds check. invalid_action is executed when the sequence is malformed:
a missing or bad continuation byte, a lead byte that fits no accepted form
(accepted: 0xc2-0xdf, 0xe0-0xef, 0xf0-0xf4), a three byte value below 0x800
or inside 0xd800-0xdfff, or a four byte value below 0x10000 or above
0x10ffff. For the value-range failures ptr has already been advanced; for
the other failures ptr is unchanged and c may hold a partial value.

The expansion is a bare brace block and the arguments are pasted as is. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
{ \
    if (ptr[0] <= 0x7f) \
        c = *ptr++; \
    else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) { \
        /* At least one valid continuation byte follows. */ \
        c = ptr[1] - 0x80; \
        \
        if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) { \
            c |= (ptr[0] - 0xc0) << 6; \
            ptr += 2; \
        } \
        else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] < 0xc0) { \
            c = c << 6 | (ptr[2] - 0x80); \
            \
            if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) { \
                c |= (ptr[0] - 0xe0) << 12; \
                ptr += 3; \
                \
                /* Overlong form or surrogate. */ \
                if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) { \
                    invalid_action; \
                } \
            } \
            else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) { \
                c = c << 6 | (ptr[3] - 0x80); \
                \
                if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) { \
                    c |= (ptr[0] - 0xf0) << 18; \
                    ptr += 4; \
                    \
                    /* Beyond U+10FFFF or overlong form. */ \
                    if (c >= 0x110000 || c < 0x10000) { \
                        invalid_action; \
                    } \
                } \
                else { \
                    invalid_action; \
                } \
            } \
            else { \
                invalid_action; \
            } \
        } \
        else { \
            invalid_action; \
        } \
    } \
    else { \
        invalid_action; \
    } \
}
713 
/* Backward UTF-8 reader with validation (8 bit code units).

Decodes the character that ends just before ptr into c and moves ptr to its
first code unit. No code unit before start is read, except that ptr[-1]
itself is read without a bounds check. invalid_action is executed when the
sequence is malformed: a bad or missing continuation / lead byte, a three
byte value below 0x800 or inside 0xd800-0xdfff, or a four byte value below
0x10000 or above 0x10ffff. For the value-range failures ptr has already been
moved back; for the other failures ptr is unchanged and c may hold a partial
value.

Because the bytes are visited from last to first, c starts with the LOWEST
six bits and every earlier byte has to be OR-ed in at a higher position
(<< 6, << 12, << 18). Shifting c itself, as the forward reader does, would
scramble the bit groups of three and four byte characters.

The expansion is a bare brace block, not do { } while (0), and the arguments
are pasted as is - presumably so that invalid_action may be a control flow
statement acting on the caller's loop; verify against the call sites before
changing the wrapper. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c <= 0x7f) \
    ptr--; \
  else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \
    { \
    /* Last continuation byte: bits 0-5. */ \
    c -= 0x80; \
    \
    if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \
      { \
      c |= (ptr[-2] - 0xc0) << 6; \
      ptr -= 2; \
      } \
    else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \
      { \
      /* Second to last continuation byte: bits 6-11. */ \
      c |= (ptr[-2] - 0x80) << 6; \
      \
      if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \
        { \
        c |= (ptr[-3] - 0xe0) << 12; \
        ptr -= 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \
        { \
        /* First continuation byte of a four byte form: bits 12-17. */ \
        c |= (ptr[-3] - 0x80) << 12; \
        \
        if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \
          { \
          c |= (ptr[-4] - 0xf0) << 18; \
          ptr -= 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
776 
777 #elif PCRE2_CODE_UNIT_WIDTH == 16
778 
/* Forward UTF-16 reader with validation (16 bit code units).

A code unit outside 0xd800-0xdfff is taken as the character itself. A high
surrogate (0xd800-0xdbff) followed, before end, by a low surrogate
(0xdc00-0xdfff) is combined into one character and ptr moves by two. Any
other arrangement executes invalid_action and leaves c and ptr untouched.
The expansion is a bare brace block and the arguments are pasted as is. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
{ \
    if (ptr[0] < 0xd800 || ptr[0] >= 0xe000) \
        c = *ptr++; \
    else if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) { \
        c = (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)) + 0x10000; \
        ptr += 2; \
    } \
    else { \
        invalid_action; \
    } \
}
793 
/* Backward UTF-16 reader with validation (16 bit code units).

c is first loaded with the code unit before ptr. If it is not a surrogate,
ptr moves back by one. If it is a low surrogate (0xdc00-0xdfff) preceded,
not before start, by a high surrogate (0xd800-0xdbff), both are combined
into one character and ptr moves back by two. Anything else executes
invalid_action with ptr unchanged (c then still holds the raw code unit).
The expansion is a bare brace block and the arguments are pasted as is. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
{ \
    c = ptr[-1]; \
    if (c < 0xd800 || c >= 0xe000) \
        ptr--; \
    else if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) { \
        c = (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)) + 0x10000; \
        ptr -= 2; \
    } \
    else { \
        invalid_action; \
    } \
}
809 
810 
811 #elif PCRE2_CODE_UNIT_WIDTH == 32
812 
/* Forward UTF-32 reader with validation (32 bit code units).

The code unit at ptr is accepted when it is below 0xd800, or from 0xe000 up
to 0x10ffff; it is then stored in c and ptr moves by one. Otherwise
invalid_action is executed and neither c nor ptr changes. The end argument
is not used by this variant. The expansion is a bare brace block and the
arguments are pasted as is. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
{ \
    if (ptr[0] < 0xd800 || (ptr[0] >= 0xe000 && ptr[0] < 0x110000)) \
        c = *ptr++; \
    else { \
        invalid_action; \
    } \
}
822 
/* Backward UTF-32 reader with validation (32 bit code units).

c is always loaded with the code unit before ptr. When that unit is below
0xd800, or from 0xe000 up to 0x10ffff, ptr moves back by one; otherwise
invalid_action is executed and ptr is unchanged. The start argument is not
used by this variant. The expansion is a bare brace block and the arguments
are pasted as is. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
{ \
    c = ptr[-1]; \
    if (ptr[-1] < 0xd800 || (ptr[-1] >= 0xe000 && ptr[-1] < 0x110000)) \
        ptr--; \
    else { \
        invalid_action; \
    } \
}
833 
834 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
835 #endif /* SUPPORT_UNICODE */
836 
/* Returns the position just behind the closing ket of the group that starts
at cc. cc must be at an opcode in the OP_ASSERT..OP_ASSERTBACK_NA or
OP_ONCE..OP_SCOND range (checked by assertion). Every alternative is hopped
over through its link value until an opcode other than OP_ALT is reached,
which must be one of OP_KET..OP_KETRPOS. */
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* The first branch is always skipped; further ones start with OP_ALT. */
cc += GET(cc, 1);
while (*cc == OP_ALT)
  cc += GET(cc, 1);

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
/* Step over the ket opcode and its link field. */
return cc + 1 + LINK_SIZE;
}
845 
/* Counts the alternatives of the group that starts at cc (at least 1).
cc must be at an opcode in the OP_ASSERT..OP_ASSERTBACK_NA or
OP_ONCE..OP_SCOND range, and the walk must end on one of OP_KET..OP_KETRPOS;
both conditions are checked by assertion. */
static int no_alternatives(PCRE2_SPTR cc)
{
int alternatives = 1;

SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* Follow the link of the first branch, then one more per OP_ALT found. */
for (cc += GET(cc, 1); *cc == OP_ALT; cc += GET(cc, 1))
  alternatives++;

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return alternatives;
}
859 
860 /* Functions which might need modification for all new supported opcodes:
861  next_opcode
862  check_opcode_types
863  set_private_data_ptrs
864  get_framesize
865  init_frame
866  get_recurse_data_length
867  copy_recurse_data
868  compile_matchingpath
869  compile_backtrackingpath
870 */
871 
next_opcode(compiler_common * common,PCRE2_SPTR cc)872 static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
873 {
874 SLJIT_UNUSED_ARG(common);
875 switch(*cc)
876   {
877   case OP_SOD:
878   case OP_SOM:
879   case OP_SET_SOM:
880   case OP_NOT_WORD_BOUNDARY:
881   case OP_WORD_BOUNDARY:
882   case OP_NOT_DIGIT:
883   case OP_DIGIT:
884   case OP_NOT_WHITESPACE:
885   case OP_WHITESPACE:
886   case OP_NOT_WORDCHAR:
887   case OP_WORDCHAR:
888   case OP_ANY:
889   case OP_ALLANY:
890   case OP_NOTPROP:
891   case OP_PROP:
892   case OP_ANYNL:
893   case OP_NOT_HSPACE:
894   case OP_HSPACE:
895   case OP_NOT_VSPACE:
896   case OP_VSPACE:
897   case OP_EXTUNI:
898   case OP_EODN:
899   case OP_EOD:
900   case OP_CIRC:
901   case OP_CIRCM:
902   case OP_DOLL:
903   case OP_DOLLM:
904   case OP_CRSTAR:
905   case OP_CRMINSTAR:
906   case OP_CRPLUS:
907   case OP_CRMINPLUS:
908   case OP_CRQUERY:
909   case OP_CRMINQUERY:
910   case OP_CRRANGE:
911   case OP_CRMINRANGE:
912   case OP_CRPOSSTAR:
913   case OP_CRPOSPLUS:
914   case OP_CRPOSQUERY:
915   case OP_CRPOSRANGE:
916   case OP_CLASS:
917   case OP_NCLASS:
918   case OP_REF:
919   case OP_REFI:
920   case OP_DNREF:
921   case OP_DNREFI:
922   case OP_RECURSE:
923   case OP_CALLOUT:
924   case OP_ALT:
925   case OP_KET:
926   case OP_KETRMAX:
927   case OP_KETRMIN:
928   case OP_KETRPOS:
929   case OP_REVERSE:
930   case OP_ASSERT:
931   case OP_ASSERT_NOT:
932   case OP_ASSERTBACK:
933   case OP_ASSERTBACK_NOT:
934   case OP_ASSERT_NA:
935   case OP_ASSERTBACK_NA:
936   case OP_ONCE:
937   case OP_SCRIPT_RUN:
938   case OP_BRA:
939   case OP_BRAPOS:
940   case OP_CBRA:
941   case OP_CBRAPOS:
942   case OP_COND:
943   case OP_SBRA:
944   case OP_SBRAPOS:
945   case OP_SCBRA:
946   case OP_SCBRAPOS:
947   case OP_SCOND:
948   case OP_CREF:
949   case OP_DNCREF:
950   case OP_RREF:
951   case OP_DNRREF:
952   case OP_FALSE:
953   case OP_TRUE:
954   case OP_BRAZERO:
955   case OP_BRAMINZERO:
956   case OP_BRAPOSZERO:
957   case OP_PRUNE:
958   case OP_SKIP:
959   case OP_THEN:
960   case OP_COMMIT:
961   case OP_FAIL:
962   case OP_ACCEPT:
963   case OP_ASSERT_ACCEPT:
964   case OP_CLOSE:
965   case OP_SKIPZERO:
966   return cc + PRIV(OP_lengths)[*cc];
967 
968   case OP_CHAR:
969   case OP_CHARI:
970   case OP_NOT:
971   case OP_NOTI:
972   case OP_STAR:
973   case OP_MINSTAR:
974   case OP_PLUS:
975   case OP_MINPLUS:
976   case OP_QUERY:
977   case OP_MINQUERY:
978   case OP_UPTO:
979   case OP_MINUPTO:
980   case OP_EXACT:
981   case OP_POSSTAR:
982   case OP_POSPLUS:
983   case OP_POSQUERY:
984   case OP_POSUPTO:
985   case OP_STARI:
986   case OP_MINSTARI:
987   case OP_PLUSI:
988   case OP_MINPLUSI:
989   case OP_QUERYI:
990   case OP_MINQUERYI:
991   case OP_UPTOI:
992   case OP_MINUPTOI:
993   case OP_EXACTI:
994   case OP_POSSTARI:
995   case OP_POSPLUSI:
996   case OP_POSQUERYI:
997   case OP_POSUPTOI:
998   case OP_NOTSTAR:
999   case OP_NOTMINSTAR:
1000   case OP_NOTPLUS:
1001   case OP_NOTMINPLUS:
1002   case OP_NOTQUERY:
1003   case OP_NOTMINQUERY:
1004   case OP_NOTUPTO:
1005   case OP_NOTMINUPTO:
1006   case OP_NOTEXACT:
1007   case OP_NOTPOSSTAR:
1008   case OP_NOTPOSPLUS:
1009   case OP_NOTPOSQUERY:
1010   case OP_NOTPOSUPTO:
1011   case OP_NOTSTARI:
1012   case OP_NOTMINSTARI:
1013   case OP_NOTPLUSI:
1014   case OP_NOTMINPLUSI:
1015   case OP_NOTQUERYI:
1016   case OP_NOTMINQUERYI:
1017   case OP_NOTUPTOI:
1018   case OP_NOTMINUPTOI:
1019   case OP_NOTEXACTI:
1020   case OP_NOTPOSSTARI:
1021   case OP_NOTPOSPLUSI:
1022   case OP_NOTPOSQUERYI:
1023   case OP_NOTPOSUPTOI:
1024   cc += PRIV(OP_lengths)[*cc];
1025 #ifdef SUPPORT_UNICODE
1026   if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1027 #endif
1028   return cc;
1029 
1030   /* Special cases. */
1031   case OP_TYPESTAR:
1032   case OP_TYPEMINSTAR:
1033   case OP_TYPEPLUS:
1034   case OP_TYPEMINPLUS:
1035   case OP_TYPEQUERY:
1036   case OP_TYPEMINQUERY:
1037   case OP_TYPEUPTO:
1038   case OP_TYPEMINUPTO:
1039   case OP_TYPEEXACT:
1040   case OP_TYPEPOSSTAR:
1041   case OP_TYPEPOSPLUS:
1042   case OP_TYPEPOSQUERY:
1043   case OP_TYPEPOSUPTO:
1044   return cc + PRIV(OP_lengths)[*cc] - 1;
1045 
1046   case OP_ANYBYTE:
1047 #ifdef SUPPORT_UNICODE
1048   if (common->utf) return NULL;
1049 #endif
1050   return cc + 1;
1051 
1052   case OP_CALLOUT_STR:
1053   return cc + GET(cc, 1 + 2*LINK_SIZE);
1054 
1055 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
1056   case OP_XCLASS:
1057   return cc + GET(cc, 1);
1058 #endif
1059 
1060   case OP_MARK:
1061   case OP_COMMIT_ARG:
1062   case OP_PRUNE_ARG:
1063   case OP_SKIP_ARG:
1064   case OP_THEN_ARG:
1065   return cc + 1 + 2 + cc[1];
1066 
1067   default:
1068   SLJIT_UNREACHABLE();
1069   return NULL;
1070   }
1071 }
1072 
check_opcode_types(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend)1073 static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
1074 {
1075 int count;
1076 PCRE2_SPTR slot;
1077 PCRE2_SPTR assert_back_end = cc - 1;
1078 PCRE2_SPTR assert_na_end = cc - 1;
1079 
1080 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
1081 while (cc < ccend)
1082   {
1083   switch(*cc)
1084     {
1085     case OP_SET_SOM:
1086     common->has_set_som = TRUE;
1087     common->might_be_empty = TRUE;
1088     cc += 1;
1089     break;
1090 
1091     case OP_REFI:
1092 #ifdef SUPPORT_UNICODE
1093     if (common->iref_ptr == 0)
1094       {
1095       common->iref_ptr = common->ovector_start;
1096       common->ovector_start += 3 * sizeof(sljit_sw);
1097       }
1098 #endif /* SUPPORT_UNICODE */
1099     /* Fall through. */
1100     case OP_REF:
1101     common->optimized_cbracket[GET2(cc, 1)] = 0;
1102     cc += 1 + IMM2_SIZE;
1103     break;
1104 
1105     case OP_ASSERT_NA:
1106     case OP_ASSERTBACK_NA:
1107     slot = bracketend(cc);
1108     if (slot > assert_na_end)
1109       assert_na_end = slot;
1110     cc += 1 + LINK_SIZE;
1111     break;
1112 
1113     case OP_CBRAPOS:
1114     case OP_SCBRAPOS:
1115     common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
1116     cc += 1 + LINK_SIZE + IMM2_SIZE;
1117     break;
1118 
1119     case OP_COND:
1120     case OP_SCOND:
1121     /* Only AUTO_CALLOUT can insert this opcode. We do
1122        not intend to support this case. */
1123     if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
1124       return FALSE;
1125     cc += 1 + LINK_SIZE;
1126     break;
1127 
1128     case OP_CREF:
1129     common->optimized_cbracket[GET2(cc, 1)] = 0;
1130     cc += 1 + IMM2_SIZE;
1131     break;
1132 
1133     case OP_DNREF:
1134     case OP_DNREFI:
1135     case OP_DNCREF:
1136     count = GET2(cc, 1 + IMM2_SIZE);
1137     slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
1138     while (count-- > 0)
1139       {
1140       common->optimized_cbracket[GET2(slot, 0)] = 0;
1141       slot += common->name_entry_size;
1142       }
1143     cc += 1 + 2 * IMM2_SIZE;
1144     break;
1145 
1146     case OP_RECURSE:
1147     /* Set its value only once. */
1148     if (common->recursive_head_ptr == 0)
1149       {
1150       common->recursive_head_ptr = common->ovector_start;
1151       common->ovector_start += sizeof(sljit_sw);
1152       }
1153     cc += 1 + LINK_SIZE;
1154     break;
1155 
1156     case OP_CALLOUT:
1157     case OP_CALLOUT_STR:
1158     if (common->capture_last_ptr == 0)
1159       {
1160       common->capture_last_ptr = common->ovector_start;
1161       common->ovector_start += sizeof(sljit_sw);
1162       }
1163     cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
1164     break;
1165 
1166     case OP_ASSERTBACK:
1167     slot = bracketend(cc);
1168     if (slot > assert_back_end)
1169       assert_back_end = slot;
1170     cc += 1 + LINK_SIZE;
1171     break;
1172 
1173     case OP_THEN_ARG:
1174     common->has_then = TRUE;
1175     common->control_head_ptr = 1;
1176     /* Fall through. */
1177 
1178     case OP_COMMIT_ARG:
1179     case OP_PRUNE_ARG:
1180     if (cc < assert_na_end)
1181       return FALSE;
1182     /* Fall through */
1183     case OP_MARK:
1184     if (common->mark_ptr == 0)
1185       {
1186       common->mark_ptr = common->ovector_start;
1187       common->ovector_start += sizeof(sljit_sw);
1188       }
1189     cc += 1 + 2 + cc[1];
1190     break;
1191 
1192     case OP_THEN:
1193     common->has_then = TRUE;
1194     common->control_head_ptr = 1;
1195     cc += 1;
1196     break;
1197 
1198     case OP_SKIP:
1199     if (cc < assert_back_end)
1200       common->has_skip_in_assert_back = TRUE;
1201     if (cc < assert_na_end)
1202       return FALSE;
1203     cc += 1;
1204     break;
1205 
1206     case OP_SKIP_ARG:
1207     common->control_head_ptr = 1;
1208     common->has_skip_arg = TRUE;
1209     if (cc < assert_back_end)
1210       common->has_skip_in_assert_back = TRUE;
1211     if (cc < assert_na_end)
1212       return FALSE;
1213     cc += 1 + 2 + cc[1];
1214     break;
1215 
1216     case OP_PRUNE:
1217     case OP_COMMIT:
1218     case OP_ASSERT_ACCEPT:
1219     if (cc < assert_na_end)
1220       return FALSE;
1221     cc++;
1222     break;
1223 
1224     default:
1225     cc = next_opcode(common, cc);
1226     if (cc == NULL)
1227       return FALSE;
1228     break;
1229     }
1230   }
1231 return TRUE;
1232 }
1233 
1234 #define EARLY_FAIL_ENHANCE_MAX (1 + 3)
1235 
1236 /*
1237 start:
1238   0 - skip / early fail allowed
1239   1 - only early fail with range allowed
1240   >1 - (start - 1) early fail is processed
1241 
1242 return: current number of iterators enhanced with fast fail
1243 */
detect_early_fail(compiler_common * common,PCRE2_SPTR cc,int * private_data_start,sljit_s32 depth,int start,BOOL fast_forward_allowed)1244 static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start,
1245    sljit_s32 depth, int start, BOOL fast_forward_allowed)
1246 {
1247 PCRE2_SPTR begin = cc;
1248 PCRE2_SPTR next_alt;
1249 PCRE2_SPTR end;
1250 PCRE2_SPTR accelerated_start;
1251 BOOL prev_fast_forward_allowed;
1252 int result = 0;
1253 int count;
1254 
1255 SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
1256 SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
1257 SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);
1258 
1259 next_alt = cc + GET(cc, 1);
1260 if (*next_alt == OP_ALT)
1261   fast_forward_allowed = FALSE;
1262 
1263 do
1264   {
1265   count = start;
1266   cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1267 
1268   while (TRUE)
1269     {
1270     accelerated_start = NULL;
1271 
1272     switch(*cc)
1273       {
1274       case OP_SOD:
1275       case OP_SOM:
1276       case OP_SET_SOM:
1277       case OP_NOT_WORD_BOUNDARY:
1278       case OP_WORD_BOUNDARY:
1279       case OP_EODN:
1280       case OP_EOD:
1281       case OP_CIRC:
1282       case OP_CIRCM:
1283       case OP_DOLL:
1284       case OP_DOLLM:
1285       /* Zero width assertions. */
1286       cc++;
1287       continue;
1288 
1289       case OP_NOT_DIGIT:
1290       case OP_DIGIT:
1291       case OP_NOT_WHITESPACE:
1292       case OP_WHITESPACE:
1293       case OP_NOT_WORDCHAR:
1294       case OP_WORDCHAR:
1295       case OP_ANY:
1296       case OP_ALLANY:
1297       case OP_ANYBYTE:
1298       case OP_NOT_HSPACE:
1299       case OP_HSPACE:
1300       case OP_NOT_VSPACE:
1301       case OP_VSPACE:
1302       fast_forward_allowed = FALSE;
1303       cc++;
1304       continue;
1305 
1306       case OP_ANYNL:
1307       case OP_EXTUNI:
1308       fast_forward_allowed = FALSE;
1309       if (count == 0)
1310         count = 1;
1311       cc++;
1312       continue;
1313 
1314       case OP_NOTPROP:
1315       case OP_PROP:
1316       fast_forward_allowed = FALSE;
1317       cc += 1 + 2;
1318       continue;
1319 
1320       case OP_CHAR:
1321       case OP_CHARI:
1322       case OP_NOT:
1323       case OP_NOTI:
1324       fast_forward_allowed = FALSE;
1325       cc += 2;
1326 #ifdef SUPPORT_UNICODE
1327       if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1328 #endif
1329       continue;
1330 
1331       case OP_TYPESTAR:
1332       case OP_TYPEMINSTAR:
1333       case OP_TYPEPLUS:
1334       case OP_TYPEMINPLUS:
1335       case OP_TYPEPOSSTAR:
1336       case OP_TYPEPOSPLUS:
1337       /* The type or prop opcode is skipped in the next iteration. */
1338       cc += 1;
1339 
1340       if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)
1341         {
1342         accelerated_start = cc - 1;
1343         break;
1344         }
1345 
1346       if (count == 0)
1347         count = 1;
1348       fast_forward_allowed = FALSE;
1349       continue;
1350 
1351       case OP_TYPEUPTO:
1352       case OP_TYPEMINUPTO:
1353       case OP_TYPEEXACT:
1354       case OP_TYPEPOSUPTO:
1355       cc += IMM2_SIZE;
1356       /* Fall through */
1357 
1358       case OP_TYPEQUERY:
1359       case OP_TYPEMINQUERY:
1360       case OP_TYPEPOSQUERY:
1361       /* The type or prop opcode is skipped in the next iteration. */
1362       fast_forward_allowed = FALSE;
1363       if (count == 0)
1364         count = 1;
1365       cc += 1;
1366       continue;
1367 
1368       case OP_STAR:
1369       case OP_MINSTAR:
1370       case OP_PLUS:
1371       case OP_MINPLUS:
1372       case OP_POSSTAR:
1373       case OP_POSPLUS:
1374 
1375       case OP_STARI:
1376       case OP_MINSTARI:
1377       case OP_PLUSI:
1378       case OP_MINPLUSI:
1379       case OP_POSSTARI:
1380       case OP_POSPLUSI:
1381 
1382       case OP_NOTSTAR:
1383       case OP_NOTMINSTAR:
1384       case OP_NOTPLUS:
1385       case OP_NOTMINPLUS:
1386       case OP_NOTPOSSTAR:
1387       case OP_NOTPOSPLUS:
1388 
1389       case OP_NOTSTARI:
1390       case OP_NOTMINSTARI:
1391       case OP_NOTPLUSI:
1392       case OP_NOTMINPLUSI:
1393       case OP_NOTPOSSTARI:
1394       case OP_NOTPOSPLUSI:
1395       accelerated_start = cc;
1396       cc += 2;
1397 #ifdef SUPPORT_UNICODE
1398       if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1399 #endif
1400       break;
1401 
1402       case OP_UPTO:
1403       case OP_MINUPTO:
1404       case OP_EXACT:
1405       case OP_POSUPTO:
1406       case OP_UPTOI:
1407       case OP_MINUPTOI:
1408       case OP_EXACTI:
1409       case OP_POSUPTOI:
1410       case OP_NOTUPTO:
1411       case OP_NOTMINUPTO:
1412       case OP_NOTEXACT:
1413       case OP_NOTPOSUPTO:
1414       case OP_NOTUPTOI:
1415       case OP_NOTMINUPTOI:
1416       case OP_NOTEXACTI:
1417       case OP_NOTPOSUPTOI:
1418       cc += IMM2_SIZE;
1419       /* Fall through */
1420 
1421       case OP_QUERY:
1422       case OP_MINQUERY:
1423       case OP_POSQUERY:
1424       case OP_QUERYI:
1425       case OP_MINQUERYI:
1426       case OP_POSQUERYI:
1427       case OP_NOTQUERY:
1428       case OP_NOTMINQUERY:
1429       case OP_NOTPOSQUERY:
1430       case OP_NOTQUERYI:
1431       case OP_NOTMINQUERYI:
1432       case OP_NOTPOSQUERYI:
1433       fast_forward_allowed = FALSE;
1434       if (count == 0)
1435         count = 1;
1436       cc += 2;
1437 #ifdef SUPPORT_UNICODE
1438       if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1439 #endif
1440       continue;
1441 
1442       case OP_CLASS:
1443       case OP_NCLASS:
1444 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
1445       case OP_XCLASS:
1446       accelerated_start = cc;
1447       cc += ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR))));
1448 #else
1449       accelerated_start = cc;
1450       cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
1451 #endif
1452 
1453       switch (*cc)
1454         {
1455         case OP_CRSTAR:
1456         case OP_CRMINSTAR:
1457         case OP_CRPLUS:
1458         case OP_CRMINPLUS:
1459         case OP_CRPOSSTAR:
1460         case OP_CRPOSPLUS:
1461         cc++;
1462         break;
1463 
1464         case OP_CRRANGE:
1465         case OP_CRMINRANGE:
1466         case OP_CRPOSRANGE:
1467         cc += 2 * IMM2_SIZE;
1468         /* Fall through */
1469         case OP_CRQUERY:
1470         case OP_CRMINQUERY:
1471         case OP_CRPOSQUERY:
1472         cc++;
1473         if (count == 0)
1474           count = 1;
1475         /* Fall through */
1476         default:
1477         accelerated_start = NULL;
1478         fast_forward_allowed = FALSE;
1479         continue;
1480         }
1481       break;
1482 
1483       case OP_ONCE:
1484       case OP_BRA:
1485       case OP_CBRA:
1486       end = cc + GET(cc, 1);
1487 
1488       prev_fast_forward_allowed = fast_forward_allowed;
1489       fast_forward_allowed = FALSE;
1490       if (depth >= 4)
1491         break;
1492 
1493       end = bracketend(cc) - (1 + LINK_SIZE);
1494       if (*end != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0))
1495         break;
1496 
1497       count = detect_early_fail(common, cc, private_data_start, depth + 1, count, prev_fast_forward_allowed);
1498 
1499       if (PRIVATE_DATA(cc) != 0)
1500         common->private_data_ptrs[begin - common->start] = 1;
1501 
1502       if (count < EARLY_FAIL_ENHANCE_MAX)
1503         {
1504         cc = end + (1 + LINK_SIZE);
1505         continue;
1506         }
1507       break;
1508 
1509       case OP_KET:
1510       SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
1511       if (cc >= next_alt)
1512         break;
1513       cc += 1 + LINK_SIZE;
1514       continue;
1515       }
1516 
1517     if (accelerated_start != NULL)
1518       {
1519       if (count == 0)
1520         {
1521         count++;
1522 
1523         if (fast_forward_allowed)
1524           {
1525           common->fast_forward_bc_ptr = accelerated_start;
1526           common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
1527           *private_data_start += sizeof(sljit_sw);
1528           }
1529         else
1530           {
1531           common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;
1532 
1533           if (common->early_fail_start_ptr == 0)
1534             common->early_fail_start_ptr = *private_data_start;
1535 
1536           *private_data_start += sizeof(sljit_sw);
1537           common->early_fail_end_ptr = *private_data_start;
1538 
1539           if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1540             return EARLY_FAIL_ENHANCE_MAX;
1541           }
1542         }
1543       else
1544         {
1545         common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;
1546 
1547         if (common->early_fail_start_ptr == 0)
1548           common->early_fail_start_ptr = *private_data_start;
1549 
1550         *private_data_start += 2 * sizeof(sljit_sw);
1551         common->early_fail_end_ptr = *private_data_start;
1552 
1553         if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1554           return EARLY_FAIL_ENHANCE_MAX;
1555         }
1556 
1557       /* Cannot be part of a repeat. */
1558       common->private_data_ptrs[begin - common->start] = 1;
1559       count++;
1560 
1561       if (count < EARLY_FAIL_ENHANCE_MAX)
1562         continue;
1563       }
1564 
1565     break;
1566     }
1567 
1568   if (*cc != OP_ALT && *cc != OP_KET)
1569     result = EARLY_FAIL_ENHANCE_MAX;
1570   else if (result < count)
1571     result = count;
1572 
1573   cc = next_alt;
1574   next_alt = cc + GET(cc, 1);
1575   }
1576 while (*cc == OP_ALT);
1577 
1578 return result;
1579 }
1580 
/* Returns the number of private data words (0, 1 or 2) used for the repeat
opcode at cc, which is the opcode that follows a character class.

  OP_CRSTAR, OP_CRPLUS                                     -> 2
  OP_CRMINSTAR, OP_CRMINPLUS, OP_CRQUERY, OP_CRMINQUERY    -> 1
  OP_CRRANGE, OP_CRMINRANGE with a second operand of zero  -> 2 and 1
  OP_CRRANGE, OP_CRMINRANGE otherwise                      -> second operand
                                 minus the first, limited to 2
  anything else                                            -> 0 */
static int get_class_iterator_size(PCRE2_SPTR cc)
{
sljit_u32 lower;
sljit_u32 upper;
sljit_u32 spread;

if (*cc == OP_CRSTAR || *cc == OP_CRPLUS)
  return 2;

if (*cc == OP_CRMINSTAR || *cc == OP_CRMINPLUS || *cc == OP_CRQUERY || *cc == OP_CRMINQUERY)
  return 1;

if (*cc != OP_CRRANGE && *cc != OP_CRMINRANGE)
  return 0;

lower = GET2(cc, 1);
upper = GET2(cc, 1 + IMM2_SIZE);

if (upper == 0)
  return (*cc == OP_CRRANGE) ? 2 : 1;

/* Unsigned difference of the two operands, capped at two words. */
spread = upper - lower;
return (spread > 2) ? 2 : spread;
}
1612 
detect_repeat(compiler_common * common,PCRE2_SPTR begin)1613 static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
1614 {
1615 PCRE2_SPTR end = bracketend(begin);
1616 PCRE2_SPTR next;
1617 PCRE2_SPTR next_end;
1618 PCRE2_SPTR max_end;
1619 PCRE2_UCHAR type;
1620 sljit_sw length = end - begin;
1621 sljit_s32 min, max, i;
1622 
1623 /* Detect fixed iterations first. */
1624 if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)
1625   return FALSE;
1626 
1627 /* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?AB){1,3}/
1628  * Skip the check of the second part. */
1629 if (PRIVATE_DATA(end - LINK_SIZE) != 0)
1630   return TRUE;
1631 
1632 next = end;
1633 min = 1;
1634 while (1)
1635   {
1636   if (*next != *begin)
1637     break;
1638   next_end = bracketend(next);
1639   if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1640     break;
1641   next = next_end;
1642   min++;
1643   }
1644 
1645 if (min == 2)
1646   return FALSE;
1647 
1648 max = 0;
1649 max_end = next;
1650 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1651   {
1652   type = *next;
1653   while (1)
1654     {
1655     if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1656       break;
1657     next_end = bracketend(next + 2 + LINK_SIZE);
1658     if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1659       break;
1660     next = next_end;
1661     max++;
1662     }
1663 
1664   if (next[0] == type && next[1] == *begin && max >= 1)
1665     {
1666     next_end = bracketend(next + 1);
1667     if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1668       {
1669       for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1670         if (*next_end != OP_KET)
1671           break;
1672 
1673       if (i == max)
1674         {
1675         common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1676         common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1677         /* +2 the original and the last. */
1678         common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1679         if (min == 1)
1680           return TRUE;
1681         min--;
1682         max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1683         }
1684       }
1685     }
1686   }
1687 
1688 if (min >= 3)
1689   {
1690   common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1691   common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1692   common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1693   return TRUE;
1694   }
1695 
1696 return FALSE;
1697 }
1698 
/* Groups of case labels for iterator opcodes, to be placed directly in a
switch statement. Each macro expands to labels only, so the statements
written after it form the body shared by all of its opcodes. */

/* Single character iterators: minimizing star / plus and both query forms,
with their caseless, negated and negated caseless variants. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINSTARI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINSTARI: \
    case OP_MINPLUS: \
    case OP_MINPLUSI: \
    case OP_NOTMINPLUS: \
    case OP_NOTMINPLUSI: \
    case OP_QUERY: \
    case OP_QUERYI: \
    case OP_NOTQUERY: \
    case OP_NOTQUERYI: \
    case OP_MINQUERY: \
    case OP_MINQUERYI: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINQUERYI:

/* Single character iterators: greedy star / plus and their variants. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_STARI: \
    case OP_NOTSTAR: \
    case OP_NOTSTARI: \
    case OP_PLUS: \
    case OP_PLUSI: \
    case OP_NOTPLUS: \
    case OP_NOTPLUSI:

/* Single character iterators: upto / minupto and their variants. These
opcodes carry an extra IMM2_SIZE operand. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_UPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTUPTOI: \
    case OP_MINUPTO: \
    case OP_MINUPTOI: \
    case OP_NOTMINUPTO: \
    case OP_NOTMINUPTOI:

/* Character type iterators: minimizing star / plus and both query forms. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Character type iterators: greedy star / plus. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* Character type iterators: upto / minupto. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1750 
/* Assigns private data offsets to the opcodes between common->start and
ccend which need them, storing each offset in common->private_data_ptrs at
the index of the opcode. Offsets are handed out from *private_data_start
upwards, and the first unused offset is written back to
*private_data_start on return.

Brackets are first passed to detect_repeat(). Iterators located inside a
bracket which was converted to a repeat, or inside a bracket that is not
closed by a plain OP_KET, do not get an offset here.

The scan stops early once the next offset is above SLJIT_MAX_LOCAL_SIZE. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
PCRE2_SPTR cc = common->start;
PCRE2_SPTR alt;
/* While cc is below this position no iterator gets a private slot. */
PCRE2_SPTR repeat_end = NULL;
sljit_sw at;
sljit_sw repeat_len;
int next_slot = *private_data_start;
int slots, step, bracket_skip;
BOOL repeat_check = TRUE;

while (cc < ccend && next_slot <= SLJIT_MAX_LOCAL_SIZE)
  {
  slots = 0;
  step = 0;
  bracket_skip = 0;

  /* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point).
  Brackets converted to repeats do not allow global based single
  character repeats inside them. */
  if (repeat_check
      && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
      && detect_repeat(common, cc)
      && cc >= repeat_end)
    repeat_end = bracketend(cc);
  repeat_check = TRUE;

  at = cc - common->start;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry after the OP_KET is the distance recorded by
       detect_repeat(): the OP_KET gets a slot and the repeated copies
       are stepped over. */
    repeat_len = common->private_data_ptrs[at + 1];
    if (repeat_len != 0)
      {
      common->private_data_ptrs[at] = next_slot;
      next_slot += sizeof(sljit_sw);
      cc += repeat_len;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT: case OP_ASSERT_NOT:
    case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA: case OP_ASSERTBACK_NA:
    case OP_ONCE: case OP_SCRIPT_RUN:
    case OP_BRAPOS: case OP_SBRA: case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[at] = next_slot;
    next_slot += sizeof(sljit_sw);
    bracket_skip = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS: case OP_SCBRAPOS:
    common->private_data_ptrs[at] = next_slot;
    next_slot += sizeof(sljit_sw);
    bracket_skip = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    common->private_data_ptrs[at] = 0;
    alt = cc + GET(cc, 1);
    if (*alt == OP_KETRMAX || *alt == OP_KETRMIN)
      {
      common->private_data_ptrs[at] = next_slot;
      next_slot += sizeof(sljit_sw);
      }
    bracket_skip = 1 + LINK_SIZE;
    break;

    case OP_BRA:
    bracket_skip = 1 + LINK_SIZE;
    break;

    case OP_CBRA: case OP_SCBRA:
    bracket_skip = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO:
    step = 1;
    repeat_check = FALSE;
    break;

    /* For character iterators a negative step tells the code after the
       switch to also step over the extra code units of the character
       in UTF mode. */
    CASE_ITERATOR_PRIVATE_DATA_1
    slots = 1;
    step = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    slots = 2;
    step = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    slots = 2;
    step = -(2 + IMM2_SIZE);
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    slots = 1;
    step = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      slots = 2;
    step = 1;
    break;

    case OP_TYPEUPTO:
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      slots = 2;
    step = 1 + IMM2_SIZE;
    break;

    case OP_TYPEMINUPTO:
    slots = 2;
    step = 1 + IMM2_SIZE;
    break;

    case OP_CLASS: case OP_NCLASS:
    step = 1 + 32 / sizeof(PCRE2_UCHAR);
    slots = get_class_iterator_size(cc + step);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    step = GET(cc, 1);
    slots = get_class_iterator_size(cc + step);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
     get a private slot instead of allocating it on the stack. */
  if (slots > 0 && cc >= repeat_end)
    {
    common->private_data_ptrs[at] = next_slot;
    next_slot += sizeof(sljit_sw) * slots;
    }

  if (step < 0)
    {
    cc += -step;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    }
  else if (step > 0)
    cc += step;

  if (bracket_skip > 0)
    {
    /* Entering a bracket outside the current restricted range: the
       range is cleared when the bracket ends with a plain OP_KET,
       otherwise it is extended to the end of this bracket. */
    if (cc >= repeat_end)
      {
      repeat_end = bracketend(cc);
      if (repeat_end[-1 - LINK_SIZE] == OP_KET)
        repeat_end = NULL;
      }
    cc += bracket_skip;
    }
  }

*private_data_start = next_slot;
}
1935 
1936 /* Returns with a frame_types (always < 0) if no need for frame. */
get_framesize(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,BOOL recursive,BOOL * needs_control_head)1937 static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
1938 {
1939 int length = 0;
1940 int possessive = 0;
1941 BOOL stack_restore = FALSE;
1942 BOOL setsom_found = recursive;
1943 BOOL setmark_found = recursive;
1944 /* The last capture is a local variable even for recursions. */
1945 BOOL capture_last_found = FALSE;
1946 
1947 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1948 SLJIT_ASSERT(common->control_head_ptr != 0);
1949 *needs_control_head = TRUE;
1950 #else
1951 *needs_control_head = FALSE;
1952 #endif
1953 
1954 if (ccend == NULL)
1955   {
1956   ccend = bracketend(cc) - (1 + LINK_SIZE);
1957   if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1958     {
1959     possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1960     /* This is correct regardless of common->capture_last_ptr. */
1961     capture_last_found = TRUE;
1962     }
1963   cc = next_opcode(common, cc);
1964   }
1965 
1966 SLJIT_ASSERT(cc != NULL);
1967 while (cc < ccend)
1968   switch(*cc)
1969     {
1970     case OP_SET_SOM:
1971     SLJIT_ASSERT(common->has_set_som);
1972     stack_restore = TRUE;
1973     if (!setsom_found)
1974       {
1975       length += 2;
1976       setsom_found = TRUE;
1977       }
1978     cc += 1;
1979     break;
1980 
1981     case OP_MARK:
1982     case OP_COMMIT_ARG:
1983     case OP_PRUNE_ARG:
1984     case OP_THEN_ARG:
1985     SLJIT_ASSERT(common->mark_ptr != 0);
1986     stack_restore = TRUE;
1987     if (!setmark_found)
1988       {
1989       length += 2;
1990       setmark_found = TRUE;
1991       }
1992     if (common->control_head_ptr != 0)
1993       *needs_control_head = TRUE;
1994     cc += 1 + 2 + cc[1];
1995     break;
1996 
1997     case OP_RECURSE:
1998     stack_restore = TRUE;
1999     if (common->has_set_som && !setsom_found)
2000       {
2001       length += 2;
2002       setsom_found = TRUE;
2003       }
2004     if (common->mark_ptr != 0 && !setmark_found)
2005       {
2006       length += 2;
2007       setmark_found = TRUE;
2008       }
2009     if (common->capture_last_ptr != 0 && !capture_last_found)
2010       {
2011       length += 2;
2012       capture_last_found = TRUE;
2013       }
2014     cc += 1 + LINK_SIZE;
2015     break;
2016 
2017     case OP_CBRA:
2018     case OP_CBRAPOS:
2019     case OP_SCBRA:
2020     case OP_SCBRAPOS:
2021     stack_restore = TRUE;
2022     if (common->capture_last_ptr != 0 && !capture_last_found)
2023       {
2024       length += 2;
2025       capture_last_found = TRUE;
2026       }
2027     length += 3;
2028     cc += 1 + LINK_SIZE + IMM2_SIZE;
2029     break;
2030 
2031     case OP_THEN:
2032     stack_restore = TRUE;
2033     if (common->control_head_ptr != 0)
2034       *needs_control_head = TRUE;
2035     cc ++;
2036     break;
2037 
2038     default:
2039     stack_restore = TRUE;
2040     /* Fall through. */
2041 
2042     case OP_NOT_WORD_BOUNDARY:
2043     case OP_WORD_BOUNDARY:
2044     case OP_NOT_DIGIT:
2045     case OP_DIGIT:
2046     case OP_NOT_WHITESPACE:
2047     case OP_WHITESPACE:
2048     case OP_NOT_WORDCHAR:
2049     case OP_WORDCHAR:
2050     case OP_ANY:
2051     case OP_ALLANY:
2052     case OP_ANYBYTE:
2053     case OP_NOTPROP:
2054     case OP_PROP:
2055     case OP_ANYNL:
2056     case OP_NOT_HSPACE:
2057     case OP_HSPACE:
2058     case OP_NOT_VSPACE:
2059     case OP_VSPACE:
2060     case OP_EXTUNI:
2061     case OP_EODN:
2062     case OP_EOD:
2063     case OP_CIRC:
2064     case OP_CIRCM:
2065     case OP_DOLL:
2066     case OP_DOLLM:
2067     case OP_CHAR:
2068     case OP_CHARI:
2069     case OP_NOT:
2070     case OP_NOTI:
2071 
2072     case OP_EXACT:
2073     case OP_POSSTAR:
2074     case OP_POSPLUS:
2075     case OP_POSQUERY:
2076     case OP_POSUPTO:
2077 
2078     case OP_EXACTI:
2079     case OP_POSSTARI:
2080     case OP_POSPLUSI:
2081     case OP_POSQUERYI:
2082     case OP_POSUPTOI:
2083 
2084     case OP_NOTEXACT:
2085     case OP_NOTPOSSTAR:
2086     case OP_NOTPOSPLUS:
2087     case OP_NOTPOSQUERY:
2088     case OP_NOTPOSUPTO:
2089 
2090     case OP_NOTEXACTI:
2091     case OP_NOTPOSSTARI:
2092     case OP_NOTPOSPLUSI:
2093     case OP_NOTPOSQUERYI:
2094     case OP_NOTPOSUPTOI:
2095 
2096     case OP_TYPEEXACT:
2097     case OP_TYPEPOSSTAR:
2098     case OP_TYPEPOSPLUS:
2099     case OP_TYPEPOSQUERY:
2100     case OP_TYPEPOSUPTO:
2101 
2102     case OP_CLASS:
2103     case OP_NCLASS:
2104     case OP_XCLASS:
2105 
2106     case OP_CALLOUT:
2107     case OP_CALLOUT_STR:
2108 
2109     cc = next_opcode(common, cc);
2110     SLJIT_ASSERT(cc != NULL);
2111     break;
2112     }
2113 
2114 /* Possessive quantifiers can use a special case. */
2115 if (SLJIT_UNLIKELY(possessive == length))
2116   return stack_restore ? no_frame : no_stack;
2117 
2118 if (length > 0)
2119   return length + 1;
2120 return stack_restore ? no_frame : no_stack;
2121 }
2122 
init_frame(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,int stackpos,int stacktop)2123 static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
2124 {
2125 DEFINE_COMPILER;
2126 BOOL setsom_found = FALSE;
2127 BOOL setmark_found = FALSE;
2128 /* The last capture is a local variable even for recursions. */
2129 BOOL capture_last_found = FALSE;
2130 int offset;
2131 
2132 /* >= 1 + shortest item size (2) */
2133 SLJIT_UNUSED_ARG(stacktop);
2134 SLJIT_ASSERT(stackpos >= stacktop + 2);
2135 
2136 stackpos = STACK(stackpos);
2137 if (ccend == NULL)
2138   {
2139   ccend = bracketend(cc) - (1 + LINK_SIZE);
2140   if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
2141     cc = next_opcode(common, cc);
2142   }
2143 
2144 SLJIT_ASSERT(cc != NULL);
2145 while (cc < ccend)
2146   switch(*cc)
2147     {
2148     case OP_SET_SOM:
2149     SLJIT_ASSERT(common->has_set_som);
2150     if (!setsom_found)
2151       {
2152       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2153       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2154       stackpos -= (int)sizeof(sljit_sw);
2155       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2156       stackpos -= (int)sizeof(sljit_sw);
2157       setsom_found = TRUE;
2158       }
2159     cc += 1;
2160     break;
2161 
2162     case OP_MARK:
2163     case OP_COMMIT_ARG:
2164     case OP_PRUNE_ARG:
2165     case OP_THEN_ARG:
2166     SLJIT_ASSERT(common->mark_ptr != 0);
2167     if (!setmark_found)
2168       {
2169       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2170       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2171       stackpos -= (int)sizeof(sljit_sw);
2172       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2173       stackpos -= (int)sizeof(sljit_sw);
2174       setmark_found = TRUE;
2175       }
2176     cc += 1 + 2 + cc[1];
2177     break;
2178 
2179     case OP_RECURSE:
2180     if (common->has_set_som && !setsom_found)
2181       {
2182       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2183       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2184       stackpos -= (int)sizeof(sljit_sw);
2185       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2186       stackpos -= (int)sizeof(sljit_sw);
2187       setsom_found = TRUE;
2188       }
2189     if (common->mark_ptr != 0 && !setmark_found)
2190       {
2191       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2192       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2193       stackpos -= (int)sizeof(sljit_sw);
2194       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2195       stackpos -= (int)sizeof(sljit_sw);
2196       setmark_found = TRUE;
2197       }
2198     if (common->capture_last_ptr != 0 && !capture_last_found)
2199       {
2200       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2201       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2202       stackpos -= (int)sizeof(sljit_sw);
2203       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2204       stackpos -= (int)sizeof(sljit_sw);
2205       capture_last_found = TRUE;
2206       }
2207     cc += 1 + LINK_SIZE;
2208     break;
2209 
2210     case OP_CBRA:
2211     case OP_CBRAPOS:
2212     case OP_SCBRA:
2213     case OP_SCBRAPOS:
2214     if (common->capture_last_ptr != 0 && !capture_last_found)
2215       {
2216       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2217       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2218       stackpos -= (int)sizeof(sljit_sw);
2219       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2220       stackpos -= (int)sizeof(sljit_sw);
2221       capture_last_found = TRUE;
2222       }
2223     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2224     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
2225     stackpos -= (int)sizeof(sljit_sw);
2226     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
2227     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
2228     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2229     stackpos -= (int)sizeof(sljit_sw);
2230     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
2231     stackpos -= (int)sizeof(sljit_sw);
2232 
2233     cc += 1 + LINK_SIZE + IMM2_SIZE;
2234     break;
2235 
2236     default:
2237     cc = next_opcode(common, cc);
2238     SLJIT_ASSERT(cc != NULL);
2239     break;
2240     }
2241 
2242 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
2243 SLJIT_ASSERT(stackpos == STACK(stacktop));
2244 }
2245 
/* Number of temporary register slots the delayed memory copy helpers
rotate through. */
#define RECURSE_TMP_REG_COUNT 3

/* State of a delayed memory-to-memory copy. Each slot owns one temporary
register: a move loads its value into the register immediately, while the
matching store is postponed until the slot is reused or the copy is
finished. */
typedef struct delayed_mem_copy_status {
  /* Compiler used to emit the instructions. */
  struct sljit_compiler *compiler;
  /* Base register of the postponed store of each slot, or -1 when the slot
  has no postponed store. */
  int store_bases[RECURSE_TMP_REG_COUNT];
  /* Memory offset of the postponed store of each slot. */
  int store_offsets[RECURSE_TMP_REG_COUNT];
  /* Register that carries the loaded value of each slot. */
  int tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Register used to preserve the previous content of tmp_regs[i]; it is
  only used when sljit_get_register_index() reports a negative index for it. */
  int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Index of the slot the next move will use (round robin). */
  int next_tmp_reg;
} delayed_mem_copy_status;
2256 
/* Prepares a delayed copy: binds it to the compiler of 'common', marks every
slot as having no postponed store and restarts the rotation at slot 0.
The caller must have filled in tmp_regs[] and saved_tmp_regs[] beforehand;
they are only validated here. */
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
int slot = 0;

status->compiler = common->compiler;
status->next_tmp_reg = 0;

while (slot < RECURSE_TMP_REG_COUNT)
  {
  SLJIT_ASSERT(status->tmp_regs[slot] >= 0);
  /* A separate save register is only allowed when its register index is negative. */
  SLJIT_ASSERT(sljit_get_register_index(status->saved_tmp_regs[slot]) < 0 || status->tmp_regs[slot] == status->saved_tmp_regs[slot]);

  status->store_bases[slot] = -1;
  slot++;
  }
}
2271 
/* Queues the copy of one word from [load_base + load_offset] to
[store_base + store_offset].

The load is emitted right away into the temporary register of the current
slot; the store is postponed. If the slot still holds a postponed store from
an earlier move, that store is emitted first. On the first use of a slot the
temporary register is preserved in saved_tmp_regs[] when that register has a
negative register index (a virtual register, per the original comment). */
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_sw store_offset)
{
struct sljit_compiler *compiler = status->compiler;
const int slot = status->next_tmp_reg;
const int reg = status->tmp_regs[slot];
const int pending_base = status->store_bases[slot];

SLJIT_ASSERT(load_base > 0 && store_base > 0);

if (pending_base != -1)
  {
  /* Flush the store that was postponed by the previous user of this slot. */
  OP1(SLJIT_MOV, SLJIT_MEM1(pending_base), status->store_offsets[slot], reg, 0);
  }
else if (sljit_get_register_index(status->saved_tmp_regs[slot]) < 0)
  {
  /* First use of the slot: keep the old content of the register. */
  OP1(SLJIT_MOV, status->saved_tmp_regs[slot], 0, reg, 0);
  }

OP1(SLJIT_MOV, reg, 0, SLJIT_MEM1(load_base), load_offset);
status->store_offsets[slot] = store_offset;
status->store_bases[slot] = store_base;

status->next_tmp_reg = (slot + 1) % RECURSE_TMP_REG_COUNT;
}
2296 
/* Completes a delayed copy: emits every store that is still postponed and
restores the temporary registers that were preserved in saved_tmp_regs[].
The slots are visited in rotation order starting at next_tmp_reg, i.e. the
slot that would have been reused next is flushed first. */
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int visited;

for (visited = 0; visited < RECURSE_TMP_REG_COUNT; visited++, slot = (slot + 1) % RECURSE_TMP_REG_COUNT)
  {
  int reg, backup;

  /* Nothing was ever queued in this slot. */
  if (status->store_bases[slot] == -1)
    continue;

  reg = status->tmp_regs[slot];
  backup = status->saved_tmp_regs[slot];

  OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], reg, 0);

  /* Give the register its original content back if it was saved. */
  if (sljit_get_register_index(backup) < 0)
    OP1(SLJIT_MOV, reg, 0, backup, 0);
  }
}
2320 
2321 #undef RECURSE_TMP_REG_COUNT
2322 
recurse_check_bit(compiler_common * common,sljit_sw bit_index)2323 static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index)
2324 {
2325 uint8_t *byte;
2326 uint8_t mask;
2327 
2328 SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0);
2329 
2330 bit_index >>= SLJIT_WORD_SHIFT;
2331 
2332 SLJIT_ASSERT((bit_index >> 3) < common->recurse_bitset_size);
2333 
2334 mask = 1 << (bit_index & 0x7);
2335 byte = common->recurse_bitset + (bit_index >> 3);
2336 
2337 if (*byte & mask)
2338   return FALSE;
2339 
2340 *byte |= mask;
2341 return TRUE;
2342 }
2343 
/* Bit flags reported by get_recurse_data_length() about the scanned
bytecode range. */
enum get_recurse_flags {
  /* A verb that can quit the recursion was seen: PRUNE, SKIP, COMMIT, THEN
  or one of their _ARG forms (but not a plain MARK). */
  recurse_flag_quit_found = (1 << 0),
  /* OP_ACCEPT or OP_ASSERT_ACCEPT was seen. */
  recurse_flag_accept_found = (1 << 1),
  /* The start of match may be changed (OP_SET_SOM, or OP_RECURSE when the
  pattern has a set-SOM item). */
  recurse_flag_setsom_found = (1 << 2),
  /* The mark pointer may be changed (MARK and *_ARG verbs, or OP_RECURSE
  when a mark pointer is in use). */
  recurse_flag_setmark_found = (1 << 3),
  /* The control head local is used by the range and needs a slot. */
  recurse_flag_control_head_found = (1 << 4),
};
2351 
/* Computes how many machine words are needed to save the local state that
the bytecode range [cc, ccend) may modify during a recursion.

The range is scanned opcode by opcode. Every local word an opcode uses is
looked up with recurse_check_bit(), so a word that is referenced by several
opcodes is counted only once. The count starts at 1; copy_recurse_data()
uses that first word for common->recursive_head_ptr.

Arguments:
  common        compiler state; common->recurse_bitset is cleared and reused
  cc            first opcode of the range
  ccend         end of the range (the scan must stop exactly here)
  result_flags  receives the recurse_flag_* bits collected during the scan

Returns the number of words. */
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, uint32_t *result_flags)
{
int length = 1;
int size, offset;
PCRE2_SPTR alternative;
uint32_t recurse_flags = 0;

memset(common->recurse_bitset, 0, common->recurse_bitset_size);

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
recurse_flags |= recurse_flag_control_head_found;
#endif

/* Calculate the sum of the private machine words. */
while (cc < ccend)
  {
  size = 0;
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    recurse_flags |= recurse_flag_setsom_found;
    cc += 1;
    break;

    case OP_RECURSE:
    /* A nested recursion may touch the start of match, the mark and the
    last capture index. */
    if (common->has_set_som)
      recurse_flags |= recurse_flag_setsom_found;
    if (common->mark_ptr != 0)
      recurse_flags |= recurse_flag_setmark_found;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      length++;
    cc += 1 + LINK_SIZE;
    break;

    case OP_KET:
    offset = PRIVATE_DATA(cc);
    if (offset != 0)
      {
      if (recurse_check_bit(common, offset))
        length++;
      /* The private data entry of the following code unit holds an extra
      number of code units to skip. */
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    /* Brackets that always own one private word. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
      length++;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    offset = GET2(cc, 1 + LINK_SIZE);
    /* The offset pair of the capture: two words, tracked by the bit of the
    first one. NOTE(review): the second recurse_check_bit() call is an
    argument of SLJIT_ASSERT, so it presumably only runs when assertions are
    compiled in - confirm this is intended. */
    if (recurse_check_bit(common, OVECTOR(offset << 1)))
      {
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
      length += 2;
      }
    if (common->optimized_cbracket[offset] == 0 && recurse_check_bit(common, OVECTOR_PRIV(offset)))
      length++;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      length++;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    offset = GET2(cc, 1 + LINK_SIZE);
    if (recurse_check_bit(common, OVECTOR(offset << 1)))
      {
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
      length += 2;
      }
    /* Unlike OP_CBRA, both the OVECTOR_PRIV word and the private data word
    are counted unconditionally here. */
    if (recurse_check_bit(common, OVECTOR_PRIV(offset)))
      length++;
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
      length++;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      length++;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if ((*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc)))
      length++;
    cc += 1 + LINK_SIZE;
    break;

    /* Character iterators: one or two private words, present only when
    the private data offset is non-zero. */
    CASE_ITERATOR_PRIVATE_DATA_1
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      length++;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      {
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      {
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    /* Character type iterators: same as above without a character operand. */
    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      length++;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      {
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      {
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
    size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif

    /* The number of private words depends on the iterator that follows
    the class. */
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      length += get_class_iterator_size(cc + size);
    cc += size;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    recurse_flags |= recurse_flag_setmark_found;
    if (common->control_head_ptr != 0)
      recurse_flags |= recurse_flag_control_head_found;
    /* A plain MARK does not quit the recursion. */
    if (*cc != OP_MARK)
      recurse_flags |= recurse_flag_quit_found;

    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_SKIP:
    case OP_COMMIT:
    recurse_flags |= recurse_flag_quit_found;
    cc++;
    break;

    case OP_SKIP_ARG:
    recurse_flags |= recurse_flag_quit_found;
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    recurse_flags |= recurse_flag_quit_found | recurse_flag_control_head_found;
    cc++;
    break;

    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    recurse_flags |= recurse_flag_accept_found;
    cc++;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }
SLJIT_ASSERT(cc == ccend);

/* The control head takes one word. The start of match and the mark are
only given a word when a quit verb was found in the range. */
if (recurse_flags & recurse_flag_control_head_found)
  length++;
if (recurse_flags & recurse_flag_quit_found)
  {
  if (recurse_flags & recurse_flag_setsom_found)
    length++;
  if (recurse_flags & recurse_flag_setmark_found)
    length++;
  }

*result_flags = recurse_flags;
return length;
}
2595 
/* Copy modes of copy_recurse_data(). "Global" refers to the local words
addressed relative to SLJIT_SP; the other side is the save area addressed
by a base register. */
enum copy_recurse_data_types {
  /* Save private, shared and kept-shared words into the save area
  (base register STACK_TOP). */
  recurse_copy_from_global,
  /* Load only the private words (and the recursive head) back from the
  save area. */
  recurse_copy_private_to_global,
  /* Load the shared and kept-shared words back from the save area. */
  recurse_copy_shared_to_global,
  /* Load only the kept-shared words back from the save area. */
  recurse_copy_kept_shared_to_global,
  /* Exchange the private and shared words with the save area addressed
  by TMP2; kept-shared words are left alone. */
  recurse_swap_global
};
2603 
copy_recurse_data(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,int type,int stackptr,int stacktop,uint32_t recurse_flags)2604 static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
2605   int type, int stackptr, int stacktop, uint32_t recurse_flags)
2606 {
2607 delayed_mem_copy_status status;
2608 PCRE2_SPTR alternative;
2609 sljit_sw private_srcw[2];
2610 sljit_sw shared_srcw[3];
2611 sljit_sw kept_shared_srcw[2];
2612 int private_count, shared_count, kept_shared_count;
2613 int from_sp, base_reg, offset, i;
2614 
2615 memset(common->recurse_bitset, 0, common->recurse_bitset_size);
2616 
2617 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2618 SLJIT_ASSERT(common->control_head_ptr != 0);
2619 recurse_check_bit(common, common->control_head_ptr);
2620 #endif
2621 
2622 switch (type)
2623   {
2624   case recurse_copy_from_global:
2625   from_sp = TRUE;
2626   base_reg = STACK_TOP;
2627   break;
2628 
2629   case recurse_copy_private_to_global:
2630   case recurse_copy_shared_to_global:
2631   case recurse_copy_kept_shared_to_global:
2632   from_sp = FALSE;
2633   base_reg = STACK_TOP;
2634   break;
2635 
2636   default:
2637   SLJIT_ASSERT(type == recurse_swap_global);
2638   from_sp = FALSE;
2639   base_reg = TMP2;
2640   break;
2641   }
2642 
2643 stackptr = STACK(stackptr);
2644 stacktop = STACK(stacktop);
2645 
2646 status.tmp_regs[0] = TMP1;
2647 status.saved_tmp_regs[0] = TMP1;
2648 
2649 if (base_reg != TMP2)
2650   {
2651   status.tmp_regs[1] = TMP2;
2652   status.saved_tmp_regs[1] = TMP2;
2653   }
2654 else
2655   {
2656   status.saved_tmp_regs[1] = RETURN_ADDR;
2657   if (HAS_VIRTUAL_REGISTERS)
2658     status.tmp_regs[1] = STR_PTR;
2659   else
2660     status.tmp_regs[1] = RETURN_ADDR;
2661   }
2662 
2663 status.saved_tmp_regs[2] = TMP3;
2664 if (HAS_VIRTUAL_REGISTERS)
2665   status.tmp_regs[2] = STR_END;
2666 else
2667   status.tmp_regs[2] = TMP3;
2668 
2669 delayed_mem_copy_init(&status, common);
2670 
2671 if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2672   {
2673   SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2674 
2675   if (!from_sp)
2676     delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);
2677 
2678   if (from_sp || type == recurse_swap_global)
2679     delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
2680   }
2681 
2682 stackptr += sizeof(sljit_sw);
2683 
2684 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2685 if (type != recurse_copy_shared_to_global)
2686   {
2687   if (!from_sp)
2688     delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);
2689 
2690   if (from_sp || type == recurse_swap_global)
2691     delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
2692   }
2693 
2694 stackptr += sizeof(sljit_sw);
2695 #endif
2696 
2697 while (cc < ccend)
2698   {
2699   private_count = 0;
2700   shared_count = 0;
2701   kept_shared_count = 0;
2702 
2703   switch(*cc)
2704     {
2705     case OP_SET_SOM:
2706     SLJIT_ASSERT(common->has_set_som);
2707     if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, OVECTOR(0)))
2708       {
2709       kept_shared_srcw[0] = OVECTOR(0);
2710       kept_shared_count = 1;
2711       }
2712     cc += 1;
2713     break;
2714 
2715     case OP_RECURSE:
2716     if (recurse_flags & recurse_flag_quit_found)
2717       {
2718       if (common->has_set_som && recurse_check_bit(common, OVECTOR(0)))
2719         {
2720         kept_shared_srcw[0] = OVECTOR(0);
2721         kept_shared_count = 1;
2722         }
2723       if (common->mark_ptr != 0 && recurse_check_bit(common, common->mark_ptr))
2724         {
2725         kept_shared_srcw[kept_shared_count] = common->mark_ptr;
2726         kept_shared_count++;
2727         }
2728       }
2729     if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2730       {
2731       shared_srcw[0] = common->capture_last_ptr;
2732       shared_count = 1;
2733       }
2734     cc += 1 + LINK_SIZE;
2735     break;
2736 
2737     case OP_KET:
2738     private_srcw[0] = PRIVATE_DATA(cc);
2739     if (private_srcw[0] != 0)
2740       {
2741       if (recurse_check_bit(common, private_srcw[0]))
2742         private_count = 1;
2743       SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2744       cc += PRIVATE_DATA(cc + 1);
2745       }
2746     cc += 1 + LINK_SIZE;
2747     break;
2748 
2749     case OP_ASSERT:
2750     case OP_ASSERT_NOT:
2751     case OP_ASSERTBACK:
2752     case OP_ASSERTBACK_NOT:
2753     case OP_ASSERT_NA:
2754     case OP_ASSERTBACK_NA:
2755     case OP_ONCE:
2756     case OP_SCRIPT_RUN:
2757     case OP_BRAPOS:
2758     case OP_SBRA:
2759     case OP_SBRAPOS:
2760     case OP_SCOND:
2761     private_srcw[0] = PRIVATE_DATA(cc);
2762     if (recurse_check_bit(common, private_srcw[0]))
2763       private_count = 1;
2764     cc += 1 + LINK_SIZE;
2765     break;
2766 
2767     case OP_CBRA:
2768     case OP_SCBRA:
2769     offset = GET2(cc, 1 + LINK_SIZE);
2770     shared_srcw[0] = OVECTOR(offset << 1);
2771     if (recurse_check_bit(common, shared_srcw[0]))
2772       {
2773       shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
2774       SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
2775       shared_count = 2;
2776       }
2777 
2778     if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2779       {
2780       shared_srcw[shared_count] = common->capture_last_ptr;
2781       shared_count++;
2782       }
2783 
2784     if (common->optimized_cbracket[offset] == 0)
2785       {
2786       private_srcw[0] = OVECTOR_PRIV(offset);
2787       if (recurse_check_bit(common, private_srcw[0]))
2788         private_count = 1;
2789       }
2790 
2791     cc += 1 + LINK_SIZE + IMM2_SIZE;
2792     break;
2793 
2794     case OP_CBRAPOS:
2795     case OP_SCBRAPOS:
2796     offset = GET2(cc, 1 + LINK_SIZE);
2797     shared_srcw[0] = OVECTOR(offset << 1);
2798     if (recurse_check_bit(common, shared_srcw[0]))
2799       {
2800       shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
2801       SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
2802       shared_count = 2;
2803       }
2804 
2805     if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2806       {
2807       shared_srcw[shared_count] = common->capture_last_ptr;
2808       shared_count++;
2809       }
2810 
2811     private_srcw[0] = PRIVATE_DATA(cc);
2812     if (recurse_check_bit(common, private_srcw[0]))
2813       private_count = 1;
2814 
2815     offset = OVECTOR_PRIV(offset);
2816     if (recurse_check_bit(common, offset))
2817       {
2818       private_srcw[private_count] = offset;
2819       private_count++;
2820       }
2821     cc += 1 + LINK_SIZE + IMM2_SIZE;
2822     break;
2823 
2824     case OP_COND:
2825     /* Might be a hidden SCOND. */
2826     alternative = cc + GET(cc, 1);
2827     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
2828       {
2829       private_srcw[0] = PRIVATE_DATA(cc);
2830       if (recurse_check_bit(common, private_srcw[0]))
2831         private_count = 1;
2832       }
2833     cc += 1 + LINK_SIZE;
2834     break;
2835 
2836     CASE_ITERATOR_PRIVATE_DATA_1
2837     private_srcw[0] = PRIVATE_DATA(cc);
2838     if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2839       private_count = 1;
2840     cc += 2;
2841 #ifdef SUPPORT_UNICODE
2842     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2843 #endif
2844     break;
2845 
2846     CASE_ITERATOR_PRIVATE_DATA_2A
2847     private_srcw[0] = PRIVATE_DATA(cc);
2848     if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2849       {
2850       private_count = 2;
2851       private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2852       SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2853       }
2854     cc += 2;
2855 #ifdef SUPPORT_UNICODE
2856     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2857 #endif
2858     break;
2859 
2860     CASE_ITERATOR_PRIVATE_DATA_2B
2861     private_srcw[0] = PRIVATE_DATA(cc);
2862     if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2863       {
2864       private_count = 2;
2865       private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2866       SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2867       }
2868     cc += 2 + IMM2_SIZE;
2869 #ifdef SUPPORT_UNICODE
2870     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2871 #endif
2872     break;
2873 
2874     CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2875     private_srcw[0] = PRIVATE_DATA(cc);
2876     if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2877       private_count = 1;
2878     cc += 1;
2879     break;
2880 
2881     CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2882     private_srcw[0] = PRIVATE_DATA(cc);
2883     if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2884       {
2885       private_count = 2;
2886       private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2887       SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2888       }
2889     cc += 1;
2890     break;
2891 
2892     CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2893     private_srcw[0] = PRIVATE_DATA(cc);
2894     if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2895       {
2896       private_count = 2;
2897       private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2898       SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2899       }
2900     cc += 1 + IMM2_SIZE;
2901     break;
2902 
2903     case OP_CLASS:
2904     case OP_NCLASS:
2905 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
2906     case OP_XCLASS:
2907     i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2908 #else
2909     i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2910 #endif
2911     if (PRIVATE_DATA(cc) != 0)
2912       {
2913       private_count = 1;
2914       private_srcw[0] = PRIVATE_DATA(cc);
2915       switch(get_class_iterator_size(cc + i))
2916         {
2917         case 1:
2918         break;
2919 
2920         case 2:
2921         if (recurse_check_bit(common, private_srcw[0]))
2922           {
2923           private_count = 2;
2924           private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2925           SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2926           }
2927         break;
2928 
2929         default:
2930         SLJIT_UNREACHABLE();
2931         break;
2932         }
2933       }
2934     cc += i;
2935     break;
2936 
2937     case OP_MARK:
2938     case OP_COMMIT_ARG:
2939     case OP_PRUNE_ARG:
2940     case OP_THEN_ARG:
2941     SLJIT_ASSERT(common->mark_ptr != 0);
2942     if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, common->mark_ptr))
2943       {
2944       kept_shared_srcw[0] = common->mark_ptr;
2945       kept_shared_count = 1;
2946       }
2947     if (common->control_head_ptr != 0 && recurse_check_bit(common, common->control_head_ptr))
2948       {
2949       private_srcw[0] = common->control_head_ptr;
2950       private_count = 1;
2951       }
2952     cc += 1 + 2 + cc[1];
2953     break;
2954 
2955     case OP_THEN:
2956     SLJIT_ASSERT(common->control_head_ptr != 0);
2957     if (recurse_check_bit(common, common->control_head_ptr))
2958       {
2959       private_srcw[0] = common->control_head_ptr;
2960       private_count = 1;
2961       }
2962     cc++;
2963     break;
2964 
2965     default:
2966     cc = next_opcode(common, cc);
2967     SLJIT_ASSERT(cc != NULL);
2968     continue;
2969     }
2970 
2971   if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2972     {
2973     SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2974 
2975     for (i = 0; i < private_count; i++)
2976       {
2977       SLJIT_ASSERT(private_srcw[i] != 0);
2978 
2979       if (!from_sp)
2980         delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);
2981 
2982       if (from_sp || type == recurse_swap_global)
2983         delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);
2984 
2985       stackptr += sizeof(sljit_sw);
2986       }
2987     }
2988   else
2989     stackptr += sizeof(sljit_sw) * private_count;
2990 
2991   if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
2992     {
2993     SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);
2994 
2995     for (i = 0; i < shared_count; i++)
2996       {
2997       SLJIT_ASSERT(shared_srcw[i] != 0);
2998 
2999       if (!from_sp)
3000         delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);
3001 
3002       if (from_sp || type == recurse_swap_global)
3003         delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);
3004 
3005       stackptr += sizeof(sljit_sw);
3006       }
3007     }
3008   else
3009     stackptr += sizeof(sljit_sw) * shared_count;
3010 
3011   if (type != recurse_copy_private_to_global && type != recurse_swap_global)
3012     {
3013     SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);
3014 
3015     for (i = 0; i < kept_shared_count; i++)
3016       {
3017       SLJIT_ASSERT(kept_shared_srcw[i] != 0);
3018 
3019       if (!from_sp)
3020         delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);
3021 
3022       if (from_sp || type == recurse_swap_global)
3023         delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);
3024 
3025       stackptr += sizeof(sljit_sw);
3026       }
3027     }
3028   else
3029     stackptr += sizeof(sljit_sw) * kept_shared_count;
3030   }
3031 
3032 SLJIT_ASSERT(cc == ccend && stackptr == stacktop);
3033 
3034 delayed_mem_copy_finish(&status);
3035 }
3036 
set_then_offsets(compiler_common * common,PCRE2_SPTR cc,sljit_u8 * current_offset)3037 static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
3038 {
3039 PCRE2_SPTR end = bracketend(cc);
3040 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
3041 
3042 /* Assert captures then. */
3043 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA)
3044   current_offset = NULL;
3045 /* Conditional block does not. */
3046 if (*cc == OP_COND || *cc == OP_SCOND)
3047   has_alternatives = FALSE;
3048 
3049 cc = next_opcode(common, cc);
3050 if (has_alternatives)
3051   current_offset = common->then_offsets + (cc - common->start);
3052 
3053 while (cc < end)
3054   {
3055   if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
3056     cc = set_then_offsets(common, cc, current_offset);
3057   else
3058     {
3059     if (*cc == OP_ALT && has_alternatives)
3060       current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
3061     if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
3062       *current_offset = 1;
3063     cc = next_opcode(common, cc);
3064     }
3065   }
3066 
3067 return end;
3068 }
3069 
3070 #undef CASE_ITERATOR_PRIVATE_DATA_1
3071 #undef CASE_ITERATOR_PRIVATE_DATA_2A
3072 #undef CASE_ITERATOR_PRIVATE_DATA_2B
3073 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
3074 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
3075 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3076 
is_powerof2(unsigned int value)3077 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
3078 {
3079 return (value & (value - 1)) == 0;
3080 }
3081 
set_jumps(jump_list * list,struct sljit_label * label)3082 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
3083 {
3084 while (list)
3085   {
3086   /* sljit_set_label is clever enough to do nothing
3087   if either the jump or the label is NULL. */
3088   SET_LABEL(list->jump, label);
3089   list = list->next;
3090   }
3091 }
3092 
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)3093 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
3094 {
3095 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
3096 if (list_item)
3097   {
3098   list_item->next = *list;
3099   list_item->jump = jump;
3100   *list = list_item;
3101   }
3102 }
3103 
/* Records a stub on common->stubs: start is the jump that enters the stub,
and a label emitted here is stored as the position where the stub resumes
(see flush_stubs). Nothing is recorded, and no label is emitted, when
the allocation fails. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (!stub)
  return;

stub->start = start;
stub->quit = LABEL();
stub->next = common->stubs;
common->stubs = stub;
}
3117 
/* Emits the body of every recorded stub and then empties the stub list.
Each stub is: the landing point of its start jump, a fast call that is
queued on common->stackalloc, and a jump back to the stub's quit label. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
3132 
count_match(compiler_common * common)3133 static SLJIT_INLINE void count_match(compiler_common *common)
3134 {
3135 DEFINE_COMPILER;
3136 
3137 OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
3138 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
3139 }
3140 
allocate_stack(compiler_common * common,int size)3141 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
3142 {
3143 /* May destroy all locals and registers except TMP2. */
3144 DEFINE_COMPILER;
3145 
3146 SLJIT_ASSERT(size > 0);
3147 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
3148 #ifdef DESTROY_REGISTERS
3149 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
3150 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3151 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3152 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
3153 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
3154 #endif
3155 add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
3156 }
3157 
free_stack(compiler_common * common,int size)3158 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
3159 {
3160 DEFINE_COMPILER;
3161 
3162 SLJIT_ASSERT(size > 0);
3163 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
3164 }
3165 
/* Allocates size bytes plus one leading word with SLJIT_MALLOC. The leading
word stores the previous common->read_only_data_head, so all chunks form
a singly linked list headed by the newest one. Returns a pointer to the
area just after the link word, or NULL when the compiler already reports
an error or when the allocation fails (a memory error is then set on the
compiler). */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
DEFINE_COMPILER;
sljit_uw *chunk;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

chunk = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);

if (SLJIT_UNLIKELY(chunk == NULL))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

/* Link the new chunk in front of the existing list. */
*(void**)chunk = common->read_only_data_head;
common->read_only_data_head = (void *)chunk;

return chunk + 1;
}
3185 
reset_ovector(compiler_common * common,int length)3186 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
3187 {
3188 DEFINE_COMPILER;
3189 struct sljit_label *loop;
3190 sljit_s32 i;
3191 
3192 /* At this point we can freely use all temporary registers. */
3193 SLJIT_ASSERT(length > 1);
3194 /* TMP1 returns with begin - 1. */
3195 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
3196 if (length < 8)
3197   {
3198   for (i = 1; i < length; i++)
3199     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
3200   }
3201 else
3202   {
3203   if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3204     {
3205     GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
3206     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3207     loop = LABEL();
3208     sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
3209     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3210     JUMPTO(SLJIT_NOT_ZERO, loop);
3211     }
3212   else
3213     {
3214     GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
3215     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3216     loop = LABEL();
3217     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
3218     OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
3219     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3220     JUMPTO(SLJIT_NOT_ZERO, loop);
3221     }
3222   }
3223 }
3224 
/* Emits code that stores zero into every word-sized local slot (addressed
relative to SLJIT_SP) in the byte range
[common->early_fail_start_ptr, common->early_fail_end_ptr).
The range must not be empty. */
static SLJIT_INLINE void reset_early_fail(compiler_common *common)
{
DEFINE_COMPILER;
/* Size of the range in bytes. */
sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
sljit_u32 uncleared_size;
/* Source operand of the stores: (SLJIT_IMM, 0) unless replaced by TMP3 below. */
sljit_s32 src = SLJIT_IMM;
sljit_s32 i;
struct sljit_label *loop;

SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);

/* Exactly one slot: a single immediate store is enough. */
if (size == sizeof(sljit_sw))
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
  return;
  }

/* Keep the zero in TMP3 when sljit_get_register_index(TMP3) is non-negative
(presumably: TMP3 is a real machine register - confirm) and the CPU does not
report a zero register. */
if (sljit_get_register_index(TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
  src = TMP3;
  }

/* Up to six slots: emit one store per slot instead of a loop. */
if (size <= 6 * sizeof(sljit_sw))
  {
  for (i = common->early_fail_start_ptr; i < common->early_fail_end_ptr; i += sizeof(sljit_sw))
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, src, 0);
  return;
  }

/* Larger ranges: TMP1 walks over the slots. */
GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);

/* Bytes that do not fit into the three-slots-per-iteration loop
(zero, one or two slots). */
uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);

/* TMP2 is the address where the loop must stop. */
OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);

/* Each iteration clears three slots: one before advancing TMP1 and two
at negative offsets after advancing it. */
loop = LABEL();
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * (sljit_sw)sizeof(sljit_sw), src, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * (sljit_sw)sizeof(sljit_sw), src, 0);
CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);

/* Clear the one or two slots left after the loop. */
if (uncleared_size >= sizeof(sljit_sw))
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);

if (uncleared_size >= 2 * sizeof(sljit_sw))
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0);
}
3274 
/* Emits code that prepares the locals for another match attempt:
 - OVECTOR(2) .. OVECTOR(length - 1) are overwritten with the value
   currently stored in OVECTOR(1);
 - the mark_ptr and control_head_ptr locals (when in use) are cleared;
 - TMP1 is loaded from the start_ptr local;
 - STACK_TOP is reloaded from the "end" field of the sljit_stack that
   jit_arguments->stack points to.
length must be above 1. */
static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
{
DEFINE_COMPILER;
struct sljit_label *loop;
int i;

SLJIT_ASSERT(length > 1);
/* OVECTOR(1) contains the "string begin - 1" constant. */
if (length > 2)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
if (length < 8)
  {
  /* Short vectors: one store per slot, no loop. */
  for (i = 2; i < length; i++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
  }
else
  {
  /* The first sljit_emit_mem call only queries (SLJIT_MEM_SUPP) whether a
  pre-update store is available. STACK_TOP serves as the loop counter in
  both variants; it is reloaded after the loop. */
  if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
    {
    /* TMP2 starts one slot before OVECTOR(2) because the store pre-updates it. */
    GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
    loop = LABEL();
    sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);
    }
  else
    {
    /* Plain store followed by an explicit pointer increment. */
    GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
    loop = LABEL();
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
    OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);
    }
  }

/* Start reloading STACK_TOP. Without virtual registers the stack field is
read directly through ARGUMENTS; otherwise ARGUMENTS is first copied into
STACK_TOP and dereferenced a few instructions later. */
if (!HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));
else
  OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);

if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
if (common->control_head_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
if (HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
/* STACK_TOP holds the sljit_stack pointer here; replace it with stack->end. */
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
}
3328 
do_search_mark(sljit_sw * current,PCRE2_SPTR skip_arg)3329 static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
3330 {
3331 while (current != NULL)
3332   {
3333   switch (current[1])
3334     {
3335     case type_then_trap:
3336     break;
3337 
3338     case type_mark:
3339     if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
3340       return current[3];
3341     break;
3342 
3343     default:
3344     SLJIT_UNREACHABLE();
3345     break;
3346     }
3347   SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
3348   current = (sljit_sw*)current[0];
3349   }
3350 return 0;
3351 }
3352 
/* Emits the code that publishes a successful match:
 - the start_ptr local is written to jit_arguments->startchar_ptr and, when
   a mark local is in use, its value to jit_arguments->mark_ptr;
 - oveccount entries of the local ovector are converted from pointers to
   code unit offsets (relative to jit_arguments->begin) and stored into
   match_data->ovector;
 - SLJIT_RETURN_REG receives 1 when topbracket <= 1, otherwise the counter
   value reached by the downward scan at the end of this function. */
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
DEFINE_COMPILER;
struct sljit_label *loop;
BOOL has_pre;

/* At this point we can freely use all registers. */
/* SLJIT_S2 keeps the old OVECTOR(1) value (used as the comparison value in
the scan below); OVECTOR(1) itself becomes the current STR_PTR. */
OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);

/* Both branches produce the same results: SLJIT_R1 = oveccount and
SLJIT_R2 = address one PCRE2_SIZE before match_data->ovector[0]. With
virtual registers the arguments pointer is first copied into SLJIT_R0. */
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
    SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
  OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  }

/* Query only (SLJIT_MEM_SUPP): is a pre-update load available? */
has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;

/* SLJIT_S0 is the read pointer into the local ovector (one slot earlier when
the load pre-updates it); SLJIT_R0 becomes the subject begin pointer. */
GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

/* Copy loop: runs until the SLJIT_R1 counter (oveccount) reaches zero. */
loop = LABEL();

if (has_pre)
  sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
else
  {
  OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
  OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
  }

/* Advance the output pointer, then turn the pointer into an offset from begin. */
OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
/* Copy the integer value to the output buffer */
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
/* Byte distance -> code unit count. */
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif

SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);

OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);

/* Calculate the return value, which is the maximum ovector value. */
/* The scan starts at local slot OVECTOR((topbracket - 1) * 2) and moves
down two slots at a time, decrementing SLJIT_R1 (initially topbracket + 1)
on every step, for as long as the loaded slot equals SLJIT_S2. */
if (topbracket > 1)
  {
  if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
    {
    /* Pre-update variant: start two slots above the first slot to read. */
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

    /* OVECTOR(0) is never equal to SLJIT_S2. */
    loop = LABEL();
    sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
    }
  else
    {
    /* Plain load followed by an explicit pointer decrement. */
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

    /* OVECTOR(0) is never equal to SLJIT_S2. */
    loop = LABEL();
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
    OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
    }
  }
else
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
}
3449 
/* Emits the exit sequence of a partial match: SLJIT_RETURN_REG is set to
PCRE2_ERROR_PARTIAL, jit_arguments->startchar_ptr and the first two
entries of match_data->ovector are filled in, and control jumps to quit.
ovector[0] is derived from the hit_start local in PCRE2_JIT_PARTIAL_SOFT
mode and from the start_ptr local otherwise; ovector[1] is derived from
STR_END. Both are stored as offsets from jit_arguments->begin. */
static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
{
DEFINE_COMPILER;
sljit_s32 mov_opcode;
/* With virtual registers the arguments pointer is copied into SLJIT_R1 first. */
sljit_s32 arguments_reg = !HAS_VIRTUAL_REGISTERS ? ARGUMENTS : SLJIT_R1;

SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
  && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));

if (arguments_reg != ARGUMENTS)
  OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0);
/* SLJIT_R2 = pointer reported as the start of the partial match. */
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
  common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);

/* Store match begin and end. */
/* SLJIT_S1 = subject begin, SLJIT_R1 = match_data (this overwrites
arguments_reg when it is SLJIT_R1, so it is the last use of it). */
OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data));

/* Store width matching sizeof(PCRE2_SIZE). */
mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;

/* ovector[0]: start pointer converted to a code unit offset. */
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);

/* ovector[1]: STR_END converted to a code unit offset (STR_END is modified). */
OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);

JUMPTO(SLJIT_JUMP, quit);
}
3487 
check_start_used_ptr(compiler_common * common)3488 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
3489 {
3490 /* May destroy TMP1. */
3491 DEFINE_COMPILER;
3492 struct sljit_jump *jump;
3493 
3494 if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3495   {
3496   /* The value of -1 must be kept for start_used_ptr! */
3497   OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
3498   /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
3499   is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
3500   jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
3501   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3502   JUMPHERE(jump);
3503   }
3504 else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
3505   {
3506   jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3507   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3508   JUMPHERE(jump);
3509   }
3510 }
3511 
char_has_othercase(compiler_common * common,PCRE2_SPTR cc)3512 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
3513 {
3514 /* Detects if the character has an othercase. */
3515 unsigned int c;
3516 
3517 #ifdef SUPPORT_UNICODE
3518 if (common->utf || common->ucp)
3519   {
3520   if (common->utf)
3521     {
3522     GETCHAR(c, cc);
3523     }
3524   else
3525     c = *cc;
3526 
3527   if (c > 127)
3528     return c != UCD_OTHERCASE(c);
3529 
3530   return common->fcc[c] != c;
3531   }
3532 else
3533 #endif
3534   c = *cc;
3535 return MAX_255(c) ? common->fcc[c] != c : FALSE;
3536 }
3537 
char_othercase(compiler_common * common,unsigned int c)3538 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
3539 {
3540 /* Returns with the othercase. */
3541 #ifdef SUPPORT_UNICODE
3542 if ((common->utf || common->ucp) && c > 127)
3543   return UCD_OTHERCASE(c);
3544 #endif
3545 return TABLE_GET(c, common->fcc, c);
3546 }
3547 
/* Examines the character at cc and its other case. When the two differ in
more than one bit, 0 is returned. Otherwise the result is (n << 8) | bit,
where bit is the differing bit reduced to the range of a single byte and n
selects the position that bit belongs to (NOTE(review): n looks like a
byte offset inside the encoded character - confirm against the callers).
The character must have an other case (asserted). */
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
/* Detects if the character and its othercase has only 1 bit difference. */
unsigned int c, oc, bit;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
int n;
#endif

/* Fetch the character (c) and its other case (oc). */
#ifdef SUPPORT_UNICODE
if (common->utf || common->ucp)
  {
  if (common->utf)
    {
    GETCHAR(c, cc);
    }
  else
    c = *cc;

  if (c <= 127)
    oc = common->fcc[c];
  else
    oc = UCD_OTHERCASE(c);
  }
else
  {
  c = *cc;
  oc = TABLE_GET(c, common->fcc, c);
  }
#else
c = *cc;
oc = TABLE_GET(c, common->fcc, c);
#endif

SLJIT_ASSERT(c != oc);

/* Set of bits in which the two cases differ. */
bit = c ^ oc;
/* Optimized for English alphabet. */
if (c <= 127 && bit == 0x20)
  return (0 << 8) | 0x20;

/* Since c != oc, they must have at least 1 bit difference. */
if (!is_powerof2(bit))
  return 0;

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
if (common->utf && c > 127)
  {
  /* Start at the last byte of the sequence (GET_EXTRALEN of the first byte)
  and step back one byte for every six zero low bits of the difference,
  shifting the difference down by six each time. */
  n = GET_EXTRALEN(*cc);
  while ((bit & 0x3f) == 0)
    {
    n--;
    bit >>= 6;
    }
  return (n << 8) | bit;
  }
#endif /* SUPPORT_UNICODE */
return (0 << 8) | bit;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#ifdef SUPPORT_UNICODE
if (common->utf && c > 65535)
  {
  /* A difference at bit 10 or above is shifted down by 10 and then handled
  by the common return below (positions 0 / 1); a lower difference is
  reported at positions 2 / 3. */
  if (bit >= (1u << 10))
    bit >>= 10;
  else
    return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
  }
#endif /* SUPPORT_UNICODE */
/* Position 0 when the bit is below 256, otherwise position 1 with the bit
shifted down by eight. */
return (bit < 256) ? ((0u << 8) | bit) : ((1u << 8) | (bit >> 8));

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
3623 
/* Emits the partial match check. Nothing is emitted in PCRE2_JIT_COMPLETE
mode. Otherwise an optional guard comparison on the start_used_ptr local
is emitted that skips the action, and the action itself is:
 - PCRE2_JIT_PARTIAL_SOFT: store zero into the hit_start local;
 - any other mode: jump to the partial match handler (directly to
   common->partialmatchlabel when it is known, otherwise through the
   common->partialmatch jump list).
force must not be set in PCRE2_JIT_COMPLETE mode. When force is set (or
allow_empty_partial is enabled) the "start_used_ptr >= STR_PTR" guard is
not used. */
static void check_partial(compiler_common *common, BOOL force)
{
/* Checks whether a partial matching is occurred. Does not modify registers. */
DEFINE_COMPILER;
struct sljit_jump *skip = NULL;
BOOL soft_mode = common->mode == PCRE2_JIT_PARTIAL_SOFT;

SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

if (common->mode == PCRE2_JIT_COMPLETE)
  return;

if (force || common->allow_empty_partial)
  {
  /* In soft mode a start_used_ptr of -1 skips the action. */
  if (soft_mode)
    skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  }
else
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

if (soft_mode)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

if (skip != NULL)
  JUMPHERE(skip);
}
3653 
/* Emits an end-of-subject test. When STR_PTR >= STR_END:
 - PCRE2_JIT_COMPLETE: jump to end_reached;
 - partial modes: jump to end_reached if start_used_ptr >= STR_PTR;
   otherwise, in soft mode, zero the hit_start local and jump to
   end_reached, and in the remaining mode jump to the partial match
   handler (label or jump list).
Execution falls through when STR_PTR < STR_END. */
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
/* Does not affect registers. Usually used in a tight spot. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

/* Common to both partial modes. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(not_at_end);
}
3683 
/* Emits an end-of-subject test whose failure path goes to backtracks.
In PCRE2_JIT_COMPLETE mode this is a single "STR_PTR >= STR_END" jump.
In the partial modes, when the end is reached, a guard on the
start_used_ptr local may send control to backtracks; otherwise soft
mode zeroes the hit_start local and then jumps to backtracks, while the
remaining mode jumps to the partial match handler (label or jump list).
Execution falls through when STR_PTR < STR_END. */
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
DEFINE_COMPILER;
struct sljit_jump *in_subject;
BOOL soft_mode = common->mode == PCRE2_JIT_PARTIAL_SOFT;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
in_subject = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

if (common->allow_empty_partial)
  {
  if (soft_mode)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));
  }
else
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (soft_mode)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(in_subject);
}
3716 
/* Emits the partial match action without testing STR_END first.
 - PCRE2_JIT_PARTIAL_SOFT: zero the hit_start local unless
   start_used_ptr >= STR_PTR.
 - PCRE2_JIT_PARTIAL_HARD: jump to the partial match handler (label or
   jump list) when start_used_ptr < STR_PTR.
Nothing is emitted in any other mode. */
static void process_partial_match(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *skip;

/* Partial matching mode. */
if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  JUMPHERE(skip);
  return;
  }

if (common->mode != PCRE2_JIT_PARTIAL_HARD)
  return;

if (common->partialmatchlabel == NULL)
  add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
else
  CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
}
3737 
/* Emits a jump to label that is taken while STR_PTR < STR_END; on the
fall-through path (end of subject reached) the code generated by
process_partial_match() follows. */
static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
{
DEFINE_COMPILER;

CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);
process_partial_match(common);
}
3745 
/* Emits code that loads the character at STR_PTR into TMP1 while leaving
STR_PTR unchanged on exit.

max        upper bound of the values the caller is interested in; when it
           is below the first value that needs multi-unit decoding (128
           for UTF-8, 0xd800 for UTF-16 and the 32-bit validity check)
           only the plain code unit load is emitted.
dst, dstw  operand used to save STR_PTR around the UTF decoder call.
backtracks when not NULL and invalid UTF checking is enabled, receives a
           jump taken for an invalid character. */
static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
{
/* Reads the character into TMP1, keeps STR_PTR.
Does not check STR_END. TMP2, dst, RETURN_ADDR Destroyed. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

/* Not every build configuration uses all of the arguments. */
SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
SLJIT_UNUSED_ARG(backtracks);

/* Load the code unit at STR_PTR. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  /* Values below 0x80 are complete; otherwise save STR_PTR, step over the
  first code unit, call the shared UTF-8 reader and restore STR_PTR. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, common->invalid_utf ? &common->utfreadchar_invalid : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
  if (backtracks && common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  /* TMP2 = code unit - 0xd800, used for the surrogate range tests below. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (common->invalid_utf)
    {
    /* Only code units inside 0xd800..0xdfff go through the shared reader. */
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
    if (backtracks && common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    {
    /* TMP2 contains the high surrogate. */
    /* Combine it inline with the following code unit:
    (high - 0xd800) << 10 plus low + 0x10000 - 0xdc00. */
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }

  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  if (max < 0xd800) return;

  /* A value is invalid when it is >= 0x110000 or inside 0xd800..0xdfff. */
  if (backtracks != NULL)
    {
    /* Invalid values jump to backtracks. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
    }
  else
    {
    /* No backtrack list: invalid values are replaced by INVALID_UTF_CHAR. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
    CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3829 
/* Emits code that loads the character ending just before STR_PTR into TMP1
without changing STR_PTR.

max        upper bound of the values the caller is interested in; in the
           UTF-8 and UTF-16 paths a small max (below 128 / 0xd800) limits
           the generated code to the plain code unit load.
backtracks in the invalid UTF paths, receives the jumps taken for an
           invalid character. The 8 and 16 bit paths accept NULL here.
           NOTE(review): the 32-bit path passes backtracks to add_jump
           without a NULL check - confirm callers never pass NULL there. */
static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
{
/* Reads one character back without moving STR_PTR. TMP2 must
contain the start of the subject buffer. Affects TMP1, TMP2, and RETURN_ADDR. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

/* Not every build configuration uses all of the arguments. */
SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);

/* Load the code unit that precedes STR_PTR. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  /* Values below 0x80 are complete; the rest is decoded by a shared routine. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  if (common->invalid_utf)
    {
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  if (common->invalid_utf)
    {
    /* Code units below 0xd800 are complete; the rest goes to the shared routine. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    {
    /* Code units outside 0xdc00..0xdfff are complete. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
    /* TMP2 contains the low surrogate. */
    /* Combine it with the code unit two positions back:
    ((high - 0xd800) << 10) + (low - 0xdc00) + 0x10000. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }
    JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  /* Values >= 0x110000 or inside 0xd800..0xdfff jump to backtracks. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3897 
/* Option bits for the "options" argument of read_char(). */

/* Advance STR_PTR over the whole character even on the paths where
read_char() does not decode it completely. */
#define READ_CHAR_UPDATE_STR_PTR 0x1
/* In invalid UTF mode, use the utfreadnewline_invalid helper instead of
utfreadchar_invalid. */
#define READ_CHAR_UTF8_NEWLINE 0x2
/* Both of the above. */
#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
/* Bypass the invalid UTF helpers in the 8 and 16 bit paths even when
common->invalid_utf is set (presumably the caller knows the data is valid;
confirm against the callers). */
#define READ_CHAR_VALID_UTF 0x4
3902 
static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
  jump_list **backtracks, sljit_u32 options)
{
/* Reads the precise value of a character into TMP1, if the character is
between min and max (c >= min && c <= max). Otherwise it returns with a value
outside the range. Does not check STR_END.

The emitted code always loads the code unit at STR_PTR into TMP1 and advances
STR_PTR by one unit; the mode specific parts below decide how much more is
decoded. options is a combination of the READ_CHAR_* bits. backtracks may be
NULL; when it is not, it collects the jumps taken for invalid characters. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(min);
SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(options);
SLJIT_ASSERT(min <= max);

/* First code unit; STR_PTR now points to the unit after it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* Only values below 128 matter and STR_PTR does not have to skip the
  rest of a longer sequence: the byte already loaded is enough. */
  if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;

  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
    {
    /* Bytes below 0x80 need no validation. Everything else is decoded
    by a shared helper. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);

    if (options & READ_CHAR_UTF8_NEWLINE)
      add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
    else
      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));

    /* The helpers return INVALID_UTF_CHAR in TMP1 for a bad sequence. */
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    JUMPHERE(jump);
    return;
    }

  /* Bytes below 0xc0 are not decoded any further. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  if (min >= 0x10000)
    {
    /* Only four byte sequences can be in range. TMP2 is the first byte
    minus 0xf0; any (unsigned) value above 7 skips the decoding and
    leaves the second byte in TMP1. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
    /* utf8_table4 is indexed by first byte - 0xc0; its entry is added to
    STR_PTR below (presumably the number of trailing bytes). */
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
    /* Append the low six bits of each of the three trailing bytes. */
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
    /* Without READ_CHAR_UPDATE_STR_PTR, STR_PTR is moved only when the
    character has been decoded. */
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    /* With READ_CHAR_UPDATE_STR_PTR, STR_PTR is moved on both paths. */
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (min >= 0x800 && max <= 0xffff)
    {
    /* Only three byte sequences can be in range. TMP2 is the first byte
    minus 0xe0; any (unsigned) value above 0xf skips the decoding. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (max >= 0x800)
    {
    /* The range spans several sequence lengths: use the generic decoder. */
    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
    }
  else if (max < 128)
    {
    /* This point is reached only with READ_CHAR_UPDATE_STR_PTR set (see
    the early return above): the character is not decoded, only STR_PTR
    is moved by the utf8_table4 entry of the first byte. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  else
    {
    /* max is below 0x800: the first byte is combined with one trailing
    byte as if it started a two byte sequence. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    else
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  /* Values below 0xd800 are single units, nothing more to do unless
  STR_PTR must also skip the second half of a surrogate pair. */
  if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;

  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
    {
    /* Units outside 0xd800-0xdfff need no validation. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);

    if (options & READ_CHAR_UTF8_NEWLINE)
      add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
    else
      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));

    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    JUMPHERE(jump);
    return;
    }

  if (max >= 0x10000)
    {
    /* Full decoding: skip it when the unit is not in 0xd800-0xdbff. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
    /* TMP2 contains the high surrogate. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump);
    return;
    }

  /* Skip low surrogate if necessary. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  /* When the first unit is a high surrogate (TMP2 < 0x400), STR_PTR is
  moved over the next unit if requested, and TMP1 is replaced by 0x10000
  when max >= 0xd800. Both variants below do the same, with or without
  conditional moves. */
  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
    {
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
    if (max >= 0xd800)
      CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000);
    }
  else
    {
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    if (max >= 0xd800)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
    JUMPHERE(jump);
    }
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  /* Invalid values are those above 0x10ffff and the 0xd800-0xdfff range. */
  if (backtracks != NULL)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
    }
  else
    {
    /* No backtrack list: replace the value with INVALID_UTF_CHAR. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
    CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
4096 
4097 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4098 
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Returns TRUE when bytes 16..31 of the 32 byte bitset are uniform: all
zero, or all 0xff when nclass is set. In that case the character codes
below 128 are enough to determine a match. */
const sljit_u8 expected = nclass ? 0xff : 0;
int i;

for (i = 16; i < 32; i++)
  {
  if (bitset[i] != expected)
    return FALSE;
  }

return TRUE;
}
4115 
static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the precise character type of a character into TMP1, if the character
is less than 128. Otherwise it returns with zero. Does not check STR_END.

Only valid in UTF mode. STR_PTR is always advanced by one code unit. When
negated is TRUE, the remaining bytes of a multi-byte character are consumed
as well; in invalid UTF mode the sequence is validated by the
utfreadchar_invalid helper and a jump is added to backtracks when it is
malformed. When negated is FALSE, backtracks is not used. */
DEFINE_COMPILER;
struct sljit_jump *jump;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* All values > 127 are zero in ctypes. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (negated)
  {
  /* Single byte characters are complete. */
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (common->invalid_utf)
    {
    /* The helper expects the first byte of the sequence in TMP1, but at
    this point TMP1 holds the ctypes entry. Pass the byte itself. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    /* A valid character above 127 has no type bits. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    }
  else
    {
    /* Skip the trailing bytes: utf8_table4 is indexed by first byte - 0xc0. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  JUMPHERE(jump);
  }
}
4150 
4151 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4152 
static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END
for the first code unit.

Types exist only for character values below 256; TMP1 is zero for anything
above. When negated is FALSE, characters that cannot have a type may be left
partially consumed, because the caller is not going to accept them anyway.
When negated is TRUE the whole character is consumed. In invalid UTF mode
malformed input adds a jump to backtracks. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(negated);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The result of this read may be unused, but saves an "else" part. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (!negated)
    {
    /* Only two byte sequences can encode a value below 256. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
    /* The first byte must be in the 0xc2-0xdf range. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));

    /* If TMP1 is in 0x80-0xbf range, TMP2 is also increased by (0x2 << 6),
    which makes it the character value. */
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
    /* The second byte (now in TMP1, biased by 0x80) must be a continuation
    byte. The combined value in TMP2 must not be tested here. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else if (common->invalid_utf)
    {
    /* The helper expects the first byte of the sequence in TMP1, but at
    this point TMP1 holds the ctypes entry. Pass the byte itself. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    /* Keep the decoded value in TMP2 for the table lookup. */
    OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && negated)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */

#if PCRE2_CODE_UNIT_WIDTH != 8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && negated)
  {
  /* Skip low surrogate if necessary. */
  if (!common->invalid_utf)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);

    /* Move STR_PTR by one more unit when the first unit was a high
    surrogate (TMP2 < 0x400), with or without a conditional move. */
    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
      {
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
      CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
      }
    else
      {
      jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      JUMPHERE(jump);
      }
    return;
    }

  /* Invalid UTF mode: a unit in 0xd800-0xdfff must be a high surrogate
  that is followed, inside the subject, by a low surrogate. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
4271 
static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
{
/* Emits code that moves STR_PTR back by one character. Affects STR_PTR and
TMP1. When must_be_valid is TRUE, TMP2 is not used. Otherwise TMP2 must hold
the start of the subject buffer on entry, and its value is lost. STR_PTR is
not modified for invalid character sequences. backtracks may be NULL; when it
is not, it receives the jumps taken for invalid sequences. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *done;
#endif

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_label *again;

if (common->utf)
  {
  if (must_be_valid || !common->invalid_utf)
    {
    /* Step back while the byte just passed is a continuation byte. */
    again = LABEL();
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, again);
    return;
    }

  /* Possibly invalid input: bytes below 0x80 are done after a single
  step, the rest is handled by a shared helper. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  done = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
  JUMPHERE(done);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (must_be_valid || !common->invalid_utf)
    {
    /* One more step when the unit just passed is a low surrogate: TMP1
    becomes 0 or 1, then it is scaled to the size of a code unit. */
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xdc00);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
    return;
    }

  /* Possibly invalid input: units outside 0xd800-0xdfff are done, the
  rest is handled by a shared helper. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  done = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
  add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
  JUMPHERE(done);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && !must_be_valid)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));

  if (backtracks == NULL)
    {
    /* No backtrack list: step back only when the value is below
    0x110000 (TMP1 becomes 0 or 1, scaled to the unit size). */
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x110000);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
    OP2(SLJIT_SHL,  TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
    return;
    }

  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return;
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(must_be_valid);

/* Every character is a single code unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4358 
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Emits a newline test for the character in TMP1. TMP2 may be destroyed.
A jump is added to backtracks when the character is a newline and jumpifmatch
is TRUE, or when it is not a newline and jumpifmatch is FALSE. */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The shared helper reports its result through the zero flag. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  /* A single, fixed newline character. */
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* Either CR or NL. */
if (!jumpifmatch)
  {
  is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  JUMPHERE(is_cr);
  return;
  }

add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
}
4391 
4392 #ifdef SUPPORT_UNICODE
4393 
4394 #if PCRE2_CODE_UNIT_WIDTH == 8
static void do_utfreadchar(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1.

This is the body of a shared routine entered by a fast call. The trailing
bytes are read starting at STR_PTR, and STR_PTR is advanced over them. No
validation is done. TMP2 is used as scratch. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Append the low six bits of the second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. */
/* 0x800 is bit 0x20 of the first byte after the shift. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
/* Remove the 0xc0 marker bits of the first byte (0xc0 << 6). */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Append the low six bits of the third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* 0x10000 is bit 0x10 of the first byte after two shifts. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence. */
/* Remove the 0xe0 marker bits of the first byte (0xe0 << 12). */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four byte sequence. */
JUMPHERE(jump);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
/* Remove the 0xf0 marker bits (0xf0 << 12) before the last shift. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4439 
static void do_utfreadtype8(compiler_common *common)
{
/* Fast decoding a UTF-8 character type. TMP2 contains the first byte
of the character (>= 0xc0). Return value in TMP1.

This is the body of a shared routine entered by a fast call. The trailing
bytes are read starting at STR_PTR, and STR_PTR is advanced over them. No
validation is done. The returned type is zero for every character whose
value is 256 or above. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *compare;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Bit 0x20 of the first byte is set for sequences longer than two bytes. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point. */
/* A value above 3 means that the character is 256 or above. */
compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(compare);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* We only have types for characters less than 256. */
JUMPHERE(jump);
/* Skip the trailing bytes: utf8_table4 is indexed by first byte - 0xc0. */
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4475 
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Slow decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1. STR_PTR is
undefined for invalid characters.

This is the body of a shared routine entered by a fast call. On entry
STR_PTR points to the byte after the first one. INVALID_UTF_CHAR is returned
in TMP1 for a malformed sequence. Bytes are never read at or beyond STR_END.
TMP2 is used as scratch. */
DEFINE_COMPILER;
sljit_s32 i;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *jump;
struct sljit_jump *buffer_end_close;
struct sljit_label *three_byte_entry;
struct sljit_label *exit_invalid_label;
/* Jumps to the common "invalid" exit. Entries are NULL when a conditional
move is used instead of a jump. */
struct sljit_jump *exit_invalid[11];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* 0xc2 is the lowest first byte accepted. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);

/* Usually more than 3 characters remained in the subject buffer. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));

/* Not a valid start of a multi-byte sequence, no more bytes read. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);

/* Fewer than three bytes are left: continue on the slower path below. */
buffer_end_close = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* 0x800 is bit 0x20 of the first byte: set for three and four byte
sequences. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);

/* Two-byte sequence: TMP1 is the character value, only one of the three
bytes skipped above was consumed. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  /* A bad third byte turns TMP1 into 0x20000, which is rejected by the
  checks after three_byte_entry. */
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000);
  exit_invalid[2] = NULL;
  }
else
  exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* 0x10000 is bit 0x10 of the first byte: set for four byte sequences. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);

three_byte_entry = LABEL();

/* TMP1 is the character value plus 0x20000 here. Values in the
0xd800-0xdfff range are rejected first. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800);
  exit_invalid[3] = NULL;
  }
else
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Values below 0x800 do not need three bytes. */
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  exit_invalid[4] = NULL;
  }
else
  exit_invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);

/* Four-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  /* A bad fourth byte turns TMP1 into 0, which fails the range check
  below. */
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0);
  exit_invalid[5] = NULL;
  }
else
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* TMP1 becomes the character value minus 0x10000. Only the 0-0xfffff range
is accepted, which covers 0x10000-0x10ffff. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  exit_invalid[6] = NULL;
  }
else
  exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Slow path: fewer than three bytes follow the first byte. STR_PTR is set
to point after the second byte, which must exist. */
JUMPHERE(buffer_end_close);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
exit_invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three-byte sequence. */
JUMPHERE(jump);
/* The third byte must be inside the subject. */
exit_invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  exit_invalid[10] = NULL;
  }
else
  exit_invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* One will be subtracted from STR_PTR later. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Four byte sequences are not possible. */
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_entry);

/* Common exit of every failed check above. NULL entries are passed to
sljit_set_label as well (presumably ignored by it; confirm in sljit). */
exit_invalid_label = LABEL();
for (i = 0; i < 11; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4634 
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Slow decoding a UTF-8 character, specialized for newlines.
TMP1 contains the first byte of the character (>= 0xc0). Return
char value in TMP1.

Only the multi-byte newlines are really decoded: 0xc2 0x85 yields 0x85
and 0xe2 0x80 0x80..0xbf yields 0x2000..0x203f (which covers 0x2028 and
0x2029). For every other sequence the following continuation bytes are
skipped and INVALID_UTF_CHAR is returned. No code unit at or beyond
STR_END is read. TMP2 is used as scratch. */
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_label *skip_start;
struct sljit_label *three_byte_exit;
struct sljit_jump *jump[5];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

if (common->nltype != NLTYPE_ANY)
  {
  SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);

  /* All newlines are ascii, just skip intermediate octets. The end of
  the subject is tested on every iteration (the check is emitted after
  the loop label), so a subject that ends with continuation bytes is
  never read past STR_END. */
  loop = LABEL();
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  /* Use a post-increment load when the target supports it. */
  if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
    sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  else
    {
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }

  /* Continue while the octet has the 10xxxxxx form. */
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
  /* The last octet read was not a continuation octet: step back. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  JUMPHERE(jump[0]);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  return;
  }

/* NLTYPE_ANY: read the second octet (if there is one). */
jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Only the 0xc2 and 0xe2 lead octets can start a newline. */
jump[1] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
jump[2] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);

/* Not a newline: TMP2 holds the last octet read. If it is not a
continuation octet it is given back, otherwise the rest are skipped. */
skip_start = LABEL();
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
jump[3] = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);

/* Skip intermediate octets. */
loop = LABEL();
jump[4] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);

JUMPHERE(jump[3]);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Common exit for everything which is not a newline. */
three_byte_exit = LABEL();
JUMPHERE(jump[0]);
JUMPHERE(jump[4]);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two byte long newline: 0x85. */
JUMPHERE(jump[1]);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three byte long newlines: 0x2028 and 0x2029. */
JUMPHERE(jump[2]);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start);
CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* TMP2 keeps the raw third octet, because skip_start inspects it when
it turns out not to be a continuation octet. */
OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start);

/* Result is 0x2000 plus the low six bits of the third octet. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4725 
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Goes one character back.

Emits the UTF-8 helper that looks for the lead byte of a multi-byte
sequence in the (up to) three code units that precede the entry value
of STR_PTR. When a lead byte is found, STR_PTR is left pointing at it
and TMP1 is set to 1. Otherwise TMP1 is set to 0 and STR_PTR is left at
its entry value plus one code unit.

NOTE(review): the incoming TMP1 (rejected when >= 0xc0) and TMP2 (used
as the lowest address that may be read) are set up by the caller, which
is not visible here - confirm their exact meaning there. */
DEFINE_COMPILER;
sljit_s32 i;
struct sljit_jump *jump;
struct sljit_jump *buffer_start_close;
struct sljit_label *exit_ok_label;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[7];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Speculatively go back three code units; the fast path below can then
address all candidate lead bytes relative to this STR_PTR. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);

/* Two-byte sequence. */
/* Fewer than three code units are available: use the slow path, which
checks the limit before every read. */
buffer_start_close = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));

/* A two-byte lead is in the 0xc0-0xdf range. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three-byte sequence. */
JUMPHERE(jump);
/* TMP1 is (byte - 0xc0) and is known to be >= 0x20 as an unsigned
value. Only the wrapped-around values (byte in 0x80-0xbf, that is a
continuation byte) pass this check. */
exit_invalid[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));

/* A three-byte lead is in the 0xe0-0xef range. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four-byte sequence. */
JUMPHERE(jump);
/* Rebase TMP1 to (byte - 0x80): it must be a continuation byte. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);

/* A four-byte lead is accepted in the 0xf0-0xf4 range. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
exit_invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);

/* Success exit which keeps the current STR_PTR. It is shared with the
slow path below. */
exit_ok_label = LABEL();
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two-byte sequence. */
/* Slow path: STR_PTR is moved step by step and compared to the limit
in TMP2 before each read. It is now one unit before its entry value. */
JUMPHERE(buffer_start_close);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

exit_invalid[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, exit_ok_label);

/* Three-byte sequence. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
exit_invalid[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
exit_invalid[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, exit_ok_label);

/* Four-byte sequences are not possible. */

/* Failure exit used when STR_PTR is two units before its entry value:
adding three leaves it at entry + 1. */
exit_invalid_label = LABEL();
sljit_set_label(exit_invalid[5], exit_invalid_label);
sljit_set_label(exit_invalid[6], exit_invalid_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(exit_invalid[4]);
/* -2 + 4 = 2 */
/* STR_PTR is one unit before its entry value here; after this
subtraction it is three units before it, like on the fast path. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Failure exit used when STR_PTR is three units before its entry
value: adding four leaves it at entry + 1. */
exit_invalid_label = LABEL();
for (i = 0; i < 4; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4821 
static void do_utfpeakcharback(compiler_common *common)
{
/* Peek a character back. Does not modify STR_PTR.

Emits the helper that decodes the multi-byte UTF-8 character which ends
just before STR_PTR. The value is accumulated in TMP1 and TMP2 is used
as scratch. No validity or range checks are emitted here. */
DEFINE_COMPILER;
struct sljit_jump *lead_is_two_back;
struct sljit_jump *lead_is_three_back;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Is the byte at offset -2 a two-byte lead (0xc0-0xdf)? */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
lead_is_two_back = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);

/* Is the byte at offset -3 a three-byte lead (0xe0-0xef)? */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
lead_is_three_back = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);

/* Otherwise the lead is at offset -4: combine its payload with the
payload of the continuation byte at offset -3 (already in TMP1). */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Append the six payload bits of the byte at offset -2. */
JUMPHERE(lead_is_three_back);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Append the six payload bits of the byte at offset -1. */
JUMPHERE(lead_is_two_back);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4858 
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Peek a character back. Does not modify STR_PTR.

Emits the UTF-8 helper that decodes, with validation, the multi-byte
character which ends just before STR_PTR. The character value, or
INVALID_UTF_CHAR when the sequence is malformed, is returned in TMP1.
TMP2 is used as scratch.

NOTE(review): on entry TMP1 appears to hold the code unit at
STR_PTR[-1] and TMP2 the lowest address that may be read; both are set
up by the caller, which is not visible here - confirm there. */
DEFINE_COMPILER;
sljit_s32 i;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *jump[2];
struct sljit_label *two_byte_entry;
struct sljit_label *three_byte_entry;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[8];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* TMP2 becomes limit + 3 units: when it is below STR_PTR, at least four
code units can be read backwards without any further checks. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
/* Leads 0xc0 and 0xc1 are excluded by the 0xc2 base: they could only
produce overlong encodings. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);

/* Shared by the short-buffer paths at the end of the function. */
two_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
/* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump[1]);
/* Not a two-byte lead: the byte at -2 must be a continuation byte.
Rebase both bytes to their six bit payloads and merge them. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);

/* Shared by the short-buffer path below. */
three_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Reject the 0xd800-0xdfff (surrogate) range. On the CMOV path TMP1 is
replaced by -0xd800, which becomes 0 after the addition below and is
then caught by the next range check. The unused exit_invalid slots are
set to NULL; they are still passed to sljit_set_label at the end
(presumably it ignores NULL jumps - confirm in sljit). */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800);
  exit_invalid[2] = NULL;
  }
else
  exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);

/* Values below 0x800 do not need three bytes (overlong encoding). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  exit_invalid[3] = NULL;
  }
else
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump[1]);
/* Not a three-byte lead: the byte at -3 must be a continuation byte. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Four-byte sequence. */
/* The value is biased by -0x10000, so a single unsigned comparison with
0x100000 rejects both overlong encodings and values above 0x10ffff, as
well as lead bytes outside the accepted range. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
/* ADD is used instead of OR because of the SUB 0x10000 above. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  exit_invalid[5] = NULL;
  }
else
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);

/* Remove the bias. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than four code units are readable. TMP2 becomes limit + 2
units: when it is below STR_PTR, three units can still be read. */
JUMPHERE(jump[0]);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);

OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);

/* A four-byte sequence does not fit into three code units. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than three code units are readable. Fail when not even two of
them are available, otherwise only a two-byte sequence is possible. */
JUMPHERE(jump[0]);
exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);

/* Common failure exit for every recorded exit_invalid jump. */
exit_invalid_label = LABEL();
for (i = 0; i < 8; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4990 
4991 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
4992 
4993 #if PCRE2_CODE_UNIT_WIDTH == 16
4994 
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Slow path of reading a UTF-16 character with validation. On entry
TMP1 holds the first code unit (>= 0xd800). The helper returns the
decoded character in TMP1, or INVALID_UTF_CHAR when the pair is
malformed or truncated. STR_PTR is undefined for invalid characters.

NOTE(review): TMP2 is expected to already contain the high surrogate in
a form that can be shifted left by ten bits; it is prepared by the
caller, which is not visible here. */
DEFINE_COMPILER;
struct sljit_jump *bad_first_unit;
struct sljit_jump *end_of_input;
struct sljit_jump *bad_second_unit;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* The first unit must be below 0xdc00 and a second unit must exist. */
bad_first_unit = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
end_of_input = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* Read the second unit while the high part is moved into position. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The second unit must be in the 0xdc00-0xdfff range. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
bad_second_unit = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

/* Valid pair: merge the two halves. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Every failure ends up here. */
JUMPHERE(bad_first_unit);
JUMPHERE(end_of_input);
JUMPHERE(bad_second_unit);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5026 
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Slow path of reading a UTF-16 character when only newlines are of
interest. On entry TMP1 holds the first code unit (>= 0xd800). The
value returned in TMP1 is either INVALID_UTF_CHAR, or the constant
0x10000 after STR_PTR has been moved over a following 0xdc00-0xdfff
code unit (if there is one). TMP2 is used as scratch. */
DEFINE_COMPILER;
struct sljit_jump *end_of_input;
struct sljit_jump *bad_first_unit;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

end_of_input = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* Fetch the next code unit without consuming it yet. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
bad_first_unit = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);

/* TMP2 becomes 1 when the fetched unit is in the 0xdc00-0xdfff range
and 0 otherwise. Scaled to bytes, it is the distance to skip. */
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Failure exit. */
JUMPHERE(end_of_input);
JUMPHERE(bad_first_unit);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5058 
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Goes one character back (UTF-16 with validation). When the code unit
before STR_PTR is in the 0xd800-0xdbff range, STR_PTR is moved back by
one unit and TMP1 is set to 1. Otherwise STR_PTR is moved forward by
one unit and TMP1 is set to 0.

NOTE(review): the incoming TMP1 (rejected when below 0x400) and TMP2
(compared with STR_PTR, presumably the start of the readable area) are
prepared by the caller, which is not visible here. */
DEFINE_COMPILER;
struct sljit_jump *tmp1_too_small;
struct sljit_jump *at_start_limit;
struct sljit_jump *not_high_surrogate;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

tmp1_too_small = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
at_start_limit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Inspect the previous code unit. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
not_high_surrogate = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

/* Success: step over it. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Failure exit. */
JUMPHERE(tmp1_too_small);
JUMPHERE(at_start_limit);
JUMPHERE(not_high_surrogate);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5086 
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Peek a character back (UTF-16 with validation). Does not modify
STR_PTR. The character, or INVALID_UTF_CHAR for a malformed pair, is
returned in TMP1.

NOTE(review): on entry TMP1 appears to hold the code unit at
STR_PTR[-1] and TMP2 the lowest address that may be read; both are
prepared by the caller, which is not visible here. */
DEFINE_COMPILER;
struct sljit_jump *above_surrogates;
struct sljit_jump *not_low_surrogate;
struct sljit_jump *no_room_for_pair;
struct sljit_jump *not_high_surrogate;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Values from 0xe000 are returned unchanged. */
above_surrogates = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
/* Two code units must be readable before STR_PTR. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
not_low_surrogate = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
no_room_for_pair = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Combine the low half in TMP1 with the unit at offset -2, which must
be in the 0xd800-0xdbff range. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
not_high_surrogate = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

JUMPHERE(above_surrogates);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Failure exit. */
JUMPHERE(not_low_surrogate);
JUMPHERE(no_room_for_pair);
JUMPHERE(not_high_surrogate);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5118 
5119 #endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
5120 
5121 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
5122 #define UCD_BLOCK_MASK 127
5123 #define UCD_BLOCK_SHIFT 7
5124 
static void do_getucd(compiler_common *common)
{
/* Search the UCD record for the character comes in TMP1.

Emits the two-stage table lookup only: on return TMP2 holds the 16 bit
value read from the ucd_stage2 table (presumably the index of the
record in ucd_records) and TMP1 holds the stage 2 table index which was
used for the read. The chartype is NOT loaded by this helper. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *jump;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
/* Check that the record used for out of range values is really the
"unassigned" one. */
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

/* The shift and mask constants below depend on the block size. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Without UTF checking any 32 bit value may arrive here: values above
  MAX_UTF_CODE_POINT are replaced by UNASSIGNED_UTF_CHAR before they are
  used as a table index. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(jump);
  }
#endif

/* TMP2 = ucd_stage1[TMP1 / 128] (16 bit entries, hence the shift by 1). */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = TMP2 * 128 + (TMP1 % 128), the index into ucd_stage2. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = ucd_stage2[TMP1] (16 bit entries). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5164 
static void do_getucdtype(compiler_common *common)
{
/* Search the UCD record for the character comes in TMP1.
Returns chartype in TMP1. TMP2 is left holding the value read from
ucd_stage2 multiplied by 4, a by-product of the address computation. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *jump;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
/* Check that the record used for out of range values is really the
"unassigned" one. */
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

/* The shift constants and the multiplication by 12 below depend on
these sizes. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Without UTF checking any 32 bit value may arrive here: values above
  MAX_UTF_CODE_POINT are replaced by UNASSIGNED_UTF_CHAR before they are
  used as a table index. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(jump);
  }
#endif

/* TMP2 = ucd_stage1[TMP1 / 128] (16 bit entries, hence the shift by 1). */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = TMP2 * 128 + (TMP1 % 128), the index into ucd_stage2. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = ucd_stage2[TMP1] (16 bit entries). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

/* TMP2 is multiplied by 12. Same as (TMP2 << 2) + ((TMP2 << 2) << 1). */
/* The first term is added explicitly, the second one comes from the
scaled index of the final load. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5211 
5212 #endif /* SUPPORT_UNICODE */
5213 
static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
{
/* Emits the prologue of the main matching loop and the code which moves
STR_PTR to the next start position.

Emitted code, in order:
  1. optional setup of the match end limit (PCRE2_FIRSTLINE or
     PCRE2_USE_OFFSET_LIMIT), stored in the match_end_ptr stack slot;
  2. an unconditional jump over the advance code;
  3. the code that advances STR_PTR by one character.

The returned label is the start of part 3 (presumably the place the
caller jumps to after a failed attempt - confirm at the call site).
Execution that falls through the prologue continues after part 3. */
DEFINE_COMPILER;
struct sljit_label *mainloop;
struct sljit_label *newlinelabel = NULL;
struct sljit_jump *start;
struct sljit_jump *end = NULL;
struct sljit_jump *end2 = NULL;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *loop;
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
jump_list *newline = NULL;
sljit_u32 overall_options = common->re->overall_options;
BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
BOOL newlinecheck = FALSE;
BOOL readuchar = FALSE;

/* A two code unit newline is stepped over as a whole only when neither
PCRE2_HASCRORLF nor PCRE2_FIRSTLINE is set and such a newline can occur
at all. */
if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
    && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
  newlinecheck = TRUE;

SLJIT_ASSERT(common->abort_label == NULL);

if ((overall_options & PCRE2_FIRSTLINE) != 0)
  {
  /* Search for the end of the first line. */
  SLJIT_ASSERT(common->match_end_ptr != 0);
  /* STR_PTR is used for the scan; it is restored from TMP3 afterwards. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed two code unit newline: compare the units at [-1] and [0]. */
    mainloop = LABEL();
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
    JUMPHERE(end);
    /* STR_PTR is one unit ahead here, hence the subtraction. */
    OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    {
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    mainloop = LABEL();
    /* Repeated stores do not create a data dependency. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
    check_newlinechar(common, common->nltype, &newline, TRUE);
    CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
    JUMPHERE(end);
    /* No newline was found: the limit is the current STR_PTR. The jumps
    collected in 'newline' skip this store and keep the value stored at
    the top of the loop. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    set_jumps(newline, LABEL());
    }

  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  }
else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
  {
  /* Check whether offset limit is set and valid. */
  SLJIT_ASSERT(common->match_end_ptr != 0);

  /* TMP1 = arguments->offset_limit */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
    }
  else
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));

  /* With an unset limit STR_END (already in TMP2) is stored below. */
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
  end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
  /* TMP2 = arguments->begin (completed after the shift below when
  virtual registers are in use). */
  if (HAS_VIRTUAL_REGISTERS)
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* Convert the limit from code units to bytes. */
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
  if (HAS_VIRTUAL_REGISTERS)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));

  /* TMP2 = begin + offset_limit, clamped to STR_END. */
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
  JUMPHERE(end2);
  /* A limit below the start position cannot match: the return value is
  prepared before the jump is added to the abort list. */
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
  add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
  JUMPHERE(end);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
  }

/* Skip the advance code when control falls through from above. */
start = JUMP(SLJIT_JUMP);

if (newlinecheck)
  {
  /* Reached from the main loop when the current code unit equals the
  first unit of the newline: step over it, and over the next unit as
  well when that one equals the second unit of the newline. */
  newlinelabel = LABEL();
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  /* TMP1 = 1 on equality, 0 otherwise; scaled to bytes below. */
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  end2 = JUMP(SLJIT_JUMP);
  }

/* This label is the return value of the function. */
mainloop = LABEL();

/* Increasing the STR_PTR here requires one less jump in the most common case. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && !common->invalid_utf) readuchar = TRUE;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
if (newlinecheck) readuchar = TRUE;

/* The current code unit is needed for the newline check and for
computing the length of a valid UTF character. */
if (readuchar)
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);

if (newlinecheck)
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->invalid_utf)
  {
  /* Skip continuation code units. */
  /* The end of the subject is checked before each read. */
  loop = LABEL();
  jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
  /* The last unit read was not a continuation unit: give it back. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(jump);
  }
else if (common->utf)
  {
  /* Valid UTF-8: for lead bytes (>= 0xc0) the number of additional
  bytes is read from utf8_table4. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->invalid_utf)
  {
  /* Skip continuation code units. */
  /* These are the units in the 0xdc00-0xdfff range. */
  loop = LABEL();
  jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
  /* The last unit read was not a continuation unit: give it back. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(jump);
  }
else if (common->utf)
  {
  /* Valid UTF-16: skip one more unit after a 0xd800-0xdbff unit. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
    {
    /* Branch free form: conditionally replace STR_PTR by STR_PTR + 1. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
    CMOV(SLJIT_LESS, STR_PTR, TMP2, 0);
    }
  else
    {
    /* Turn the flag into 0 or 1, scale it to bytes and add it. */
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
/* Target of the jump which skips the advance code. */
JUMPHERE(start);

if (newlinecheck)
  {
  /* Both exits of the newline code continue here. */
  JUMPHERE(end);
  JUMPHERE(end2);
  }

return mainloop;
}
5404 
5405 
add_prefix_char(PCRE2_UCHAR chr,fast_forward_char_data * chars,BOOL last)5406 static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
5407 {
5408 sljit_u32 i, count = chars->count;
5409 
5410 if (count == 255)
5411   return;
5412 
5413 if (count == 0)
5414   {
5415   chars->count = 1;
5416   chars->chars[0] = chr;
5417 
5418   if (last)
5419     chars->last_count = 1;
5420   return;
5421   }
5422 
5423 for (i = 0; i < count; i++)
5424   if (chars->chars[i] == chr)
5425     return;
5426 
5427 if (count >= MAX_DIFF_CHARS)
5428   {
5429   chars->count = 255;
5430   return;
5431   }
5432 
5433 chars->chars[count] = chr;
5434 chars->count = count + 1;
5435 
5436 if (last)
5437   chars->last_count++;
5438 }
5439 
/* Walks the compiled pattern from cc and records, position by position, which
code units may appear at the start of a match. Alternatives and optional items
are handled by recursive calls which add their code units to the same sets.

Arguments:
  common     compiler state (utf / ucp flags, ctypes and fcc tables)
  cc         current position in the compiled pattern
  chars      set for the next position to describe; advanced as positions
             are consumed
  max_chars  number of positions which may still be described
  rec_count  shared work budget, decremented once per loop iteration

Returns the number of positions described along this path. Recursive callers
use the returned value as their new max_chars limit. Returns 0 as soon as the
work budget is exhausted. */
static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
{
/* Recursive function, which scans prefix literals. */
BOOL last, any, class, caseless;
int len, repeat, len_save, consumed = 0;
sljit_u32 chr; /* Any unicode character. */
sljit_u8 *bytes, *bytes_end, byte;
PCRE2_SPTR alternative, cc_save, oc;
/* Buffer for the other case form of one character: up to 4 code units with
8-bit units, 2 with 16-bit units, otherwise a single code unit. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
PCRE2_UCHAR othercase[4];
#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
PCRE2_UCHAR othercase[2];
#else
PCRE2_UCHAR othercase[1];
#endif

/* repeat is only reset after an item has been recorded, so a value set by
OP_TYPEEXACT survives the "continue" and applies to the next opcode. */
repeat = 1;
while (TRUE)
  {
  /* Stop when the shared work budget is used up. */
  if (*rec_count == 0)
    return 0;
  (*rec_count)--;

  /* Per item state:
       last      stop scanning after this item has been recorded
       any       the position is marked as saturated (count = 255)
       class     the item is a 32 byte bitmap class
       caseless  the other case of the literal is recorded as well */
  last = TRUE;
  any = FALSE;
  class = FALSE;
  caseless = FALSE;

  switch (*cc)
    {
    case OP_CHARI:
    caseless = TRUE;
    /* Fall through */
    case OP_CHAR:
    /* Single literal: scanning continues after it. */
    last = FALSE;
    cc++;
    break;

    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    /* Zero width assertions. */
    cc++;
    continue;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    /* Assertion groups are stepped over as a whole. */
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    caseless = TRUE;
    /* Fall through */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    /* One occurrence is recorded; last stays TRUE, so scanning stops after it. */
    cc++;
    break;

    case OP_EXACTI:
    caseless = TRUE;
    /* Fall through */
    case OP_EXACT:
    /* Fixed repetition of a literal: recorded repeat times. */
    repeat = GET2(cc, 1);
    last = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    caseless = TRUE;
    /* Fall through */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    /* Optional literal: first scan the path which skips the literal (len is
    its length in code units), then record the literal itself below. */
    len = 1;
    cc++;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif
    max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
    if (max_chars == 0)
      return consumed;
    last = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    /* Follow the link stored after the opcode. */
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    /* Scan every alternative after the first one recursively; each call may
    lower max_chars. The first alternative is then scanned by this loop. */
    alternative = cc + GET(cc, 1);
    while (*alternative == OP_ALT)
      {
      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      alternative += GET(alternative, 1);
      }

    /* Capturing brackets carry an extra IMM2_SIZE operand. */
    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    /* NOTE(review): is_char7_bitset presumably reports that the bitmap only
    contains 7-bit characters - confirm against its definition. */
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
      return consumed;
#endif
    class = TRUE;
    break;

    case OP_NCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    class = TRUE;
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    /* Step over the opcode here; the shared cc++ below then steps once more. */
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UNICODE
    case OP_NOTPROP:
    case OP_PROP:
#if PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* Only sets the repeat count; the opcode which follows is processed by
    the next iteration with this count. */
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    /* Any other opcode ends the prefix. */
    return consumed;
    }

  if (any)
    {
    /* Mark repeat consecutive positions as saturated. */
    do
      {
      chars->count = 255;

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    repeat = 1;
    continue;
    }

  if (class)
    {
    /* bytes points to the 32 byte bitmap, cc to the opcode after it. */
    bytes = (sljit_u8*) (cc + 1);
    cc += 1 + 32 / sizeof(PCRE2_UCHAR);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      /* The class may match nothing: scan the path which skips it first. */
      max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      break;

      default:
      case OP_CRPLUS:
      case OP_CRMINPLUS:
      case OP_CRPOSPLUS:
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* The first operand becomes the repeat count; a zero count ends the
      prefix. */
      repeat = GET2(cc, 1);
      if (repeat <= 0)
        return consumed;
      break;
      }

    do
      {
      /* The top bit of the last bitmap byte is the bit of code unit 255;
      when it is set the position is marked as saturated. */
      if (bytes[31] & 0x80)
        chars->count = 255;
      else if (chars->count != 255)
        {
        /* Add every code unit whose bit is set. chr is the code unit which
        belongs to the lowest bit of the byte being examined. */
        bytes_end = bytes + 32;
        chr = 0;
        do
          {
          byte = *bytes++;
          SLJIT_ASSERT((chr & 0x7) == 0);
          if (byte == 0)
            chr += 8;
          else
            {
            do
              {
              if ((byte & 0x1) != 0)
                add_prefix_char(chr, chars, TRUE);
              byte >>= 1;
              chr++;
              }
            while (byte != 0);
            /* Round up to the first code unit of the next bitmap byte. */
            chr = (chr + 7) & ~7;
            }
          }
        while (chars->count != 255 && bytes < bytes_end);
        /* Rewind to the start of the bitmap for the next repetition. */
        bytes = bytes_end - 32;
        }

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      return consumed;

      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      cc++;
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* Scanning only continues when both operands are equal. */
      if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
        return consumed;
      cc += 1 + 2 * IMM2_SIZE;
      break;
      }

    repeat = 1;
    continue;
    }

  /* Literal character: len is its length in code units. */
  len = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif

  if (caseless && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UNICODE
    if (common->utf)
      {
      /* The other case is only usable when it is encoded with the same
      number of code units; otherwise the prefix ends here. */
      GETCHAR(chr, cc);
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
        return consumed;
      }
    else
#endif
      {
      chr = *cc;
#ifdef SUPPORT_UNICODE
      if (common->ucp && chr > 127)
        othercase[0] = UCD_OTHERCASE(chr);
      else
#endif
        othercase[0] = TABLE_GET(chr, common->fcc, chr);
      }
    }
  else
    {
    caseless = FALSE;
    othercase[0] = 0; /* Stops compiler warning - PH */
    }

  /* Record the code units of the literal repeat times; cc and len are
  restored before every repetition. */
  len_save = len;
  cc_save = cc;
  while (TRUE)
    {
    oc = othercase;
    do
      {
      len--;
      consumed++;

      /* NOTE(review): this value of chr is not read later in this function. */
      chr = *cc;
      /* Only the final code unit of the character is added with last set. */
      add_prefix_char(*cc, chars, len == 0);

      if (caseless)
        add_prefix_char(*oc, chars, len == 0);

      if (--max_chars == 0)
        return consumed;
      chars++;
      cc++;
      oc++;
      }
    while (len > 0);

    if (--repeat == 0)
      break;

    len = len_save;
    cc = cc_save;
    }

  repeat = 1;
  if (last)
    return consumed;
  }
}
5848 
5849 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
jumpto_if_not_utf_char_start(struct sljit_compiler * compiler,sljit_s32 reg,struct sljit_label * label)5850 static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
5851 {
5852 #if PCRE2_CODE_UNIT_WIDTH == 8
5853 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
5854 CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0x80, label);
5855 #elif PCRE2_CODE_UNIT_WIDTH == 16
5856 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
5857 CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label);
5858 #else
5859 #error "Unknown code width"
5860 #endif
5861 }
5862 #endif
5863 
5864 #include "pcre2_jit_simd_inc.h"
5865 
5866 #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
5867 
check_fast_forward_char_pair_simd(compiler_common * common,fast_forward_char_data * chars,int max)5868 static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)
5869 {
5870   sljit_s32 i, j, max_i = 0, max_j = 0;
5871   sljit_u32 max_pri = 0;
5872   PCRE2_UCHAR a1, a2, a_pri, b1, b2, b_pri;
5873 
5874   for (i = max - 1; i >= 1; i--)
5875     {
5876     if (chars[i].last_count > 2)
5877       {
5878       a1 = chars[i].chars[0];
5879       a2 = chars[i].chars[1];
5880       a_pri = chars[i].last_count;
5881 
5882       j = i - max_fast_forward_char_pair_offset();
5883       if (j < 0)
5884         j = 0;
5885 
5886       while (j < i)
5887         {
5888         b_pri = chars[j].last_count;
5889         if (b_pri > 2 && a_pri + b_pri >= max_pri)
5890           {
5891           b1 = chars[j].chars[0];
5892           b2 = chars[j].chars[1];
5893 
5894           if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)
5895             {
5896             max_pri = a_pri + b_pri;
5897             max_i = i;
5898             max_j = j;
5899             }
5900           }
5901         j++;
5902         }
5903       }
5904     }
5905 
5906 if (max_pri == 0)
5907   return FALSE;
5908 
5909 fast_forward_char_pair_simd(common, max_i, chars[max_i].chars[0], chars[max_i].chars[1], max_j, chars[max_j].chars[0], chars[max_j].chars[1]);
5910 return TRUE;
5911 }
5912 
5913 #endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
5914 
/* Emits code which moves STR_PTR forward to the next start position where the
code unit at the given offset is char1 or char2.

Arguments:
  common  compiler state
  char1   first accepted code unit
  char2   second accepted code unit (may be equal to char1)
  offset  distance in code units between the start position and the tested
          code unit; must be 0 unless the mode is PCRE2_JIT_COMPLETE

When common->match_end_ptr is set, STR_END is saved in TMP3, limited for the
duration of the search and restored afterwards. In PCRE2_JIT_COMPLETE mode
running out of subject jumps to common->failed_match. */
static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *scan_loop;
struct sljit_jump *is_char1;
struct sljit_jump *at_end;
PCRE2_UCHAR diff;
BOOL limit_end = (common->match_end_ptr != 0);

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);

if (limit_end)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

/* From here STR_PTR addresses the tested code unit, not the start position. */
if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (limit_end)
  {
  /* Keep the real end in TMP3 and use the smaller of STR_END and the
  adjusted match end as the temporary end. */
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD

if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
  {
  fast_forward_char_simd(common, char1, char2, offset);

  if (offset > 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

  if (limit_end)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

scan_loop = LABEL();

at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, at_end);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* diff is zero exactly when both accepted code units are the same. */
diff = char1 ^ char2;
if (diff == 0)
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, scan_loop);
else if (is_powerof2(diff))
  {
  /* The code units differ in a single bit: force that bit to one and
  compare once. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, diff);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | diff, scan_loop);
  }
else
  {
  is_char1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, scan_loop);
  JUMPHERE(is_char1);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  /* The start position itself must be the first code unit of a character. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
  jumpto_if_not_utf_char_start(compiler, TMP1, scan_loop);
  }
#endif

/* Step back from behind the tested code unit to the start position. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(at_end);

if (limit_end)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
6000 
/* Emits a fast forward search based on the code units found by scan_prefix
at the first positions of the pattern.

Three strategies are tried in this order:
  1. the SIMD character pair search, when it is available and a suitable
     pair of positions exists
  2. a skip table built from a run of at least four consecutive positions
     with known code unit sets, optionally followed by a test of one more
     position
  3. a search for one or two code units at a single position
     (fast_forward_first_char2)

Returns TRUE when the search has been handled here, FALSE when the prefix is
not usable. TRUE is also returned, without emitting code, when the skip table
cannot be allocated.
NOTE(review): presumably allocate_read_only_data records the failure so the
compilation is abandoned - confirm. */
static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *match;
fast_forward_char_data chars[MAX_N_CHARS];
sljit_s32 offset;
PCRE2_UCHAR mask;
PCRE2_UCHAR *char_set, *char_set_end;
int i, max, from;
int range_right = -1, range_len;
sljit_u8 *update_table = NULL;
BOOL in_range;
sljit_u32 rec_count;

for (i = 0; i < MAX_N_CHARS; i++)
  {
  chars[i].count = 0;
  chars[i].last_count = 0;
  }

/* The work budget bounds the total number of scan_prefix iterations. */
rec_count = 10000;
max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);

if (max < 1)
  return FALSE;

/* Convert last_count to priority. */
/* Priorities: 7 or 5 for a single code unit, 6 or 4 for two code units
which differ in one bit, 3 or 2 for two other code units, 1 for larger sets
and 0 for saturated positions. The higher value of each pair is used when
every stored code unit was added with the "last" flag. */
for (i = 0; i < max; i++)
  {
  SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count);

  if (chars[i].count == 1)
    {
    chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
    /* Simplifies algorithms later. */
    chars[i].chars[1] = chars[i].chars[0];
    }
  else if (chars[i].count == 2)
    {
    SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);

    if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
      chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
    else
      chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
    }
  else
    chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))
  return TRUE;
#endif

/* Find the longest run of consecutive non-saturated positions. Only runs
longer than the initial range_len (3) are accepted; range_right is the last
position of the selected run and range_len its length. The loop runs one
step past max to close a run which reaches the end. */
in_range = FALSE;
/* Prevent compiler "uninitialized" warning */
from = 0;
range_len = 4 /* minimum length */ - 1;
for (i = 0; i <= max; i++)
  {
  if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
    {
    range_len = i - from;
    range_right = i - 1;
    }

  if (i < max && chars[i].count < 255)
    {
    SLJIT_ASSERT(chars[i].count > 0);
    if (!in_range)
      {
      in_range = TRUE;
      from = i;
      }
    }
  else
    in_range = FALSE;
  }

if (range_right >= 0)
  {
  /* Build the skip table. It is indexed by the low 8 bits of a code unit
  and holds the smallest distance, in bytes, between range_right and a
  position of the run which accepts such a code unit. Code units which are
  not accepted anywhere in the run keep the full run length. */
  update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
  if (update_table == NULL)
    return TRUE;
  memset(update_table, IN_UCHARS(range_len), 256);

  for (i = 0; i < range_len; i++)
    {
    SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);

    char_set = chars[range_right - i].chars;
    char_set_end = char_set + chars[range_right - i].count;
    do
      {
      if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
        update_table[(*char_set) & 0xff] = IN_UCHARS(i);
      char_set++;
      }
    while (char_set < char_set_end);
    }
  }

/* Choose the position for the additional one or two code unit test: the
first position with priority 2 or more, replaced by any later position with
a strictly higher priority. range_right itself is never chosen. */
offset = -1;
/* Scan forward. */
for (i = 0; i < max; i++)
  {
  if (range_right == i)
    continue;

  if (offset == -1)
    {
    if (chars[i].last_count >= 2)
      offset = i;
    }
  else if (chars[offset].last_count < chars[i].last_count)
    offset = i;
  }

SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));

if (range_right < 0)
  {
  if (offset < 0)
    return FALSE;
  /* Works regardless of whether the count is 1 or 2, because chars[1] was
  set to chars[0] for single code unit sets above. */
  fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
  return TRUE;
  }

SLJIT_ASSERT(range_right != offset);

/* Move STR_END back by max code units for the duration of the search; the
match fails when the subtraction reports SLJIT_LESS. With match_end_ptr set
the original STR_END is kept in TMP3 and the stored match end replaces
STR_END when it is smaller. */
if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  }
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
  }

SLJIT_ASSERT(range_right >= 0);

/* Without virtual registers RETURN_ADDR holds the table address. */
if (!HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);

start = LABEL();
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

/* Load the low byte of the code unit at range_right. It is the first byte
of the code unit for 8 bit units and on little endian targets, otherwise
the last byte. */
#if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
#else
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
#endif

/* Replace the byte by its skip table entry. */
if (!HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
else
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);

/* Advance by the table entry and repeat until the entry is zero. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);

if (offset >= 0)
  {
  /* Test the code unit at the selected position. STR_PTR is moved one
  code unit forward first, so a failed test resumes at the next position. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (chars[offset].count == 1)
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
  else
    {
    mask = chars[offset].chars[0] ^ chars[offset].chars[1];
    if (is_powerof2(mask))
      {
      /* The two code units differ in one bit: set it and compare once. */
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
      }
    else
      {
      match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
      JUMPHERE(match);
      }
    }
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset != 0)
  {
  /* The code unit at the start position must begin a character. When no
  position was tested above (offset < 0), STR_PTR is temporarily moved
  forward so that a failed check resumes at the next position. */
  if (offset < 0)
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

  jumpto_if_not_utf_char_start(compiler, TMP1, start);

  if (offset < 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }
#endif

/* Undo the one code unit step taken for the position test. */
if (offset >= 0)
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Restore the original STR_END. */
if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
else
  OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
return TRUE;
}
6222 
fast_forward_first_char(compiler_common * common)6223 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
6224 {
6225 PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
6226 PCRE2_UCHAR oc;
6227 
6228 oc = first_char;
6229 if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
6230   {
6231   oc = TABLE_GET(first_char, common->fcc, first_char);
6232 #if defined SUPPORT_UNICODE
6233   if (first_char > 127 && (common->utf || common->ucp))
6234     oc = UCD_OTHERCASE(first_char);
6235 #endif
6236   }
6237 
6238 fast_forward_first_char2(common, first_char, oc, 0);
6239 }
6240 
/* Emits code which moves STR_PTR forward to a position which follows a
newline sequence. Nothing is skipped when STR_PTR is not after the "str"
field of the jit arguments (the firstchar jump).

The code has two main variants:
  - a fixed newline whose value is above 255 (two code units taken from the
    two bytes of common->newline), searched by the SIMD pair search or by a
    scalar loop
  - every other newline type, searched by the SIMD character search or by
    read_char / check_newlinechar; for NLTYPE_ANY and NLTYPE_ANYCRLF a
    CHAR_NL which directly follows a CHAR_CR is skipped as well

When common->match_end_ptr is set, STR_END is replaced by the stored match
end during the search and restored from TMP3 at the end. */
static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *lastchar = NULL;
struct sljit_jump *firstchar;
struct sljit_jump *quit = NULL;
struct sljit_jump *foundcr = NULL;
struct sljit_jump *notfoundnl;
jump_list *newline = NULL;

if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  }

if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  {
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
  if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)
    {
    /* TMP2 = arguments->str, TMP1 = arguments->begin. */
    if (HAS_VIRTUAL_REGISTERS)
      {
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

    /* Step back one code unit, and one more unless that reaches "begin",
    so a newline which ends just before the current position is found. */
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2U(SLJIT_SUB | SLJIT_SET_Z, STR_PTR, 0, TMP1, 0);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

    /* Search for the low byte of the newline at offset 1 and the high byte
    at offset 0, then step over both code units. */
    fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    }
  else
#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
    {
    lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    if (HAS_VIRTUAL_REGISTERS)
      {
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

    /* Step back one code unit when STR_PTR is at least two code units
    after "begin"; the loop below reads the two preceding code units. */
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, STR_PTR, 0, TMP1, 0);
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

    /* Advance one code unit at a time until the two code units before
    STR_PTR are the newline, or until STR_END is reached. */
    loop = LABEL();
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);

    JUMPHERE(quit);
    JUMPHERE(lastchar);
    }

  JUMPHERE(firstchar);

  if (common->match_end_ptr != 0)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

/* All other newline types. TMP2 = arguments->str. */
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));

/* Example: match /^/ to \r\n from offset 1. */
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

/* Go back one character (NLTYPE_ANY) or one code unit, so the search starts
with the character before the current position. */
if (common->nltype == NLTYPE_ANY)
  move_back(common, NULL, FALSE);
else
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The loop label is also published in common->ff_newline_shortcut. */
loop = LABEL();
common->ff_newline_shortcut = loop;

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))
  {
  if (common->nltype == NLTYPE_ANYCRLF)
    {
    fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);
    if (common->mode != PCRE2_JIT_COMPLETE)
      lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

    /* Step over the code unit which was found; only a CHAR_CR continues
    with the CHAR_NL check below. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    }
   else
    {
    fast_forward_char_simd(common, common->newline, common->newline, 0);

    /* Step over the newline; outside PCRE2_JIT_COMPLETE mode STR_PTR is
    limited to STR_END afterwards. */
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    if (common->mode != PCRE2_JIT_COMPLETE)
      {
      OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
      CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
      }
    }
  }
else
#endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */
  {
  /* Scalar search: read one character, leave when the end is reached, and
  go back to the loop label through the jumps collected in "newline".
  NOTE(review): those jumps are presumably taken when the character is not
  a newline - confirm against check_newlinechar. */
  read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
  lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
    foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  check_newlinechar(common, common->nltype, &newline, FALSE);
  set_jumps(newline, loop);
  }

if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
  {
  /* quit is still NULL on the scalar path: a newline other than CHAR_CR
  skips the CHAR_NL check, a CHAR_CR continues here. */
  if (quit == NULL)
    {
    quit = JUMP(SLJIT_JUMP);
    JUMPHERE(foundcr);
    }

  /* After a CHAR_CR: advance one more code unit when the next code unit
  is CHAR_NL. */
  notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(notfoundnl);
  JUMPHERE(quit);
  }

if (lastchar)
  JUMPHERE(lastchar);
JUMPHERE(firstchar);

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
6413 
6414 static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6415 
fast_forward_start_bits(compiler_common * common)6416 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
6417 {
6418 DEFINE_COMPILER;
6419 const sljit_u8 *start_bits = common->re->start_bitmap;
6420 struct sljit_label *start;
6421 struct sljit_jump *partial_quit;
6422 #if PCRE2_CODE_UNIT_WIDTH != 8
6423 struct sljit_jump *found = NULL;
6424 #endif
6425 jump_list *matches = NULL;
6426 
6427 if (common->match_end_ptr != 0)
6428   {
6429   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6430   OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
6431   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6432   OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
6433   CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
6434   }
6435 
6436 start = LABEL();
6437 
6438 partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6439 if (common->mode == PCRE2_JIT_COMPLETE)
6440   add_jump(compiler, &common->failed_match, partial_quit);
6441 
6442 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6443 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6444 
6445 if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
6446   {
6447 #if PCRE2_CODE_UNIT_WIDTH != 8
6448   if ((start_bits[31] & 0x80) != 0)
6449     found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
6450   else
6451     CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
6452 #elif defined SUPPORT_UNICODE
6453   if (common->utf && is_char7_bitset(start_bits, FALSE))
6454     CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
6455 #endif
6456   OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6457   OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6458   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
6459   if (!HAS_VIRTUAL_REGISTERS)
6460     {
6461     OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
6462     OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP3, 0);
6463     }
6464   else
6465     {
6466     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6467     OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
6468     }
6469   JUMPTO(SLJIT_ZERO, start);
6470   }
6471 else
6472   set_jumps(matches, start);
6473 
6474 #if PCRE2_CODE_UNIT_WIDTH != 8
6475 if (found != NULL)
6476   JUMPHERE(found);
6477 #endif
6478 
6479 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6480 
6481 if (common->mode != PCRE2_JIT_COMPLETE)
6482   JUMPHERE(partial_quit);
6483 
6484 if (common->match_end_ptr != 0)
6485   OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
6486 }
6487 
search_requested_char(compiler_common * common,PCRE2_UCHAR req_char,BOOL caseless,BOOL has_firstchar)6488 static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
6489 {
6490 DEFINE_COMPILER;
6491 struct sljit_label *loop;
6492 struct sljit_jump *toolong;
6493 struct sljit_jump *already_found;
6494 struct sljit_jump *found;
6495 struct sljit_jump *found_oc = NULL;
6496 jump_list *not_found = NULL;
6497 sljit_u32 oc, bit;
6498 
6499 SLJIT_ASSERT(common->req_char_ptr != 0);
6500 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
6501 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
6502 toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
6503 already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);
6504 
6505 if (has_firstchar)
6506   OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6507 else
6508   OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
6509 
6510 oc = req_char;
6511 if (caseless)
6512   {
6513   oc = TABLE_GET(req_char, common->fcc, req_char);
6514 #if defined SUPPORT_UNICODE
6515   if (req_char > 127 && (common->utf || common->ucp))
6516     oc = UCD_OTHERCASE(req_char);
6517 #endif
6518   }
6519 
6520 #ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
6521 if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
6522   {
6523   not_found = fast_requested_char_simd(common, req_char, oc);
6524   }
6525 else
6526 #endif
6527   {
6528   loop = LABEL();
6529   add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
6530 
6531   OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
6532 
6533   if (req_char == oc)
6534     found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6535   else
6536     {
6537     bit = req_char ^ oc;
6538     if (is_powerof2(bit))
6539       {
6540        OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
6541       found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
6542       }
6543     else
6544       {
6545       found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6546       found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
6547       }
6548     }
6549   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6550   JUMPTO(SLJIT_JUMP, loop);
6551 
6552   JUMPHERE(found);
6553   if (found_oc)
6554     JUMPHERE(found_oc);
6555   }
6556 
6557 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
6558 
6559 JUMPHERE(already_found);
6560 JUMPHERE(toolong);
6561 return not_found;
6562 }
6563 
static void do_revertframes(compiler_common *common)
{
/* Emits the fast-call subroutine that copies saved values from the
backtracking stack (below STACK_TOP) back into the local variable area.

The word just below STACK_TOP selects the action:
  > 0 : offset (from the local base) of a two-word local; the next two
        words are copied from the stack into it and three words are popped;
  < 0 : negated offset of a one-word local; the next word is copied into
        it and two words are popped;
  = 0 : end of the reverted values; the subroutine returns.

TMP1 holds the local base address, TMP2 (and TMP3 when the registers are not
virtual) are scratch. The return address lives in RETURN_ADDR. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *mainloop;
/* Signed word size. Negating sizeof() directly would negate an unsigned
size_t and depend on wrap-around plus a same-width conversion to sljit_sw. */
const sljit_sw word_size = (sljit_sw)sizeof(sljit_sw);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
GET_LOCAL_BASE(TMP1, 0, 0);

/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -word_size);
jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);

/* Positive selector: restore a two-word local. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * word_size));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), word_size, SLJIT_MEM1(STACK_TOP), -(3 * word_size));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * word_size);
  }
else
  {
  /* TMP1 doubles as a scratch register here, so the local base is
  recomputed before it is needed again. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * word_size));
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * word_size));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * word_size);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
  GET_LOCAL_BASE(TMP1, 0, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), word_size, TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);

/* Selector <= 0: zero ends the sequence, negative continues below. */
JUMPHERE(jump);
jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
/* End of reverting values. */
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Negative selector: restore a one-word local at offset -TMP2. */
JUMPHERE(jump);
OP2(SLJIT_SUB, TMP2, 0, SLJIT_IMM, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * word_size));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * word_size);
  }
else
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * word_size));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * word_size);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);
}
6617 
static void check_wordboundary(compiler_common *common)
{
/* Emits the fast-call subroutine used to test for a word boundary at
STR_PTR. The return address is parked in LOCALS0.

TMP3 receives the "word character" flag (0 or 1) of the character before
STR_PTR; it stays 0 when STR_PTR is at or before the 'begin' pointer of
jit_arguments. TMP2 receives the same flag for the character at STR_PTR; it
stays 0 when check_str_end reports the end of the subject.

On the normal exit TMP2 is replaced by TMP2 ^ TMP3 and the zero flag is set
from that result, so a non-zero TMP2 means the two sides differ. When
common->invalid_utf is set there are two additional exits, described at the
end of the function. TMP1 is overwritten with the return address. */
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
#ifdef SUPPORT_UNICODE
struct sljit_label *valid_utf;
jump_list *invalid_utf1 = NULL;
#endif /* SUPPORT_UNICODE */
jump_list *invalid_utf2 = NULL;
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
struct sljit_jump *jump;
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */

/* The table lookups below extract the flag with a shift by 4. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Get type of the previous char, and put it to TMP3. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
/* No previous character: keep TMP3 == 0 and go straight to the next one. */
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    /* Partial matching: TMP1 and STR_PTR are preserved (in RETURN_ADDR and
    TMP2) around the move_back / check_start_used_ptr sequence. */
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
    }
  }
else
#endif /* SUPPORT_UNICODE */
  {
  if (common->mode == PCRE2_JIT_COMPLETE)
    peek_char_back(common, READ_CHAR_MAX, NULL);
  else
    {
    /* Partial matching: step back, run check_start_used_ptr, then read the
    character forward again with STR_PTR updated by read_char. */
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
    }
  }

/* Testing char type. */
#ifdef SUPPORT_UNICODE
if (common->ucp)
  {
  /* Underscore counts as a word character (TMP2 stays 1). Otherwise the
  value produced by the getucdtype call is tested against the two type
  ranges ucp_Ll..ucp_Lu and ucp_Nd..ucp_No. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP3, 0, TMP2, 0);
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* Table driven test: characters above 255 skip the lookup, leaving TMP3
  at 0; for the others bit 4 of the common->ctypes entry is extracted. */
#if PCRE2_CODE_UNIT_WIDTH != 8
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  /* Here TMP3 has already been zeroed. */
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
JUMPHERE(skipread);

/* Now the character at STR_PTR; its flag goes to TMP2 (0 at subject end). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);

/* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UNICODE

/* Re-entry point for the invalid_utf1 path at the end of the function. */
valid_utf = LABEL();

if (common->ucp)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  }
else
#endif /* SUPPORT_UNICODE */
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
set_jumps(skipread_list, LABEL());

/* Normal exit: TMP2 = flag(next) ^ flag(previous), zero flag set from it. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  /* The previous character could not be read (invalid_utf1): peek at the
  next one. If that is not INVALID_UTF_CHAR, classify it at valid_utf;
  otherwise return with TMP2 == -1. */
  set_jumps(invalid_utf1, LABEL());

  peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

  /* The next character could not be read (invalid_utf2): return with TMP2
  set to the flag of the previous character. */
  set_jumps(invalid_utf2, LABEL());
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
  }
#endif /* SUPPORT_UNICODE */
}
6778 
static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Tries to express the 256-bit class bitmap "bits" with at most four range
boundaries and to test the character in TMP1 with a few comparisons.
Membership of characters above 255 is given by nclass. The emitted jumps are
appended to backtracks; they are taken for characters outside the class, or
inside it when invert is set. Returns FALSE, without emitting anything, when
more boundaries would be needed. May destroy TMP1. */
DEFINE_COMPILER;
int bounds[MAX_CLASS_RANGE_SIZE];
sljit_u8 state, here, uniform;
sljit_s32 span_cond, point_cond;
int pos = 0, count = 0;
int lo, hi, delta, base;

state = bits[0] & 0x1;
/* Byte value in which every bit equals the current state. */
uniform = state ? 0xff : 0x00;

/* Collect every position at which membership flips. */
while (pos < 256)
  {
  /* A byte-aligned group of eight bits in the current state holds no flip. */
  if ((pos & 0x7) == 0 && bits[pos >> 3] == uniform)
    {
    pos += 8;
    continue;
    }

  here = (bits[pos >> 3] >> (pos & 0x7)) & 0x1;
  if (here != state)
    {
    if (count >= MAX_CLASS_RANGE_SIZE)
      return FALSE;
    bounds[count++] = pos;
    state = here;
    uniform = here ? 0xff : 0x00;
    }
  pos++;
  }

/* Close the last range at 256 when the state of character 255 differs from
the membership of the characters above it. */
if (nclass ? (state == 0) : (state == 1))
  {
  if (count >= MAX_CLASS_RANGE_SIZE)
    return FALSE;
  bounds[count++] = 256;
  }

if (count < 0 || count > 4)
  return FALSE;

/* From here on state tells whether character 0 passes the test. */
state = bits[0] & 0x1;
if (invert)
  state ^= 0x1;

/* Jump conditions for a [lo, hi) span / a single value that lies between
two passing regions (state != 0) or two failing regions (state == 0). */
span_cond = (state != 0) ? SLJIT_LESS : SLJIT_GREATER_EQUAL;
point_cond = (state != 0) ? SLJIT_EQUAL : SLJIT_NOT_EQUAL;

if (count == 0)
  {
  /* Uniform bitmap: no character passes, or (state != 0) all of them do. */
  if (state == 0)
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  return TRUE;
  }

if (count == 1)
  {
  add_jump(compiler, backtracks, CMP(state == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, bounds[0]));
  return TRUE;
  }

if (count == 2)
  {
  if (bounds[0] + 1 == bounds[1])
    add_jump(compiler, backtracks, CMP(point_cond, TMP1, 0, SLJIT_IMM, bounds[0]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, bounds[0]);
    add_jump(compiler, backtracks, CMP(span_cond, TMP1, 0, SLJIT_IMM, bounds[1] - bounds[0]));
    }
  return TRUE;
  }

if (count == 3)
  {
  /* One open-ended failing region plus one bounded failing span [lo, hi). */
  if (state != 0)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, bounds[2]));
    lo = bounds[0];
    hi = bounds[1];
    }
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, bounds[0]));
    lo = bounds[1];
    hi = bounds[2];
    }

  if (lo + 1 == hi)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, lo));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, lo);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, hi - lo));
    }
  return TRUE;
  }

if (count != 4)
  {
  SLJIT_UNREACHABLE();
  return FALSE;
  }

/* Four boundaries. If the two spans have the same length and differ in
exactly one bit, that bit is forced on and a single span test is enough. */
delta = bounds[2] - bounds[0];
if ((bounds[1] - bounds[0]) == (bounds[3] - bounds[2])
    && (bounds[0] | delta) == bounds[2]
    && (bounds[1] & delta) == 0
    && is_powerof2(delta))
  {
  SLJIT_ASSERT((bounds[0] & delta) == 0 && (bounds[2] & bounds[3] & delta) != 0);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, delta);
  if (bounds[2] + 1 == bounds[3])
    add_jump(compiler, backtracks, CMP(point_cond, TMP1, 0, SLJIT_IMM, bounds[2]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, bounds[2]);
    add_jump(compiler, backtracks, CMP(span_cond, TMP1, 0, SLJIT_IMM, bounds[3] - bounds[2]));
    }
  return TRUE;
  }

if (state != 0)
  {
  /* Two failing spans. base tracks how much has already been subtracted
  from TMP1 by the first test. */
  base = 0;
  if (bounds[0] + 1 == bounds[1])
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, bounds[0]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, bounds[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, bounds[1] - bounds[0]));
    base = bounds[0];
    }

  if (bounds[2] + 1 == bounds[3])
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, bounds[2] - base));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, bounds[2] - base);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, bounds[3] - bounds[2]));
    }
  return TRUE;
  }

/* Two passing spans: fail outside [bounds[0], bounds[3]) and inside the gap
[bounds[1], bounds[2]). */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, bounds[0]);
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, bounds[3] - bounds[0]));
if (bounds[1] + 1 == bounds[2])
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, bounds[1] - bounds[0]));
else
  {
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, bounds[1] - bounds[0]);
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, bounds[2] - bounds[1]));
  }
return TRUE;
}
6930 
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Tries to test the character in TMP1 against a short explicit list of
characters taken from the class bitmap "bits" (from its complement when
nclass is set), using conditional moves. TMP2 ends up non-zero when TMP1
matched a listed character, and one jump on TMP2 is appended to backtracks.
Returns FALSE, without emitting anything, when conditional moves are not
available or the list would exceed MAX_CLASS_CHARS_SIZE entries.
May destroy TMP1. */
DEFINE_COMPILER;
uint16_t chars[MAX_CLASS_CHARS_SIZE];
uint8_t mask;
sljit_s32 cond;
BOOL merged;
int idx, shift, n, ch;
int count = 0, paired = 0, first = 0;

if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  return FALSE;

/* Build the list. Two characters that differ only in bit 0x20 share one
entry: the entry gets 0x20 set in its low byte and 0x100 as a marker. */
for (idx = 0; idx < 32; idx++)
  {
  mask = nclass ? (uint8_t)~bits[idx] : bits[idx];

  for (shift = 0; mask != 0; shift++, mask >>= 1)
    {
    if ((mask & 0x1) == 0)
      continue;

    ch = idx * 8 + shift;
    merged = FALSE;

    if ((ch & 0x20) != 0)
      for (n = 0; n < count; n++)
        if (chars[n] == ch - 0x20)
          {
          chars[n] |= 0x120;
          merged = TRUE;
          break;
          }

    if (merged)
      continue;

    if (count >= MAX_CLASS_CHARS_SIZE)
      return FALSE;

    chars[count++] = (uint16_t) ch;
    }
  }

if (count == 0) return FALSE;  /* Should never occur, but stops analyzers complaining. */

/* Unpaired entries first. The conditional move copies TMP1 into TMP2, which
would be zero for character 0, so that one entry uses a flag move instead. */
if (chars[0] == 0)
  {
  first = 1;
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);

for (idx = first; idx < count; idx++)
  {
  if ((chars[idx] & 0x100) != 0)
    {
    paired++;
    continue;
    }

  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, chars[idx]);
  CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
  }

/* Paired entries: force bit 0x20 on in TMP1 and compare once per pair. */
if (paired > 0)
  {
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);

  for (idx = 0; idx < count; idx++)
    {
    if ((chars[idx] & 0x100) == 0)
      continue;

    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, chars[idx] & 0xff);
    CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
    }
  }

/* invert flips the sense given by nclass. */
cond = ((nclass != 0) != (invert != 0)) ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
add_jump(compiler, backtracks, CMP(cond, TMP2, 0, SLJIT_IMM, 0));
return TRUE;
}
7031 
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Emits an optimized test of TMP1 against the class bitmap "bits": the
range based form is tried first and the character list form is the fallback.
Returns FALSE when neither applies. May destroy TMP1. */
return optimize_class_ranges(common, bits, nclass, invert, backtracks)
  ? TRUE
  : optimize_class_chars(common, bits, nclass, invert, backtracks);
}
7039 
static void check_anynewline(compiler_common *common)
{
/* Emits the fast-call subroutine that tests whether the character in TMP1
is a newline character: 0x0a-0x0d, 0x85 and, when wider characters are
possible, 0x2028/0x2029. The result is accumulated in TMP2 and the zero flag
is set from it by the last operation. TMP1 is modified. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
/* In 8-bit mode characters above 0xff only occur with UTF. */
const BOOL wide_chars = common->utf;
#else
const BOOL wide_chars = TRUE;
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* 0x0a..0x0d, tested as one unsigned range after rebasing TMP1 to 0x0a. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

/* 0x85; its flag is consumed by the next OP_FLAGS that gets emitted. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
if (wide_chars)
  {
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting bit 0 folds 0x2028 onto 0x2029, so one compare covers both. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
  }
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */

OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7066 
static void check_hspace(compiler_common *common)
{
/* Emits the fast-call subroutine that tests whether the character in TMP1
is a horizontal space: 0x09, 0x20, 0xa0 and, when wider characters are
possible, 0x1680, 0x180e, 0x2000-0x200a, 0x202f, 0x205f and 0x3000. The
result is accumulated in TMP2 and the zero flag is set from it by the last
operation. TMP1 may be modified. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
/* In 8-bit mode characters above 0xff only occur with UTF. */
const BOOL wide_chars = common->utf;
#else
const BOOL wide_chars = TRUE;
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Each comparison's flag is merged into TMP2 by the OP_FLAGS after it. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xa0);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
if (wide_chars)
  {
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

  /* Rebase TMP1 to 0x2000; the remaining constants are relative to it. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
  }
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */

OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7105 
static void check_vspace(compiler_common *common)
{
/* Emits the fast-call subroutine that tests whether the character in TMP1
is a vertical space: 0x0a-0x0d, 0x85 and, when wider characters are
possible, 0x2028/0x2029. The result is accumulated in TMP2 and the zero flag
is set from it by the last operation. TMP1 is modified. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
/* In 8-bit mode characters above 0xff only occur with UTF. */
const BOOL wide_chars = common->utf;
#else
const BOOL wide_chars = TRUE;
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* 0x0a..0x0d, tested as one unsigned range after rebasing TMP1 to 0x0a. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

/* 0x85; its flag is consumed by the next OP_FLAGS that gets emitted. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
if (wide_chars)
  {
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting bit 0 folds 0x2028 onto 0x2029, so one compare covers both. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
  }
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */

OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7133 
static void do_casefulcmp(compiler_common *common)
{
/* Emits the fast-call subroutine that compares two code unit sequences
exactly (case sensitive). As used by the code below: TMP1 points to the
first sequence, TMP2 holds the length in bytes (it is counted down by
IN_UCHARS(1) per code unit) and STR_PTR is first moved back by TMP2 bytes to
reach the second sequence. The loop ends at the first differing code unit or
when TMP2 reaches zero. The return address is kept in LOCALS0 and is loaded
into TMP1 before returning.
NOTE(review): callers presumably inspect TMP2 / the flags afterwards to tell
a match from a mismatch - confirm at the call sites. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;
int char1_reg;
int char2_reg;

/* Pick the two registers that hold the code units being compared. */
if (HAS_VIRTUAL_REGISTERS)
  {
  char1_reg = STR_END;
  char2_reg = STACK_TOP;
  }
else
  {
  char1_reg = TMP3;
  char2_reg = RETURN_ADDR;
  }

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* STR_END / STACK_TOP are borrowed: park their values in TMP3 / RETURN_ADDR
until the end of the subroutine. */
if (char1_reg == STR_END)
  {
  OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
  }

/* Variant 1: post-update memory access. The SLJIT_MEM_SUPP call is only a
support query. */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  {
  label = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_NOT_ZERO, label);

  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  }
/* Variant 2: pre-update memory access. Both pointers start one code unit
early and STR_PTR is corrected by one code unit after the loop. */
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  {
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  label = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_NOT_ZERO, label);

  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }
/* Variant 3: plain loads followed by explicit pointer increments. */
else
  {
  label = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_NOT_ZERO, label);

  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  }

/* Give the borrowed registers their original values back. */
if (char1_reg == STR_END)
  {
  OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
  }

OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7213 
static void do_caselesscmp(compiler_common *common)
{
/* Emits the fast-call subroutine that compares two code unit sequences
after mapping each code unit through the common->lcc table (code units above
255 are compared unmapped when the code unit width is not 8). As used by the
code below: TMP1 points to the first sequence, TMP2 holds the length in
bytes (counted down by IN_UCHARS(1) per code unit) and STR_PTR is first
moved back by TMP2 bytes to reach the second sequence. The loop ends at the
first difference or when TMP2 reaches zero. The return address is kept in
LOCALS0 and is loaded into TMP1 before returning.
NOTE(review): callers presumably inspect TMP2 / the flags afterwards to tell
a match from a mismatch - confirm at the call sites. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;
int char1_reg = STR_END;
int char2_reg;
int lcc_table;
/* 0: plain loads, 1: post-update access, 2: pre-update access. */
int opt_type = 0;

if (HAS_VIRTUAL_REGISTERS)
  {
  char2_reg = STACK_TOP;
  lcc_table = STACK_LIMIT;
  }
else
  {
  char2_reg = RETURN_ADDR;
  lcc_table = TMP3;
  }

/* Support queries only (SLJIT_MEM_SUPP); they select the loop variant. */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  opt_type = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  opt_type = 2;

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* char1_reg is STR_END: its value is parked in LOCALS1 until the end. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);

/* STACK_TOP / STACK_LIMIT are borrowed as well: park them in TMP3 and
RETURN_ADDR. */
if (char2_reg == STACK_TOP)
  {
  OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
  }

OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);

/* Load one code unit from each sequence. In the plain variant STR_PTR is
advanced later, after the table lookups. */
if (opt_type == 1)
  {
  label = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  }
else if (opt_type == 2)
  {
  /* Pre-update access: start one code unit early; STR_PTR is corrected
  after the loop. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  label = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  }
else
  {
  label = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  }

/* Map both code units through the lcc table; for wider code units values
above 255 skip the lookup. */
#if PCRE2_CODE_UNIT_WIDTH != 8
jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif

if (opt_type == 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Leave on the first difference, otherwise count down and repeat. */
jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, label);

JUMPHERE(jump);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

if (opt_type == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Give the borrowed registers their original values back. */
if (char2_reg == STACK_TOP)
  {
  OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
  }

OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7311 
/* Emits the code that compares one pattern character with the subject.
In UTF mode every code unit of the character is processed by a single call.

Arguments:
  common      compiler state (common->utf enables multi-unit characters)
  caseless    TRUE when the other case of the character is accepted as well
  cc          first code unit of the pattern character
  context     state shared by the calls that compare one sequence:
                length     amount still to be compared; it is reduced by
                           IN_UCHARS(1) per code unit and also serves as the
                           negative displacement from STR_PTR of each load
                sourcereg  register that receives the next load, or -1 when
                           nothing has been loaded yet
                ucharptr   number of units collected in c / oc
                c, oc      expected units and their case-insensitivity masks
  backtracks  list receiving the jumps taken on a mismatch

Returns:      cc advanced past the character that was compared
*/
static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
    compare_context *context, jump_list **backtracks)
{
DEFINE_COMPILER;
unsigned int case_bit = 0;
PCRE2_SPTR case_unit = NULL;
BOOL fold_unit;
#ifdef SUPPORT_UNICODE
int units_left;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  case_bit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(case_bit);
  /* The returned value is split into the index of the code unit that
  differs between the two cases (upper bits) and the differing bit itself. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  case_unit = cc + (case_bit >> 8);
  case_bit &= 0xff;
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* Only characters in the BMP are handled here. Should a character outside
  the BMP ever differ from its other case in a single bit (none does at the
  moment), this needs to be revisited for PCRE2_CODE_UNIT_WIDTH == 32. */
  case_unit = cc + (case_bit >> 9);
  /* Bit 8 selects the upper byte of the 16 bit unit. */
  case_bit = ((case_bit & 0x100) != 0) ? ((case_bit & 0xff) << 8) : (case_bit & 0xff);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  }

/* First call for this sequence: issue the initial load into TMP1, and let
the following load go to TMP2. */
if (context->sourcereg == -1)
  {
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if PCRE2_CODE_UNIT_WIDTH == 8
  else if (context->length >= 2)
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  else
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#else
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UNICODE
units_left = (common->utf && HAS_EXTRALEN(*cc)) ? 1 + GET_EXTRALEN(*cc) : 1;

do
  {
#endif

  context->length -= IN_UCHARS(1);

  /* Only the unit that carries the case bit is compared case insensitively. */
  fold_unit = (case_bit != 0 && case_unit == cc);

#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)

  /* Unaligned reads are available: units are collected, and compared
  together as soon as they fill one of the supported load sizes. */
  context->c.asuchars[context->ucharptr] = fold_unit ? (*cc | case_bit) : *cc;
  context->oc.asuchars[context->ucharptr] = fold_unit ? case_bit : 0;
  context->ucharptr++;

#if PCRE2_CODE_UNIT_WIDTH == 8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Start loading the next group before the current one is compared. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if PCRE2_CODE_UNIT_WIDTH == 8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */

    /* Switch to the register that was filled by the previous load. */
    context->sourcereg = (context->sourcereg == TMP1) ? TMP2 : TMP1;

    switch(context->ucharptr)
      {
      case 4 / sizeof(PCRE2_UCHAR):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(PCRE2_UCHAR):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#if PCRE2_CODE_UNIT_WIDTH == 8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    context->ucharptr = 0;
    }

#else

  /* No unaligned reads (or 32 bit mode): one code unit per comparison. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  /* Switch to the register that was filled by the previous load. */
  context->sourcereg = (context->sourcereg == TMP1) ? TMP2 : TMP1;

  if (fold_unit)
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, case_bit);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, fold_unit ? (*cc | case_bit) : *cc));

#endif

  cc++;
#ifdef SUPPORT_UNICODE
  }
while (--units_left > 0);
#endif

return cc;
}
7464 
7465 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
7466 
/* Emits the instruction that changes the bias of typereg from typeoffset to
value, so that typereg holds (character type - value) afterwards, and records
the new bias in typeoffset. Nothing is emitted when the bias is unchanged.
Both typereg and typeoffset must be in scope where the macro is used.

The body is wrapped in do { } while (0), which makes the macro a single
statement: it is safe as the body of an unbraced if / else. The value
argument is evaluated several times, so it must be free of side effects. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)
7476 
/* Emits the instruction that changes the bias of TMP1 from charoffset to
value, so that TMP1 holds (character - value) afterwards, and records the new
bias in charoffset. Nothing is emitted when the bias is unchanged. charoffset
must be in scope where the macro is used.

Note that the emitted instruction only runs if control reaches it, while
charoffset is updated at compile time: the macro must not be used on a path
that the generated code may jump over.

The body is wrapped in do { } while (0), which makes the macro a single
statement: it is safe as the body of an unbraced if / else. The value
argument is evaluated several times, so it must be free of side effects. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
7486 
7487 static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
7488 
7489 #ifdef SUPPORT_UNICODE
/* Bits of the unicode_status variable of compile_xclass_matchingpath(). */

/* The character value is needed after the UCD record lookup as well. */
#define XCLASS_SAVE_CHAR (1 << 0)
/* The character has already been copied to RETURN_ADDR (bitmap test). */
#define XCLASS_CHAR_SAVED (1 << 1)
/* An item tests the chartype field of the UCD record. */
#define XCLASS_HAS_TYPE (1 << 2)
/* An item tests the script field (PT_SC or a non-negated PT_SCX). */
#define XCLASS_HAS_SCRIPT (1 << 3)
/* A PT_SCX item is present. */
#define XCLASS_HAS_SCRIPT_EXTENSION (1 << 4)
/* A PT_BOOL item is present. */
#define XCLASS_HAS_BOOL (1 << 5)
/* A PT_BIDICL item is present. */
#define XCLASS_HAS_BIDICL (1 << 6)
/* Any of these bits requires the UCD record of the character. */
#define XCLASS_NEEDS_UCD \
  (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_SCRIPT_EXTENSION | \
   XCLASS_HAS_BOOL | XCLASS_HAS_BIDICL)
/* A negated (XCL_NOTPROP) PT_SCX item is present. */
#define XCLASS_SCRIPT_EXTENSION_NOTPROP (1 << 7)
/* TMP2 was saved in RETURN_ADDR for the script extension pass. */
#define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR (1 << 8)
/* TMP2 was saved in the LOCALS0 stack slot for the script extension pass. */
#define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0 (1 << 9)
7501 
7502 #endif /* SUPPORT_UNICODE */
7503 
/* Emits the matching path of an extended character class.

Arguments:
  common      compiler state
  cc          points to the flag unit of the class data (XCL_NOT, XCL_MAP and
              XCL_HASPROP bits); an optional 32 byte bitmap and the item list
              terminated by XCL_END follow it
  backtracks  list receiving the jumps taken when the character is rejected

The generated code falls through when the character is accepted.

The work is done in several passes over the item list:
  1. The items are counted (compares), the lowest / highest character that
     needs to be distinguished is computed (min / max, passed to read_char()),
     and unicode_status records which UCD fields are required.
  2. The character is read, and the bitmap (if any) is tested.
  3. When a UCD record is required, its offset is computed into TMP2, and the
     bidi class, boolean property, script and script extension items are
     processed, each group in its own pass.
  4. The remaining items (single characters, ranges and the properties based
     on the character type or value) are processed in pattern order.

Every processed item decrements compares; the last one (compares == 0) is the
one that decides between falling through and backtracking, so its condition
is inverted for a non-negated class.

Register use in pass 4: TMP1 holds the character minus charoffset, typereg
holds the character type minus typeoffset (see SET_CHAR_OFFSET and
SET_TYPE_OFFSET), TMP2 collects the result of combined comparisons. */
static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
jump_list *found = NULL;
/* For a negated class a hit on any item rejects the character. */
jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
struct sljit_jump *jump = NULL;
PCRE2_SPTR ccbegin;
int compares, invertcmp, numberofcmps;
#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
BOOL utf = common->utf;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */

#ifdef SUPPORT_UNICODE
sljit_u32 unicode_status = 0;
int typereg = TMP1;
const sljit_u32 *other_cases;
sljit_uw typeoffset;
#endif /* SUPPORT_UNICODE */

/* Scanning the necessary info. */
cc++;
ccbegin = cc;
compares = 0;

if (cc[-1] & XCL_MAP)
  {
  min = 0;
  cc += 32 / sizeof(PCRE2_UCHAR);
  }

while (*cc != XCL_END)
  {
  compares++;
  if (*cc == XCL_SINGLE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
    if (c < min) min = c;
#ifdef SUPPORT_UNICODE
    unicode_status |= XCLASS_SAVE_CHAR;
#endif /* SUPPORT_UNICODE */
    }
  else if (*cc == XCL_RANGE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);
    if (c < min) min = c;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
#ifdef SUPPORT_UNICODE
    unicode_status |= XCLASS_SAVE_CHAR;
#endif /* SUPPORT_UNICODE */
    }
#ifdef SUPPORT_UNICODE
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    cc++;
    if (*cc == PT_CLIST && cc[-1] == XCL_PROP)
      {
      /* A positive caseless set only needs its own members. */
      other_cases = PRIV(ucd_caseless_sets) + cc[1];
      while (*other_cases != NOTACHAR)
        {
        if (*other_cases > max) max = *other_cases;
        if (*other_cases < min) min = *other_cases;
        other_cases++;
        }
      }
    else
      {
      /* Any other property may depend on every character. */
      max = READ_CHAR_MAX;
      min = 0;
      }

    switch(*cc)
      {
      case PT_ANY:
      /* A positive PT_ANY accepts everything, which makes the rest of the
      class irrelevant; a negated PT_ANY never matches, so it is ignored. */
      if (cc[-1] == XCL_PROP)
        {
        compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
        if (list == backtracks)
          add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
        return;
        }
      break;

      case PT_LAMP:
      case PT_GC:
      case PT_PC:
      case PT_ALNUM:
      unicode_status |= XCLASS_HAS_TYPE;
      break;

      case PT_SCX:
      unicode_status |= XCLASS_HAS_SCRIPT_EXTENSION;
      if (cc[-1] == XCL_NOTPROP)
        {
        unicode_status |= XCLASS_SCRIPT_EXTENSION_NOTPROP;
        break;
        }
      /* A positive PT_SCX is processed both by the script and by the script
      extension pass, so it counts as two comparisons. */
      compares++;
      /* Fall through */

      case PT_SC:
      unicode_status |= XCLASS_HAS_SCRIPT;
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      case PT_WORD:
      case PT_PXGRAPH:
      case PT_PXPRINT:
      case PT_PXPUNCT:
      unicode_status |= XCLASS_SAVE_CHAR | XCLASS_HAS_TYPE;
      break;

      case PT_CLIST:
      case PT_UCNC:
      unicode_status |= XCLASS_SAVE_CHAR;
      break;

      case PT_BOOL:
      unicode_status |= XCLASS_HAS_BOOL;
      break;

      case PT_BIDICL:
      unicode_status |= XCLASS_HAS_BIDICL;
      break;

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    cc += 2;
    }
#endif /* SUPPORT_UNICODE */
  }
SLJIT_ASSERT(compares > 0);

/* We are not necessarily in UTF mode, even in 8 bit mode. */
cc = ccbegin;
if ((cc[-1] & XCL_NOT) != 0)
  read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
else
  {
#ifdef SUPPORT_UNICODE
  read_char(common, min, max, (unicode_status & XCLASS_NEEDS_UCD) ? backtracks : NULL, 0);
#else /* !SUPPORT_UNICODE */
  read_char(common, min, max, NULL, 0);
#endif /* SUPPORT_UNICODE */
  }

if ((cc[-1] & XCL_HASPROP) == 0)
  {
  if ((cc[-1] & XCL_MAP) != 0)
    {
    /* Characters above 255 are not covered by the bitmap. */
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
    if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
      {
      /* Generic bitmap test: byte index is c >> 3, bit index is c & 7. */
      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
      OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
      add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
      }

    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(jump);

    cc += 32 / sizeof(PCRE2_UCHAR);
    }
  else
    {
    /* Quick rejection of the characters outside of [min, max]. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
    add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
    }
  }
else if ((cc[-1] & XCL_MAP) != 0)
  {
  /* The bitmap test destroys TMP1, so the character is kept in RETURN_ADDR. */
  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
#ifdef SUPPORT_UNICODE
  unicode_status |= XCLASS_CHAR_SAVED;
#endif /* SUPPORT_UNICODE */
  if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
    {
#if PCRE2_CODE_UNIT_WIDTH == 8
    jump = NULL;
    if (common->utf)
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
      jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);

    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
    add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));

#if PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf)
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
      JUMPHERE(jump);
    }

  OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
  cc += 32 / sizeof(PCRE2_UCHAR);
  }

#ifdef SUPPORT_UNICODE
if (unicode_status & XCLASS_NEEDS_UCD)
  {
  if ((unicode_status & (XCLASS_SAVE_CHAR | XCLASS_CHAR_SAVED)) == XCLASS_SAVE_CHAR)
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
  if (!common->utf)
    {
    /* Values above the Unicode range are looked up as an unassigned character. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
    JUMPHERE(jump);
    }
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */

  /* Two stage table lookup; the record index is multiplied by 12
  (8 * index + 4 * index) to get the offset of the record in TMP2. */
  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

  ccbegin = cc;

  /* Bidi class items. */
  if (unicode_status & XCLASS_HAS_BIDICL)
    {
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT);

    while (*cc != XCL_END)
      {
      if (*cc == XCL_SINGLE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        }
      else if (*cc == XCL_RANGE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        GETCHARINCTEST(c, cc);
        }
      else
        {
        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
        cc++;
        if (*cc == PT_BIDICL)
          {
          compares--;
          invertcmp = (compares == 0 && list != backtracks);
          if (cc[-1] == XCL_NOTPROP)
            invertcmp ^= 0x1;
          jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
          add_jump(compiler, compares > 0 ? list : backtracks, jump);
          }
        cc += 2;
        }
      }

    cc = ccbegin;
    }

  /* Boolean property items. */
  if (unicode_status & XCLASS_HAS_BOOL)
    {
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BPROPS_MASK);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);

    while (*cc != XCL_END)
      {
      if (*cc == XCL_SINGLE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        }
      else if (*cc == XCL_RANGE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        GETCHARINCTEST(c, cc);
        }
      else
        {
        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
        cc++;
        if (*cc == PT_BOOL)
          {
          compares--;
          invertcmp = (compares == 0 && list != backtracks);
          if (cc[-1] == XCL_NOTPROP)
            invertcmp ^= 0x1;

          OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f));
          add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
          }
        cc += 2;
        }
      }

    cc = ccbegin;
    }

  /* Script items (PT_SC and the script part of a positive PT_SCX). */
  if (unicode_status & XCLASS_HAS_SCRIPT)
    {
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));

    while (*cc != XCL_END)
      {
      if (*cc == XCL_SINGLE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        }
      else if (*cc == XCL_RANGE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        GETCHARINCTEST(c, cc);
        }
      else
        {
        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
        cc++;
        switch (*cc)
          {
          case PT_SCX:
          if (cc[-1] == XCL_NOTPROP)
            break;
          /* Fall through */

          case PT_SC:
          compares--;
          invertcmp = (compares == 0 && list != backtracks);
          if (cc[-1] == XCL_NOTPROP)
            invertcmp ^= 0x1;

          add_jump(compiler, compares > 0 ? list : backtracks, CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]));
          }
        cc += 2;
        }
      }

    cc = ccbegin;
    }

  /* Script extension items. */
  if (unicode_status & XCLASS_HAS_SCRIPT_EXTENSION)
    {
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);

    if (unicode_status & XCLASS_SCRIPT_EXTENSION_NOTPROP)
      {
      /* A negated item needs the script in TMP2. The record offset is saved
      first if the chartype is still to be loaded; RETURN_ADDR cannot be used
      for that when it holds the saved character. */
      if (unicode_status & XCLASS_HAS_TYPE)
        {
        if (unicode_status & XCLASS_SAVE_CHAR)
          {
          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP2, 0);
          unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0;
          }
        else
          {
          OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
          unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR;
          }
        }
      OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
      }

    while (*cc != XCL_END)
      {
      if (*cc == XCL_SINGLE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        }
      else if (*cc == XCL_RANGE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        GETCHARINCTEST(c, cc);
        }
      else
        {
        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
        cc++;
        if (*cc == PT_SCX)
          {
          compares--;
          invertcmp = (compares == 0 && list != backtracks);

          jump = NULL;
          if (cc[-1] == XCL_NOTPROP)
            {
            /* The script itself is checked here, the extension set below. */
            jump = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, (int)cc[1]);
            if (invertcmp)
              {
              add_jump(compiler, backtracks, jump);
              jump = NULL;
              }
            invertcmp ^= 0x1;
            }

          OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_script_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f));
          add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));

          if (jump != NULL)
            JUMPHERE(jump);
          }
        cc += 2;
        }
      }

    if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
    else if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR)
      OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
    cc = ccbegin;
    }

  /* The character returns to TMP1; RETURN_ADDR is free again after this. */
  if (unicode_status & XCLASS_SAVE_CHAR)
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);

  if (unicode_status & XCLASS_HAS_TYPE)
    {
    /* TMP1 is occupied by the character if it had to be saved. */
    if (unicode_status & XCLASS_SAVE_CHAR)
      typereg = RETURN_ADDR;

    OP1(SLJIT_MOV_U8, typereg, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
    }
  }
#endif /* SUPPORT_UNICODE */

/* Generating code. */
charoffset = 0;
numberofcmps = 0;
#ifdef SUPPORT_UNICODE
typeoffset = 0;
#endif /* SUPPORT_UNICODE */

while (*cc != XCL_END)
  {
  compares--;
  invertcmp = (compares == 0 && list != backtracks);
  jump = NULL;

  if (*cc == XCL_SINGLE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);

    /* Up to four consecutive character / range tests are merged in TMP2,
    and decided by a single jump. */
    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
  else if (*cc == XCL_RANGE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);
    /* With the start of the range as bias, a single unsigned comparison
    with (end - start) tests both ends. */
    SET_CHAR_OFFSET(c);
    GETCHARINCTEST(c, cc);

    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
#ifdef SUPPORT_UNICODE
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    if (*cc == XCL_NOTPROP)
      invertcmp ^= 0x1;
    cc++;
    switch(*cc)
      {
      case PT_ANY:
      if (!invertcmp)
        jump = JUMP(SLJIT_JUMP);
      break;

      case PT_LAMP:
      OP2U(SLJIT_SUB | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      OP2U(SLJIT_SUB | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      OP2U(SLJIT_SUB | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_GC:
      c = PRIV(ucp_typerange)[(int)cc[1] * 2];
      SET_TYPE_OFFSET(c);
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
      break;

      case PT_PC:
      jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
      break;

      case PT_SC:
      case PT_SCX:
      case PT_BOOL:
      case PT_BIDICL:
      /* Processed (and counted) by the earlier passes: the decrement at the
      top of the loop is undone, and nothing is emitted. */
      compares++;
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      SET_CHAR_OFFSET(9);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      SET_TYPE_OFFSET(ucp_Zl);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_WORD:
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      /* Fall through. */

      case PT_ALNUM:
      SET_TYPE_OFFSET(ucp_Ll);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
      /* For PT_WORD the result of the underscore test is kept in TMP2. */
      OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      SET_TYPE_OFFSET(ucp_Nd);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_CLIST:
      other_cases = PRIV(ucd_caseless_sets) + cc[1];

      /* At least three characters are required.
         Otherwise this case would be handled by the normal code path. */
      SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
      SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);

      /* Optimizing character pairs, if their difference is power of 2:
      (c | bit) == higher tests both members at once. The OR must be done
      on the real character value, so the bias is removed first if there
      is any. */
      if (is_powerof2(other_cases[1] ^ other_cases[0]))
        {
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
        else
          {
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
          }
        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[1]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
        other_cases += 2;
        }
      else if (is_powerof2(other_cases[2] ^ other_cases[1]))
        {
        /* The pair is (other_cases[1], other_cases[2]) here, so the mask
        must be their difference on both paths. */
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
        else
          {
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
          }
        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[2]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);

        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
        OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);

        other_cases += 3;
        }
      else
        {
        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
        }

      /* The remaining members are tested one by one; only the last
      OP_FLAGS needs to set the zero flag for the jump below. */
      while (*other_cases != NOTACHAR)
        {
        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
        }
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_UCNC:
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      SET_CHAR_OFFSET(0xa0);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      SET_CHAR_OFFSET(0);
      OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_PXGRAPH:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2U(SLJIT_SUB | SLJIT_SET_GREATER, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);

      /* The bias of TMP1 is changed before the conditional jump: charoffset
      is updated at compile time regardless of the path taken at runtime, so
      the adjustment must be executed for every character. Otherwise TMP1
      would not match charoffset in the items processed after this one. */
      SET_CHAR_OFFSET(0x2066);

      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      JUMPHERE(jump);
      /* TMP2 is non-zero for the characters which are not graphical. */
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPRINT:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2U(SLJIT_SUB | SLJIT_SET_GREATER, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);

      OP2U(SLJIT_SUB | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
      OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);

      /* The bias of TMP1 is changed before the conditional jump, for the
      same reason as in the PT_PXGRAPH case. */
      SET_CHAR_OFFSET(0x2066);

      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      JUMPHERE(jump);
      /* TMP2 is non-zero for the characters which are not printable. */
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPUNCT:
      SET_TYPE_OFFSET(ucp_Sc);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      /* Symbols only count as punctuation below 128. */
      SET_CHAR_OFFSET(0);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x7f);
      OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);

      SET_TYPE_OFFSET(ucp_Pc);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    cc += 2;
    }
#endif /* SUPPORT_UNICODE */

  if (jump != NULL)
    add_jump(compiler, compares > 0 ? list : backtracks, jump);
  }

/* The jumps of the successful comparisons continue after the class. */
if (found != NULL)
  set_jumps(found, LABEL());
}
8238 
8239 #undef SET_TYPE_OFFSET
8240 #undef SET_CHAR_OFFSET
8241 
8242 #endif
8243 
/* Emits the matching path of a simple zero-width assertion opcode. The
opcodes handled are OP_SOD, OP_SOM, OP_NOT_WORD_BOUNDARY, OP_WORD_BOUNDARY,
OP_EODN, OP_EOD, OP_DOLL, OP_DOLLM, OP_CIRC, OP_CIRCM and OP_REVERSE; any
other value reaches SLJIT_UNREACHABLE().

Arguments:
  common      the compiler state (newline settings, match mode, helper lists)
  type        the opcode to compile
  cc          pattern position of the opcode's arguments; only OP_REVERSE
              reads anything from it (its length, via GET(cc, 0))
  backtracks  list that receives every jump taken when the assertion fails

Returns:      cc for the argument-less opcodes, cc + LINK_SIZE for OP_REVERSE
*/

static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int length;
struct sljit_jump *jump[4];
#ifdef SUPPORT_UNICODE
struct sljit_label *label;
#endif /* SUPPORT_UNICODE */

switch(type)
  {
  case OP_SOD:
  /* Fail unless STR_PTR equals the "begin" field of the arguments. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    }
  else
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_SOM:
  /* Fail unless STR_PTR equals the "str" field of the arguments. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
    }
  else
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  /* The test itself is done by the shared wordboundary helper, which is
  invoked as a fast call; only its result is examined here. */
  add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
#ifdef SUPPORT_UNICODE
  if (common->invalid_utf)
    {
    /* The result is taken from TMP2: OP_NOT_WORD_BOUNDARY fails when it is
    non-zero, OP_WORD_BOUNDARY fails when it is zero or negative. */
    add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    return cc;
    }
#endif /* SUPPORT_UNICODE */
  /* Otherwise the zero flag is tested. NOTE(review): presumably the helper
  leaves the flag set according to its result - confirm in the helper. */
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_EODN:
  /* Requires rather complex checks. */
  /* jump[0]: already at the end of the subject, nothing else to test. */
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed newline made of two code units. TMP2 is the position two units
    ahead, TMP1 is the unit at STR_PTR. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode == PCRE2_JIT_COMPLETE)
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    else
      {
      /* jump[1]: exactly two units remain, compare both of them below. */
      jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
      /* Fail if more than two units remain, or if the unit at STR_PTR is not
      the first unit of the newline. */
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, STR_END, 0);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
      /* One unit remains and it is the first half of the newline: run the
      partial match check, then fail. */
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(jump[1]);
      }
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else if (common->nltype == NLTYPE_FIXED)
    {
    /* Fixed single unit newline: exactly one unit must remain and it must be
    the newline. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
    }
  else
    {
    /* Not a fixed newline. A leading CR is handled first: it is accepted
    when it is the last unit (jump[2]) or when it is followed by NL and
    nothing else (jump[3]). */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, TMP2, 0, STR_END, 0);
    jump[2] = JUMP(SLJIT_GREATER);
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
    /* Equal. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

    /* The unit at STR_PTR is not CR. */
    JUMPHERE(jump[1]);
    if (common->nltype == NLTYPE_ANYCRLF)
      {
      /* It must be the last unit and it must be NL. */
      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
      }
    else
      {
      /* Read one character with STR_PTR saved in TMP3: the character must
      end exactly at STR_END and the anynewline helper must accept it.
      STR_PTR is restored afterwards. */
      OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
      read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
      sljit_set_current_flags(compiler, SLJIT_SET_Z);
      add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
      OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
      }
    JUMPHERE(jump[2]);
    JUMPHERE(jump[3]);
    }
  JUMPHERE(jump[0]);
  if (common->mode != PCRE2_JIT_COMPLETE)
    check_partial(common, TRUE);
  return cc;

  case OP_EOD:
  /* Fail while subject units remain. */
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
  if (common->mode != PCRE2_JIT_COMPLETE)
    check_partial(common, TRUE);
  return cc;

  case OP_DOLL:
  /* Fail when PCRE2_NOTEOL is set in the match options. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
    }
  else
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));

  /* Without "endonly" the rest is the same as OP_EODN; with it only the
  very end of the subject is accepted. */
  if (!common->endonly)
    compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    check_partial(common, FALSE);
    }
  return cc;

  case OP_DOLLM:
  /* jump[1]: not at the end of the subject, a newline must follow. */
  jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  /* At the end of the subject: fail only when PCRE2_NOTEOL is set. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
    }
  else
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  check_partial(common, FALSE);
  jump[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[1]);

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Two unit fixed newline: both units must be inside the subject and
    match the two halves of the newline. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode == PCRE2_JIT_COMPLETE)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
    else
      {
      jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
      /* STR_PTR = STR_END - IN_UCHARS(1) */
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(jump[1]);
      }

    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else
    {
    /* Peek at the next character and let check_newlinechar decide. */
    peek_char(common, common->nlmax, TMP3, 0, NULL);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(jump[0]);
  return cc;

  case OP_CIRC:
  /* Fail when STR_PTR is beyond the "begin" field of the arguments, or when
  PCRE2_NOTBOL is set in the match options. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
    }
  return cc;

  case OP_CIRCM:
  /* TMP2 might be used by peek_char_back. */
  /* TMP2 holds the "begin" field; jump[1] is taken when STR_PTR is beyond
  it. At "begin" itself only PCRE2_NOTBOL can make the assertion fail. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
    }
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
    jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
    }
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  jump[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[1]);

  /* Unless alt_circumflex is set, the assertion fails at the end of the
  subject. */
  if (!common->alt_circumflex)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Two unit fixed newline: the two preceding units must lie inside the
    subject (TMP2 still holds "begin") and match the newline. */
    OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else
    {
    /* Peek at the previous character and let check_newlinechar decide. */
    peek_char_back(common, common->nlmax, backtracks);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(jump[0]);
  return cc;

  case OP_REVERSE:
  /* Move STR_PTR back by "length" characters; fail if that would go before
  the "begin" field of the arguments. A zero length emits nothing. */
  length = GET(cc, 0);
  if (length == 0)
    return cc + LINK_SIZE;
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    }
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
    /* Step back one character at a time, TMP3 counts the remaining steps. */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, length);
    label = LABEL();
    add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
    move_back(common, backtracks, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);
    }
  else
#endif
    {
    /* The distance from the subject start is compared with the length,
    rather than comparing the decremented pointer with the start: the
    subtraction STR_PTR - length may wrap around below address zero, in which
    case an unsigned "new STR_PTR < begin" test would not detect the
    underflow. TMP1 is free here (the virtual register path above already
    overwrites it). */
    OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, TMP2, 0);
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, IN_UCHARS(length)));
    }
  check_start_used_ptr(common);
  return cc + LINK_SIZE;
  }
SLJIT_UNREACHABLE();
return cc;
}
8518 
8519 #ifdef SUPPORT_UNICODE
8520 
8521 #if PCRE2_CODE_UNIT_WIDTH != 32
8522 
do_extuni_utf(jit_arguments * args,PCRE2_SPTR cc)8523 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
8524 {
8525 PCRE2_SPTR start_subject = args->begin;
8526 PCRE2_SPTR end_subject = args->end;
8527 int lgb, rgb, ricount;
8528 PCRE2_SPTR prevcc, endcc, bptr;
8529 BOOL first = TRUE;
8530 uint32_t c;
8531 
8532 prevcc = cc;
8533 endcc = NULL;
8534 do
8535   {
8536   GETCHARINC(c, cc);
8537   rgb = UCD_GRAPHBREAK(c);
8538 
8539   if (first)
8540     {
8541     lgb = rgb;
8542     endcc = cc;
8543     first = FALSE;
8544     continue;
8545     }
8546 
8547   if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8548     break;
8549 
8550   /* Not breaking between Regional Indicators is allowed only if there
8551   are an even number of preceding RIs. */
8552 
8553   if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8554     {
8555     ricount = 0;
8556     bptr = prevcc;
8557 
8558     /* bptr is pointing to the left-hand character */
8559     while (bptr > start_subject)
8560       {
8561       bptr--;
8562       BACKCHAR(bptr);
8563       GETCHAR(c, bptr);
8564 
8565       if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
8566         break;
8567 
8568       ricount++;
8569       }
8570 
8571     if ((ricount & 1) != 0) break;  /* Grapheme break required */
8572     }
8573 
8574   /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8575   allows any number of them before a following Extended_Pictographic. */
8576 
8577   if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8578        lgb != ucp_gbExtended_Pictographic)
8579     lgb = rgb;
8580 
8581   prevcc = endcc;
8582   endcc = cc;
8583   }
8584 while (cc < end_subject);
8585 
8586 return endcc;
8587 }
8588 
8589 #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
8590 
do_extuni_utf_invalid(jit_arguments * args,PCRE2_SPTR cc)8591 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
8592 {
8593 PCRE2_SPTR start_subject = args->begin;
8594 PCRE2_SPTR end_subject = args->end;
8595 int lgb, rgb, ricount;
8596 PCRE2_SPTR prevcc, endcc, bptr;
8597 BOOL first = TRUE;
8598 uint32_t c;
8599 
8600 prevcc = cc;
8601 endcc = NULL;
8602 do
8603   {
8604   GETCHARINC_INVALID(c, cc, end_subject, break);
8605   rgb = UCD_GRAPHBREAK(c);
8606 
8607   if (first)
8608     {
8609     lgb = rgb;
8610     endcc = cc;
8611     first = FALSE;
8612     continue;
8613     }
8614 
8615   if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8616     break;
8617 
8618   /* Not breaking between Regional Indicators is allowed only if there
8619   are an even number of preceding RIs. */
8620 
8621   if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8622     {
8623     ricount = 0;
8624     bptr = prevcc;
8625 
8626     /* bptr is pointing to the left-hand character */
8627     while (bptr > start_subject)
8628       {
8629       GETCHARBACK_INVALID(c, bptr, start_subject, break);
8630 
8631       if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
8632         break;
8633 
8634       ricount++;
8635       }
8636 
8637     if ((ricount & 1) != 0)
8638       break;  /* Grapheme break required */
8639     }
8640 
8641   /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8642   allows any number of them before a following Extended_Pictographic. */
8643 
8644   if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8645        lgb != ucp_gbExtended_Pictographic)
8646     lgb = rgb;
8647 
8648   prevcc = endcc;
8649   endcc = cc;
8650   }
8651 while (cc < end_subject);
8652 
8653 return endcc;
8654 }
8655 
do_extuni_no_utf(jit_arguments * args,PCRE2_SPTR cc)8656 static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
8657 {
8658 PCRE2_SPTR start_subject = args->begin;
8659 PCRE2_SPTR end_subject = args->end;
8660 int lgb, rgb, ricount;
8661 PCRE2_SPTR bptr;
8662 uint32_t c;
8663 
8664 /* Patch by PH */
8665 /* GETCHARINC(c, cc); */
8666 c = *cc++;
8667 
8668 #if PCRE2_CODE_UNIT_WIDTH == 32
8669 if (c >= 0x110000)
8670   return NULL;
8671 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8672 lgb = UCD_GRAPHBREAK(c);
8673 
8674 while (cc < end_subject)
8675   {
8676   c = *cc;
8677 #if PCRE2_CODE_UNIT_WIDTH == 32
8678   if (c >= 0x110000)
8679     break;
8680 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8681   rgb = UCD_GRAPHBREAK(c);
8682 
8683   if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8684     break;
8685 
8686   /* Not breaking between Regional Indicators is allowed only if there
8687   are an even number of preceding RIs. */
8688 
8689   if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8690     {
8691     ricount = 0;
8692     bptr = cc - 1;
8693 
8694     /* bptr is pointing to the left-hand character */
8695     while (bptr > start_subject)
8696       {
8697       bptr--;
8698       c = *bptr;
8699 #if PCRE2_CODE_UNIT_WIDTH == 32
8700       if (c >= 0x110000)
8701         break;
8702 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8703 
8704       if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break;
8705 
8706       ricount++;
8707       }
8708 
8709     if ((ricount & 1) != 0)
8710       break;  /* Grapheme break required */
8711     }
8712 
8713   /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8714   allows any number of them before a following Extended_Pictographic. */
8715 
8716   if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8717        lgb != ucp_gbExtended_Pictographic)
8718     lgb = rgb;
8719 
8720   cc++;
8721   }
8722 
8723 return cc;
8724 }
8725 
8726 #endif /* SUPPORT_UNICODE */
8727 
/* Emits the matching path of one opcode that matches a single item of the
subject: a character type, a literal or negated literal character, a character
class, a newline sequence (OP_ANYNL) or an extended grapheme cluster
(OP_EXTUNI). Any opcode not listed in the switch reaches SLJIT_UNREACHABLE().

Arguments:
  common         the compiler state
  type           the opcode to compile
  cc             pattern position of the opcode's arguments
  backtracks     list that receives every jump taken when the item fails
  check_str_ptr  when TRUE, detect_partial_match() is emitted before the item
                 is read (OP_CHAR/OP_CHARI emit it only outside complete mode
                 and use an explicit STR_PTR / STR_END compare in complete
                 mode)

Returns:         the pattern position after the opcode's arguments
*/

static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
{
DEFINE_COMPILER;
int length;
unsigned int c, oc, bit;
compare_context context;
struct sljit_jump *jump[3];
jump_list *end_list;
#ifdef SUPPORT_UNICODE
PCRE2_UCHAR propdata[5];
#endif /* SUPPORT_UNICODE */

switch(type)
  {
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  /* Digits are usually 0-9, so it is worth optimizing them. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
    read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
  else
#endif
    read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
    /* Flip the starting bit in the negative case. */
  /* Test the ctype_digit bit of TMP1 (NOTE(review): presumably the ctype
  flags left there by read_char7_type / read_char8_type - confirm). */
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_digit);
  add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  /* Same scheme as the digit case, testing the ctype_space bit. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
    read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
  else
#endif
    read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_space);
  add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  /* Same scheme as the digit case, testing the ctype_word bit. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
    read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
  else
#endif
    read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_word);
  add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_ANY:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Two unit fixed newline. jump[0]: the character is not the first unit
    of the newline, so it matches. */
    jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
    /* end_list collects the jumps taken when no further unit is available;
    they land on the success label below. */
    end_list = NULL;
    if (common->mode != PCRE2_JIT_PARTIAL_HARD)
      add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
    else
      check_str_end(common, &end_list);

    /* Fail only when the next unit completes the newline. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
    set_jumps(end_list, LABEL());
    JUMPHERE(jump[0]);
    }
  else
    check_newlinechar(common, common->nltype, backtracks, TRUE);
  return cc;

  case OP_ALLANY:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
    if (common->invalid_utf)
      {
      read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
      return cc;
      }

#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
    /* Load the first code unit and step over it; the rest of the character,
    if any, is skipped below without being decoded. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if PCRE2_CODE_UNIT_WIDTH == 8
    /* Units below 0xc0 need no further skipping. For the others the value
    to add is loaded from utf8_table4, indexed by (unit - 0xc0). */
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif PCRE2_CODE_UNIT_WIDTH == 16
    /* Units below 0xd800 need no further skipping. Otherwise TMP1 becomes 1
    when (unit & 0xfc00) == 0xd800 and 0 if not; shifted left by one it is
    the number of bytes added to STR_PTR (one more 16-bit unit or nothing). */
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xd800);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
    JUMPHERE(jump[0]);
    return cc;
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
    }
#endif /* SUPPORT_UNICODE */
  /* Otherwise a character is exactly one code unit. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

  case OP_ANYBYTE:
  /* Always advances by exactly one code unit. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

#ifdef SUPPORT_UNICODE
  case OP_NOTPROP:
  case OP_PROP:
  /* Build a one-item extended class in propdata from the two argument units
  of the opcode and let the XCLASS compiler generate the test. */
  propdata[0] = XCL_HASPROP;
  propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
  propdata[2] = cc[0];
  propdata[3] = cc[1];
  propdata[4] = XCL_END;
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  compile_xclass_matchingpath(common, propdata, backtracks);
  return cc + 2;
#endif

  case OP_ANYNL:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
  /* jump[0]: not CR, use the generic newline test further down. */
  jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  /* We don't need to handle soft partial matching case. */
  /* A CR was read. It matches alone when nothing follows (end_list) or when
  the next unit is not NL (jump[1]); a following NL is consumed as well. */
  end_list = NULL;
  if (common->mode != PCRE2_JIT_PARTIAL_HARD)
    add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  else
    check_str_end(common, &end_list);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  jump[2] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[0]);
  check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
  set_jumps(end_list, LABEL());
  JUMPHERE(jump[1]);
  JUMPHERE(jump[2]);
  return cc;

  case OP_NOT_HSPACE:
  case OP_HSPACE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  /* Only the negated form passes the backtrack list and
  READ_CHAR_UPDATE_STR_PTR to read_char. */
  if (type == OP_NOT_HSPACE)
    read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
  else
    read_char(common, 0x9, 0x3000, NULL, 0);

  /* The shared hspace helper is invoked as a fast call and the zero flag is
  tested afterwards. */
  add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_NOT_VSPACE:
  case OP_VSPACE:
  /* Same scheme as the horizontal space case, with the vspace helper. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  if (type == OP_NOT_VSPACE)
    read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
  else
    read_char(common, 0xa, 0x2029, NULL, 0);

  add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

#ifdef SUPPORT_UNICODE
  case OP_EXTUNI:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  /* A C helper is called with two arguments: R0 receives ARGUMENTS and R1 is
  STR_PTR itself, which is what the assertion below guarantees. */
  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);

  /* The helper is selected by the utf / invalid_utf settings. A zero return
  value is tested only in the configurations selected by the conditions
  below, and makes the item fail. */
#if PCRE2_CODE_UNIT_WIDTH != 32
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
    common->utf ? (common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_utf)) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
  if (common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
#else
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
    common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
  if (!common->utf || common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
#endif

  /* The returned pointer is the new subject position. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

  if (common->mode == PCRE2_JIT_PARTIAL_HARD)
    {
    jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
    /* Since we successfully read a char above, partial matching must occur. */
    check_partial(common, TRUE);
    JUMPHERE(jump[0]);
    }
  return cc;
#endif

  case OP_CHAR:
  case OP_CHARI:
  /* length is the size of the pattern character in code units. */
  length = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
#endif

  if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
    detect_partial_match(common, backtracks);

  /* When one of these conditions holds the character is compared as a
  sequence of code units by byte_sequence_compare. */
  if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
    {
    /* STR_PTR is advanced first, then checked against STR_END. */
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

    context.length = IN_UCHARS(length);
    context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
    context.ucharptr = 0;
#endif
    return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
    }

  /* Caseless character whose two cases are compared as whole characters:
  c is the pattern character, oc its other case. */
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
    GETCHAR(c, cc);
    }
  else
#endif
    c = *cc;

  SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));

  if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  oc = char_othercase(common, c);
  read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);

  SLJIT_ASSERT(!is_powerof2(c ^ oc));

  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
    {
    /* Replace TMP1 by c when it equals oc, so that a single comparison with
    c covers both cases. */
    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, oc);
    CMOV(SLJIT_EQUAL, TMP1, SLJIT_IMM, c);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    }
  else
    {
    jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
    JUMPHERE(jump[0]);
    }
  return cc + length;

  case OP_NOT:
  case OP_NOTI:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  length = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
#if PCRE2_CODE_UNIT_WIDTH == 8
    c = *cc;
    if (c < 128 && !common->invalid_utf)
      {
      /* The pattern character is a single unit below 128: only the first
      subject unit has to be compared, the subject character is not decoded. */
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
      if (type == OP_NOT || !char_has_othercase(common, cc))
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      else
        {
        /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
        OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
        }
      /* Skip the variable-length character. */
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
      JUMPHERE(jump[0]);
      return cc + 1;
      }
    else
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
      {
      GETCHARLEN(c, cc, length);
      }
    }
  else
#endif /* SUPPORT_UNICODE */
    c = *cc;

  /* General case: read a character and fail when it equals c (or, for the
  caseless form, its other case oc). */
  if (type == OP_NOT || !char_has_othercase(common, cc))
    {
    read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    }
  else
    {
    oc = char_othercase(common, c);
    read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
    bit = c ^ oc;
    if (is_powerof2(bit))
      {
      /* The two cases differ in one bit: set it and compare once. */
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
      }
    else
      {
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
      }
    }
  return cc + length;

  case OP_CLASS:
  case OP_NCLASS:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  /* In the 8-bit Unicode build "bit" is the largest character value handled
  through the bitmap at cc: 127 or 255. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
  if (type == OP_NCLASS)
    read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
  else
    read_char(common, 0, bit, NULL, 0);
#else
  if (type == OP_NCLASS)
    read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
  else
    read_char(common, 0, 255, NULL, 0);
#endif

  /* Nothing more is emitted here when optimize_class() returns TRUE. */
  if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
    return cc + 32 / sizeof(PCRE2_UCHAR);

  /* Characters above the bitmap range never match OP_CLASS (the jump goes
  to the backtrack list) and always match OP_NCLASS (jump[0] is kept and
  lands after the bitmap test). */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  jump[0] = NULL;
  if (common->utf)
    {
    jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
    if (type == OP_CLASS)
      {
      add_jump(compiler, backtracks, jump[0]);
      jump[0] = NULL;
      }
    }
#elif PCRE2_CODE_UNIT_WIDTH != 8
  jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  if (type == OP_CLASS)
    {
    add_jump(compiler, backtracks, jump[0]);
    jump[0] = NULL;
    }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

  /* Bitmap test: load the byte at cc[TMP1 >> 3] and test bit (TMP1 & 7);
  fail when the bit is clear. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  if (jump[0] != NULL)
    JUMPHERE(jump[0]);
#endif
  /* The returned position is 32 bytes (the bitmap) further on. */
  return cc + 32 / sizeof(PCRE2_UCHAR);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  case OP_XCLASS:
  /* The class data after the LINK_SIZE field is handed to the XCLASS
  compiler; the value stored at cc gives the distance to the next opcode. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
  return cc + GET(cc, 0) - 1;
#endif
  }
SLJIT_UNREACHABLE();
return cc;
}
9132 
compile_charn_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,jump_list ** backtracks)9133 static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
9134 {
9135 /* This function consumes at least one input character. */
9136 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
9137 DEFINE_COMPILER;
9138 PCRE2_SPTR ccbegin = cc;
9139 compare_context context;
9140 int size;
9141 
9142 context.length = 0;
9143 do
9144   {
9145   if (cc >= ccend)
9146     break;
9147 
9148   if (*cc == OP_CHAR)
9149     {
9150     size = 1;
9151 #ifdef SUPPORT_UNICODE
9152     if (common->utf && HAS_EXTRALEN(cc[1]))
9153       size += GET_EXTRALEN(cc[1]);
9154 #endif
9155     }
9156   else if (*cc == OP_CHARI)
9157     {
9158     size = 1;
9159 #ifdef SUPPORT_UNICODE
9160     if (common->utf)
9161       {
9162       if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
9163         size = 0;
9164       else if (HAS_EXTRALEN(cc[1]))
9165         size += GET_EXTRALEN(cc[1]);
9166       }
9167     else
9168 #endif
9169     if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
9170       size = 0;
9171     }
9172   else
9173     size = 0;
9174 
9175   cc += 1 + size;
9176   context.length += IN_UCHARS(size);
9177   }
9178 while (size > 0 && context.length <= 128);
9179 
9180 cc = ccbegin;
9181 if (context.length > 0)
9182   {
9183   /* We have a fixed-length byte sequence. */
9184   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
9185   add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
9186 
9187   context.sourcereg = -1;
9188 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
9189   context.ucharptr = 0;
9190 #endif
9191   do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
9192   return cc;
9193   }
9194 
9195 /* A non-fixed length character will be checked if length == 0. */
9196 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
9197 }
9198 
/* Forward declarations. */
9200 static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
9201 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
9202 
/* Allocates a backtrack record of `size` bytes with sljit_alloc_memory(),
zero fills it, stores `ccstart` in its cc field and pushes it on top of the
list headed by parent->top. The variables `compiler`, `backtrack` and `parent`
must be in scope where the macro is used. If the compiler reports an error
after the allocation, the enclosing function returns `error`. Every macro
parameter is parenthesized so that any expression can be passed safely. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return (error); \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
9215 
/* Variant of the backtrack push for functions returning void: allocates a
backtrack record of `size` bytes with sljit_alloc_memory(), zero fills it,
stores `ccstart` in its cc field and pushes it on top of the list headed by
parent->top. The variables `compiler`, `backtrack` and `parent` must be in
scope. If the compiler reports an error after the allocation, the enclosing
function simply returns. The `size` parameter is parenthesized at every use. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
9228 
/* Views the local `backtrack` pointer as a pointer to the given (more
specific) backtrack structure type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
9230 
static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
/* Emits the search among the name table entries that belong to an OP_DNREF /
OP_DNREFI opcode. Each candidate's OVECTOR slot is compared with the value of
OVECTOR(1); the search ends at the first candidate whose slot differs from it.
The address of the selected OVECTOR slot is left in TMP2. When backtracks is
not NULL and common->unset_backref is not set, the generated code jumps to the
backtracks list if the last candidate's slot equals OVECTOR(1) as well. */
DEFINE_COMPILER;
PCRE2_SPTR entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
int remaining = GET2(cc, 1 + IMM2_SIZE);
unsigned int slot_index;
jump_list *selected = NULL;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

/* TMP1 keeps the reference value that every candidate is compared with. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* Every candidate except the last one gets an early exit. */
for (remaining--; remaining > 0; remaining--)
  {
  slot_index = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(slot_index));
  add_jump(compiler, &selected, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(slot_index), TMP1, 0));
  entry += common->name_entry_size;
  }

/* The last candidate is selected unconditionally, unless the caller asked
for a failure path. */
slot_index = GET2(entry, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(slot_index));
if (backtracks != NULL && !common->unset_backref)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(slot_index), TMP1, 0));

set_jumps(selected, LABEL());
}
9260 
static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
/* Emits the code that matches the subject at STR_PTR against the text stored
by a capture (back reference).

For OP_REF / OP_REFI the capture is selected by the number stored in the byte
code. For any other opcode the start and end pointers of the capture are read
through TMP2, so the caller must load TMP2 before calling this function
(compile_dnref_search() leaves such an address in TMP2).

withchecks: emit the checks for an unset capture and for an empty capture.
emptyfail:  when the empty check is emitted, an empty capture jumps to the
            backtracks list instead of matching the empty string.
Every failure path is added to the backtracks list. */
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
int offset = 0;
/* In the generic path: taken when the capture is empty. In the UTF caseless
path: taken when the whole capture has been compared. */
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;
#if defined SUPPORT_UNICODE
struct sljit_label *loop;
struct sljit_label *caseless_loop;
jump_list *no_match = NULL;
/* Registers borrowed by the UTF caseless loop. Their previous contents are
saved in three words starting at common->iref_ptr and restored on every exit. */
int source_reg = COUNT_MATCH;
int source_end_reg = ARGUMENTS;
int char1_reg = STACK_LIMIT;
#endif /* SUPPORT_UNICODE */

/* TMP1 = start pointer of the capture. */
if (ref)
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->unset_backref)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }
else
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);

#if defined SUPPORT_UNICODE
if (common->utf && *cc == OP_REFI)
  {
  /* UTF caseless comparison: the capture and the subject are compared
  character by character. */
  SLJIT_ASSERT(common->iref_ptr != 0);

  /* TMP2 = end pointer of the capture. */
  if (ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  if (withchecks && emptyfail)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));

  /* Save the borrowed registers. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0);

  OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
  OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);

  loop = LABEL();
  /* Leave the loop when the capture is fully consumed (success), or when the
  subject runs out first (handled at the partial label below). */
  jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
  partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

  /* Read original character. It must be a valid UTF character.
  STR_PTR is temporarily replaced by the capture position (its value is kept
  in TMP3) so that read_char() reads from the capture. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);

  read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);

  OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);

  /* Read second character (from the subject). */
  read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);

  /* Identical characters: continue with the next pair. */
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

  /* Keep the subject character in TMP3 across the getucd call. */
  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);

  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));

  /* TMP2 = TMP2 * 12 (computed as 4 * TMP2 + 8 * TMP2) plus the address of the
  ucd_records table. NOTE(review): presumably getucd leaves a record index in
  TMP2 and a ucd_record is 12 bytes long - confirm. */
  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));

  /* The subject character plus the other_case field of its record is
  compared with the capture character. */
  OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

  /* A zero caseset value means there is nothing more to try. Otherwise the
  value selects the first 32 bit entry to scan in ucd_caseless_sets. */
  add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));

  /* Scan the entries: an entry equal to the capture character is a match,
  a smaller entry continues the scan, a greater entry ends it with failure. */
  caseless_loop = LABEL();
  OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
  OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, TMP1, 0, char1_reg, 0);
  JUMPTO(SLJIT_EQUAL, loop);
  JUMPTO(SLJIT_LESS, caseless_loop);

  /* Failure exit. When not compiling for partial matching, running out of
  subject is an ordinary failure and shares this exit. */
  set_jumps(no_match, LABEL());
  if (common->mode == PCRE2_JIT_COMPLETE)
    JUMPHERE(partial);

  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    /* Partial matching exit: restore the registers, let check_partial()
    run, then fail. */
    JUMPHERE(partial);
    OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
    OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
    OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);

    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    }

  /* Success exit: the whole capture has been matched. */
  JUMPHERE(jump);
  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  return;
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* Generic path. TMP2 = end - start of the capture, i.e. its length; the
  zero flag is set when the capture is empty. */
  if (ref)
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  if (withchecks)
    jump = JUMP(SLJIT_ZERO);

  /* Advance STR_PTR by the whole length before comparing. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, partial);

  /* Call the shared comparison routine; a non-zero TMP2 afterwards is
  treated as a mismatch. */
  add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    nopartial = JUMP(SLJIT_JUMP);
    JUMPHERE(partial);
    /* TMP2 -= STR_PTR - STR_END: only the part of the capture that fits
    before the end of the subject is compared. */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    /* Nothing is left to compare: skip the comparison call. */
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(partial);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }

/* Empty capture: either a failure or a successful empty match, as requested
by the caller. */
if (jump != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, jump);
  else
    JUMPHERE(jump);
  }
}
9425 
static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
/* Emits the matching path of a back reference that is followed by a repeat
opcode (OP_CRSTAR ... OP_CRMINRANGE). A ref_iterator_backtrack record is pushed
on the parent's backtrack list. Returns the byte code position after the repeat
opcode, or NULL if the backtrack record cannot be allocated. */
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
backtrack_common *backtrack;
PCRE2_UCHAR type;
int offset = 0;
struct sljit_label *label;
struct sljit_jump *zerolength;
struct sljit_jump *jump = NULL;
PCRE2_SPTR ccbegin = cc;
/* max == 0 is used for "no upper limit" (see OP_CRSTAR / OP_CRPLUS below). */
int min = 0, max = 0;
BOOL minimize;

PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);

/* The opcodes handled by the else branch carry one more IMM2 sized argument,
so cc is adjusted to let the offsets below be the same for both forms. */
if (ref)
  offset = GET2(cc, 1) << 1;
else
  cc += IMM2_SIZE;
type = cc[1 + IMM2_SIZE];

/* The minimizing variant of each repeat opcode is the odd one. */
SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
minimize = (type & 0x1) != 0;
switch(type)
  {
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  min = 0;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  min = 1;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  min = 0;
  max = 1;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  min = GET2(cc, 1 + IMM2_SIZE + 1);
  max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
  cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
  break;
  default:
  SLJIT_UNREACHABLE();
  break;
  }

/* Maximizing repeat. */
if (!minimize)
  {
  if (min == 0)
    {
    allocate_stack(common, 2);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
    /* Temporary release of STR_PTR. */
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    /* Handles both invalid and empty cases. Since the minimum repeat
    is zero, the invalid case is basically the same as an empty case. */
    if (ref)
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    else
      {
      /* The address left in TMP2 by the search is kept in POSSESSIVE1,
      because it is needed again at the start of every iteration. */
      compile_dnref_search(common, ccbegin, NULL);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    /* Restore if not zero length. */
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    }
  else
    {
    allocate_stack(common, 1);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    if (ref)
      {
      /* At least one match is required, so a capture whose start equals
      OVECTOR(1) fails immediately. */
      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
      }
    else
      {
      compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    }

  /* POSSESSIVE0 is the iteration counter; it is only needed when a limit
  greater than one has to be checked. */
  if (min > 1 || max > 1)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);

  /* Start of one iteration. */
  label = LABEL();
  if (!ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);

  if (min > 1 || max > 1)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
    /* Below the minimum: iterate again without storing the position. */
    if (min > 1)
      CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
    if (max > 1)
      {
      /* Stop at the maximum; otherwise push the current position and
      iterate again. */
      jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, label);
      JUMPHERE(jump);
      }
    }

  if (max == 0)
    {
    /* Includes min > 1 case as well. The position is pushed and the loop is
    repeated unconditionally; it is left through the jumps collected in
    backtrack->topbacktracks. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, label);
    }

  JUMPHERE(zerolength);
  BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();

  count_match(common);
  return cc;
  }

/* Minimizing repeat. STACK(0) and STACK(1) are initialized below, STACK(1)
serves as the iteration counter. The third slot is only allocated when the
capture is selected at run time, and it keeps the address returned in TMP2 by
compile_dnref_search(). */
allocate_stack(common, ref ? 2 : 3);
if (ref)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
if (type != OP_CRMINSTAR)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

if (min == 0)
  {
  /* Handles both invalid and empty cases. Since the minimum repeat
  is zero, the invalid case is basically the same as an empty case. */
  if (ref)
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    {
    compile_dnref_search(common, ccbegin, NULL);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  /* Length is non-zero, we can match real repeats. The first pass jumps
  over the matching code below, since zero iterations are acceptable. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  jump = JUMP(SLJIT_JUMP);
  }
else
  {
  if (ref)
    {
    /* At least one match is required, so a capture whose start equals
    OVECTOR(1) fails immediately. */
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    }
  else
    {
    compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  }

/* The code from this label matches one more iteration. */
BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
/* Fail if the counter has already reached the maximum. */
if (max > 0)
  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));

if (!ref)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

if (min > 1)
  {
  /* Count the iteration and repeat until the minimum is reached. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
  }
else if (max > 0)
  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);

if (jump != NULL)
  JUMPHERE(jump);
JUMPHERE(zerolength);

count_match(common);
return cc;
}
9632 
compile_recurse_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9633 static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9634 {
9635 DEFINE_COMPILER;
9636 backtrack_common *backtrack;
9637 recurse_entry *entry = common->entries;
9638 recurse_entry *prev = NULL;
9639 sljit_sw start = GET(cc, 1);
9640 PCRE2_SPTR start_cc;
9641 BOOL needs_control_head;
9642 
9643 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
9644 
9645 /* Inlining simple patterns. */
9646 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
9647   {
9648   start_cc = common->start + start;
9649   compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
9650   BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
9651   return cc + 1 + LINK_SIZE;
9652   }
9653 
9654 while (entry != NULL)
9655   {
9656   if (entry->start == start)
9657     break;
9658   prev = entry;
9659   entry = entry->next;
9660   }
9661 
9662 if (entry == NULL)
9663   {
9664   entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
9665   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9666     return NULL;
9667   entry->next = NULL;
9668   entry->entry_label = NULL;
9669   entry->backtrack_label = NULL;
9670   entry->entry_calls = NULL;
9671   entry->backtrack_calls = NULL;
9672   entry->start = start;
9673 
9674   if (prev != NULL)
9675     prev->next = entry;
9676   else
9677     common->entries = entry;
9678   }
9679 
9680 BACKTRACK_AS(recurse_backtrack)->entry = entry;
9681 
9682 if (entry->entry_label == NULL)
9683   add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));
9684 else
9685   JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
9686 /* Leave if the match is failed. */
9687 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
9688 BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
9689 return cc + 1 + LINK_SIZE;
9690 }
9691 
do_callout(struct jit_arguments * arguments,pcre2_callout_block * callout_block,PCRE2_SPTR * jit_ovector)9692 static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
9693 {
9694 PCRE2_SPTR begin;
9695 PCRE2_SIZE *ovector;
9696 sljit_u32 oveccount, capture_top;
9697 
9698 if (arguments->callout == NULL)
9699   return 0;
9700 
9701 SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);
9702 
9703 begin = arguments->begin;
9704 ovector = (PCRE2_SIZE*)(callout_block + 1);
9705 oveccount = callout_block->capture_top;
9706 
9707 SLJIT_ASSERT(oveccount >= 1);
9708 
9709 callout_block->version = 2;
9710 callout_block->callout_flags = 0;
9711 
9712 /* Offsets in subject. */
9713 callout_block->subject_length = arguments->end - arguments->begin;
9714 callout_block->start_match = jit_ovector[0] - begin;
9715 callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;
9716 callout_block->subject = begin;
9717 
9718 /* Convert and copy the JIT offset vector to the ovector array. */
9719 callout_block->capture_top = 1;
9720 callout_block->offset_vector = ovector;
9721 
9722 ovector[0] = PCRE2_UNSET;
9723 ovector[1] = PCRE2_UNSET;
9724 ovector += 2;
9725 jit_ovector += 2;
9726 capture_top = 1;
9727 
9728 /* Convert pointers to sizes. */
9729 while (--oveccount != 0)
9730   {
9731   capture_top++;
9732 
9733   ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin);
9734   ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin);
9735 
9736   if (ovector[0] != PCRE2_UNSET)
9737     callout_block->capture_top = capture_top;
9738 
9739   ovector += 2;
9740   jit_ovector += 2;
9741   }
9742 
9743 return (arguments->callout)(callout_block, arguments->callout_data);
9744 }
9745 
/* Byte offset of the given field inside pcre2_callout_block. */
#define CALLOUT_ARG_OFFSET(arg) \
    SLJIT_OFFSETOF(pcre2_callout_block, arg)
9748 
compile_callout_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9749 static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9750 {
9751 DEFINE_COMPILER;
9752 backtrack_common *backtrack;
9753 sljit_s32 mov_opcode;
9754 unsigned int callout_length = (*cc == OP_CALLOUT)
9755     ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
9756 sljit_sw value1;
9757 sljit_sw value2;
9758 sljit_sw value3;
9759 sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * sizeof(sljit_sw);
9760 
9761 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9762 
9763 callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);
9764 
9765 allocate_stack(common, callout_arg_size);
9766 
9767 SLJIT_ASSERT(common->capture_last_ptr != 0);
9768 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
9769 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9770 value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
9771 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
9772 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
9773 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);
9774 
9775 /* These pointer sized fields temporarly stores internal variables. */
9776 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
9777 
9778 if (common->mark_ptr != 0)
9779   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
9780 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
9781 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
9782 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
9783 
9784 if (*cc == OP_CALLOUT)
9785   {
9786   value1 = 0;
9787   value2 = 0;
9788   value3 = 0;
9789   }
9790 else
9791   {
9792   value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
9793   value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
9794   value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
9795   }
9796 
9797 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
9798 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
9799 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
9800 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
9801 
9802 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
9803 
9804 /* Needed to save important temporary registers. */
9805 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
9806 /* SLJIT_R0 = arguments */
9807 OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
9808 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
9809 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout));
9810 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
9811 free_stack(common, callout_arg_size);
9812 
9813 /* Check return value. */
9814 OP2U(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
9815 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
9816 if (common->abort_label == NULL)
9817   add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
9818 else
9819   JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label);
9820 return cc + callout_length;
9821 }
9822 
9823 #undef CALLOUT_ARG_SIZE
9824 #undef CALLOUT_ARG_OFFSET
9825 
assert_needs_str_ptr_saving(PCRE2_SPTR cc)9826 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
9827 {
9828 while (TRUE)
9829   {
9830   switch (*cc)
9831     {
9832     case OP_CALLOUT_STR:
9833     cc += GET(cc, 1 + 2*LINK_SIZE);
9834     break;
9835 
9836     case OP_NOT_WORD_BOUNDARY:
9837     case OP_WORD_BOUNDARY:
9838     case OP_CIRC:
9839     case OP_CIRCM:
9840     case OP_DOLL:
9841     case OP_DOLLM:
9842     case OP_CALLOUT:
9843     case OP_ALT:
9844     cc += PRIV(OP_lengths)[*cc];
9845     break;
9846 
9847     case OP_KET:
9848     return FALSE;
9849 
9850     default:
9851     return TRUE;
9852     }
9853   }
9854 }
9855 
compile_assert_matchingpath(compiler_common * common,PCRE2_SPTR cc,assert_backtrack * backtrack,BOOL conditional)9856 static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
9857 {
9858 DEFINE_COMPILER;
9859 int framesize;
9860 int extrasize;
9861 BOOL local_quit_available = FALSE;
9862 BOOL needs_control_head;
9863 int private_data_ptr;
9864 backtrack_common altbacktrack;
9865 PCRE2_SPTR ccbegin;
9866 PCRE2_UCHAR opcode;
9867 PCRE2_UCHAR bra = OP_BRA;
9868 jump_list *tmp = NULL;
9869 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
9870 jump_list **found;
9871 /* Saving previous accept variables. */
9872 BOOL save_local_quit_available = common->local_quit_available;
9873 BOOL save_in_positive_assertion = common->in_positive_assertion;
9874 then_trap_backtrack *save_then_trap = common->then_trap;
9875 struct sljit_label *save_quit_label = common->quit_label;
9876 struct sljit_label *save_accept_label = common->accept_label;
9877 jump_list *save_quit = common->quit;
9878 jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
9879 jump_list *save_accept = common->accept;
9880 struct sljit_jump *jump;
9881 struct sljit_jump *brajump = NULL;
9882 
9883 /* Assert captures then. */
9884 common->then_trap = NULL;
9885 
9886 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
9887   {
9888   SLJIT_ASSERT(!conditional);
9889   bra = *cc;
9890   cc++;
9891   }
9892 private_data_ptr = PRIVATE_DATA(cc);
9893 SLJIT_ASSERT(private_data_ptr != 0);
9894 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
9895 backtrack->framesize = framesize;
9896 backtrack->private_data_ptr = private_data_ptr;
9897 opcode = *cc;
9898 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
9899 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
9900 ccbegin = cc;
9901 cc += GET(cc, 1);
9902 
9903 if (bra == OP_BRAMINZERO)
9904   {
9905   /* This is a braminzero backtrack path. */
9906   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9907   free_stack(common, 1);
9908   brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9909   }
9910 
9911 if (framesize < 0)
9912   {
9913   extrasize = 1;
9914   if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
9915     extrasize = 0;
9916 
9917   if (needs_control_head)
9918     extrasize++;
9919 
9920   if (framesize == no_frame)
9921     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
9922 
9923   if (extrasize > 0)
9924     allocate_stack(common, extrasize);
9925 
9926   if (needs_control_head)
9927     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9928 
9929   if (extrasize > 0)
9930     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9931 
9932   if (needs_control_head)
9933     {
9934     SLJIT_ASSERT(extrasize == 2);
9935     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9936     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9937     }
9938   }
9939 else
9940   {
9941   extrasize = needs_control_head ? 3 : 2;
9942   allocate_stack(common, framesize + extrasize);
9943 
9944   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9945   OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
9946   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
9947   if (needs_control_head)
9948     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9949   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9950 
9951   if (needs_control_head)
9952     {
9953     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
9954     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
9955     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9956     }
9957   else
9958     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9959 
9960   init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
9961   }
9962 
9963 memset(&altbacktrack, 0, sizeof(backtrack_common));
9964 if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
9965   {
9966   /* Control verbs cannot escape from these asserts. */
9967   local_quit_available = TRUE;
9968   common->local_quit_available = TRUE;
9969   common->quit_label = NULL;
9970   common->quit = NULL;
9971   }
9972 
9973 common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
9974 common->positive_assertion_quit = NULL;
9975 
9976 while (1)
9977   {
9978   common->accept_label = NULL;
9979   common->accept = NULL;
9980   altbacktrack.top = NULL;
9981   altbacktrack.topbacktracks = NULL;
9982 
9983   if (*ccbegin == OP_ALT && extrasize > 0)
9984     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9985 
9986   altbacktrack.cc = ccbegin;
9987   compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
9988   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9989     {
9990     if (local_quit_available)
9991       {
9992       common->local_quit_available = save_local_quit_available;
9993       common->quit_label = save_quit_label;
9994       common->quit = save_quit;
9995       }
9996     common->in_positive_assertion = save_in_positive_assertion;
9997     common->then_trap = save_then_trap;
9998     common->accept_label = save_accept_label;
9999     common->positive_assertion_quit = save_positive_assertion_quit;
10000     common->accept = save_accept;
10001     return NULL;
10002     }
10003   common->accept_label = LABEL();
10004   if (common->accept != NULL)
10005     set_jumps(common->accept, common->accept_label);
10006 
10007   /* Reset stack. */
10008   if (framesize < 0)
10009     {
10010     if (framesize == no_frame)
10011       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10012     else if (extrasize > 0)
10013       free_stack(common, extrasize);
10014 
10015     if (needs_control_head)
10016       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10017     }
10018   else
10019     {
10020     if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
10021       {
10022       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
10023       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
10024       if (needs_control_head)
10025         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10026       }
10027     else
10028       {
10029       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10030       if (needs_control_head)
10031         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
10032       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10033       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
10034       }
10035     }
10036 
10037   if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
10038     {
10039     /* We know that STR_PTR was stored on the top of the stack. */
10040     if (conditional)
10041       {
10042       if (extrasize > 0)
10043         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
10044       }
10045     else if (bra == OP_BRAZERO)
10046       {
10047       if (framesize < 0)
10048         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
10049       else
10050         {
10051         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
10052         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
10053         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10054         }
10055       OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
10056       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10057       }
10058     else if (framesize >= 0)
10059       {
10060       /* For OP_BRA and OP_BRAMINZERO. */
10061       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
10062       }
10063     }
10064   add_jump(compiler, found, JUMP(SLJIT_JUMP));
10065 
10066   compile_backtrackingpath(common, altbacktrack.top);
10067   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10068     {
10069     if (local_quit_available)
10070       {
10071       common->local_quit_available = save_local_quit_available;
10072       common->quit_label = save_quit_label;
10073       common->quit = save_quit;
10074       }
10075     common->in_positive_assertion = save_in_positive_assertion;
10076     common->then_trap = save_then_trap;
10077     common->accept_label = save_accept_label;
10078     common->positive_assertion_quit = save_positive_assertion_quit;
10079     common->accept = save_accept;
10080     return NULL;
10081     }
10082   set_jumps(altbacktrack.topbacktracks, LABEL());
10083 
10084   if (*cc != OP_ALT)
10085     break;
10086 
10087   ccbegin = cc;
10088   cc += GET(cc, 1);
10089   }
10090 
10091 if (local_quit_available)
10092   {
10093   SLJIT_ASSERT(common->positive_assertion_quit == NULL);
10094   /* Makes the check less complicated below. */
10095   common->positive_assertion_quit = common->quit;
10096   }
10097 
10098 /* None of them matched. */
10099 if (common->positive_assertion_quit != NULL)
10100   {
10101   jump = JUMP(SLJIT_JUMP);
10102   set_jumps(common->positive_assertion_quit, LABEL());
10103   SLJIT_ASSERT(framesize != no_stack);
10104   if (framesize < 0)
10105     OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
10106   else
10107     {
10108     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10109     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10110     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
10111     }
10112   JUMPHERE(jump);
10113   }
10114 
10115 if (needs_control_head)
10116   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
10117 
10118 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
10119   {
10120   /* Assert is failed. */
10121   if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
10122     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10123 
10124   if (framesize < 0)
10125     {
10126     /* The topmost item should be 0. */
10127     if (bra == OP_BRAZERO)
10128       {
10129       if (extrasize == 2)
10130         free_stack(common, 1);
10131       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10132       }
10133     else if (extrasize > 0)
10134       free_stack(common, extrasize);
10135     }
10136   else
10137     {
10138     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
10139     /* The topmost item should be 0. */
10140     if (bra == OP_BRAZERO)
10141       {
10142       free_stack(common, framesize + extrasize - 1);
10143       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10144       }
10145     else
10146       free_stack(common, framesize + extrasize);
10147     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10148     }
10149   jump = JUMP(SLJIT_JUMP);
10150   if (bra != OP_BRAZERO)
10151     add_jump(compiler, target, jump);
10152 
10153   /* Assert is successful. */
10154   set_jumps(tmp, LABEL());
10155   if (framesize < 0)
10156     {
10157     /* We know that STR_PTR was stored on the top of the stack. */
10158     if (extrasize > 0)
10159       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
10160 
10161     /* Keep the STR_PTR on the top of the stack. */
10162     if (bra == OP_BRAZERO)
10163       {
10164       OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
10165       if (extrasize == 2)
10166         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10167       }
10168     else if (bra == OP_BRAMINZERO)
10169       {
10170       OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
10171       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10172       }
10173     }
10174   else
10175     {
10176     if (bra == OP_BRA)
10177       {
10178       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
10179       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
10180       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
10181       }
10182     else
10183       {
10184       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
10185       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
10186       if (extrasize == 2)
10187         {
10188         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10189         if (bra == OP_BRAMINZERO)
10190           OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10191         }
10192       else
10193         {
10194         SLJIT_ASSERT(extrasize == 3);
10195         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10196         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
10197         }
10198       }
10199     }
10200 
10201   if (bra == OP_BRAZERO)
10202     {
10203     backtrack->matchingpath = LABEL();
10204     SET_LABEL(jump, backtrack->matchingpath);
10205     }
10206   else if (bra == OP_BRAMINZERO)
10207     {
10208     JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
10209     JUMPHERE(brajump);
10210     if (framesize >= 0)
10211       {
10212       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10213       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10214       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
10215       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
10216       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10217       }
10218     set_jumps(backtrack->common.topbacktracks, LABEL());
10219     }
10220   }
10221 else
10222   {
10223   /* AssertNot is successful. */
10224   if (framesize < 0)
10225     {
10226     if (extrasize > 0)
10227       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10228 
10229     if (bra != OP_BRA)
10230       {
10231       if (extrasize == 2)
10232         free_stack(common, 1);
10233       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10234       }
10235     else if (extrasize > 0)
10236       free_stack(common, extrasize);
10237     }
10238   else
10239     {
10240     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10241     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
10242     /* The topmost item should be 0. */
10243     if (bra != OP_BRA)
10244       {
10245       free_stack(common, framesize + extrasize - 1);
10246       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10247       }
10248     else
10249       free_stack(common, framesize + extrasize);
10250     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10251     }
10252 
10253   if (bra == OP_BRAZERO)
10254     backtrack->matchingpath = LABEL();
10255   else if (bra == OP_BRAMINZERO)
10256     {
10257     JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
10258     JUMPHERE(brajump);
10259     }
10260 
10261   if (bra != OP_BRA)
10262     {
10263     SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
10264     set_jumps(backtrack->common.topbacktracks, LABEL());
10265     backtrack->common.topbacktracks = NULL;
10266     }
10267   }
10268 
10269 if (local_quit_available)
10270   {
10271   common->local_quit_available = save_local_quit_available;
10272   common->quit_label = save_quit_label;
10273   common->quit = save_quit;
10274   }
10275 common->in_positive_assertion = save_in_positive_assertion;
10276 common->then_trap = save_then_trap;
10277 common->accept_label = save_accept_label;
10278 common->positive_assertion_quit = save_positive_assertion_quit;
10279 common->accept = save_accept;
10280 return cc + 1 + LINK_SIZE;
10281 }
10282 
match_once_common(compiler_common * common,PCRE2_UCHAR ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)10283 static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
10284 {
10285 DEFINE_COMPILER;
10286 int stacksize;
10287 
10288 if (framesize < 0)
10289   {
10290   if (framesize == no_frame)
10291     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10292   else
10293     {
10294     stacksize = needs_control_head ? 1 : 0;
10295     if (ket != OP_KET || has_alternatives)
10296       stacksize++;
10297 
10298     if (stacksize > 0)
10299       free_stack(common, stacksize);
10300     }
10301 
10302   if (needs_control_head)
10303     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
10304 
10305   /* TMP2 which is set here used by OP_KETRMAX below. */
10306   if (ket == OP_KETRMAX)
10307     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10308   else if (ket == OP_KETRMIN)
10309     {
10310     /* Move the STR_PTR to the private_data_ptr. */
10311     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10312     }
10313   }
10314 else
10315   {
10316   stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
10317   OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
10318   if (needs_control_head)
10319     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10320 
10321   if (ket == OP_KETRMAX)
10322     {
10323     /* TMP2 which is set here used by OP_KETRMAX below. */
10324     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10325     }
10326   }
10327 if (needs_control_head)
10328   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10329 }
10330 
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)10331 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
10332 {
10333 DEFINE_COMPILER;
10334 
10335 if (common->capture_last_ptr != 0)
10336   {
10337   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
10338   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10339   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10340   stacksize++;
10341   }
10342 if (common->optimized_cbracket[offset >> 1] == 0)
10343   {
10344   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
10345   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
10346   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10347   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10348   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10349   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10350   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10351   stacksize += 2;
10352   }
10353 return stacksize;
10354 }
10355 
do_script_run(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10356 static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10357 {
10358   if (PRIV(script_run)(ptr, endptr, FALSE))
10359     return endptr;
10360   return NULL;
10361 }
10362 
10363 #ifdef SUPPORT_UNICODE
10364 
do_script_run_utf(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10365 static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10366 {
10367   if (PRIV(script_run)(ptr, endptr, TRUE))
10368     return endptr;
10369   return NULL;
10370 }
10371 
10372 #endif /* SUPPORT_UNICODE */
10373 
match_script_run_common(compiler_common * common,int private_data_ptr,backtrack_common * parent)10374 static SLJIT_INLINE void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
10375 {
10376 DEFINE_COMPILER;
10377 
10378 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
10379 
10380 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10381 #ifdef SUPPORT_UNICODE
10382 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
10383   common->utf ? SLJIT_FUNC_ADDR(do_script_run_utf) : SLJIT_FUNC_ADDR(do_script_run));
10384 #else
10385 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_script_run));
10386 #endif
10387 
10388 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
10389 add_jump(compiler, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
10390 }
10391 
10392 /*
10393   Handling bracketed expressions is probably the most complex part.
10394 
10395   Stack layout naming characters:
10396     S - Push the current STR_PTR
10397     0 - Push a 0 (NULL)
10398     A - Push the current STR_PTR. Needed for restoring the STR_PTR
10399         before the next alternative. Not pushed if there are no alternatives.
10400     M - Any values pushed by the current alternative. Can be empty, or anything.
10401     C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
10402     L - Push the previous local (pointed by localptr) to the stack
10403    () - optional values stored on the stack
10404   ()* - optional, can be stored multiple times
10405 
10406   The following list shows the regular expression templates, their PCRE byte codes
10407   and stack layout supported by pcre-sljit.
10408 
10409   (?:)                     OP_BRA     | OP_KET                A M
10410   ()                       OP_CBRA    | OP_KET                C M
10411   (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
10412                            OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
10413   (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
10414                            OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
10415   ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
10416                            OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
10417   ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
10418                            OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
10419   (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
10420   (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
10421   ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
10422   ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
10423   (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
10424            OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
10425   (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
10426            OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
10427   ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
10428            OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
10429   ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
10430            OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*
10431 
10432 
10433   Stack layout naming characters:
10434     A - Push the alternative index (starting from 0) on the stack.
10435         Not pushed if there are no alternatives.
10436     M - Any values pushed by the current alternative. Can be empty, or anything.
10437 
10438   The next list shows the possible content of a bracket:
10439   (|)     OP_*BRA    | OP_ALT ...         M A
10440   (?()|)  OP_*COND   | OP_ALT             M A
10441   (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
10442                                           Or nothing, if trace is unnecessary
10443 */
10444 
compile_bracket_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)10445 static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10446 {
10447 DEFINE_COMPILER;
10448 backtrack_common *backtrack;
10449 PCRE2_UCHAR opcode;
10450 int private_data_ptr = 0;
10451 int offset = 0;
10452 int i, stacksize;
10453 int repeat_ptr = 0, repeat_length = 0;
10454 int repeat_type = 0, repeat_count = 0;
10455 PCRE2_SPTR ccbegin;
10456 PCRE2_SPTR matchingpath;
10457 PCRE2_SPTR slot;
10458 PCRE2_UCHAR bra = OP_BRA;
10459 PCRE2_UCHAR ket;
10460 assert_backtrack *assert;
10461 BOOL has_alternatives;
10462 BOOL needs_control_head = FALSE;
10463 struct sljit_jump *jump;
10464 struct sljit_jump *skip;
10465 struct sljit_label *rmax_label = NULL;
10466 struct sljit_jump *braminzero = NULL;
10467 
10468 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
10469 
10470 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
10471   {
10472   bra = *cc;
10473   cc++;
10474   opcode = *cc;
10475   }
10476 
10477 opcode = *cc;
10478 ccbegin = cc;
10479 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
10480 ket = *matchingpath;
10481 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
10482   {
10483   repeat_ptr = PRIVATE_DATA(matchingpath);
10484   repeat_length = PRIVATE_DATA(matchingpath + 1);
10485   repeat_type = PRIVATE_DATA(matchingpath + 2);
10486   repeat_count = PRIVATE_DATA(matchingpath + 3);
10487   SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
10488   if (repeat_type == OP_UPTO)
10489     ket = OP_KETRMAX;
10490   if (repeat_type == OP_MINUPTO)
10491     ket = OP_KETRMIN;
10492   }
10493 
10494 matchingpath = ccbegin + 1 + LINK_SIZE;
10495 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
10496 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
10497 cc += GET(cc, 1);
10498 
10499 has_alternatives = *cc == OP_ALT;
10500 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
10501   {
10502   SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
10503     compile_time_checks_must_be_grouped_together);
10504   has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
10505   }
10506 
10507 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
10508   opcode = OP_SCOND;
10509 
10510 if (opcode == OP_CBRA || opcode == OP_SCBRA)
10511   {
10512   /* Capturing brackets has a pre-allocated space. */
10513   offset = GET2(ccbegin, 1 + LINK_SIZE);
10514   if (common->optimized_cbracket[offset] == 0)
10515     {
10516     private_data_ptr = OVECTOR_PRIV(offset);
10517     offset <<= 1;
10518     }
10519   else
10520     {
10521     offset <<= 1;
10522     private_data_ptr = OVECTOR(offset);
10523     }
10524   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10525   matchingpath += IMM2_SIZE;
10526   }
10527 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10528   {
10529   /* Other brackets simply allocate the next entry. */
10530   private_data_ptr = PRIVATE_DATA(ccbegin);
10531   SLJIT_ASSERT(private_data_ptr != 0);
10532   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10533   if (opcode == OP_ONCE)
10534     BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
10535   }
10536 
10537 /* Instructions before the first alternative. */
10538 stacksize = 0;
10539 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10540   stacksize++;
10541 if (bra == OP_BRAZERO)
10542   stacksize++;
10543 
10544 if (stacksize > 0)
10545   allocate_stack(common, stacksize);
10546 
10547 stacksize = 0;
10548 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10549   {
10550   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10551   stacksize++;
10552   }
10553 
10554 if (bra == OP_BRAZERO)
10555   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10556 
10557 if (bra == OP_BRAMINZERO)
10558   {
10559   /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
10560   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10561   if (ket != OP_KETRMIN)
10562     {
10563     free_stack(common, 1);
10564     braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10565     }
10566   else if (opcode == OP_ONCE || opcode >= OP_SBRA)
10567     {
10568     jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10569     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10570     /* Nothing stored during the first run. */
10571     skip = JUMP(SLJIT_JUMP);
10572     JUMPHERE(jump);
10573     /* Checking zero-length iteration. */
10574     if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10575       {
10576       /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
10577       braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10578       }
10579     else
10580       {
10581       /* Except when the whole stack frame must be saved. */
10582       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10583       braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
10584       }
10585     JUMPHERE(skip);
10586     }
10587   else
10588     {
10589     jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10590     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10591     JUMPHERE(jump);
10592     }
10593   }
10594 
10595 if (repeat_type != 0)
10596   {
10597   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
10598   if (repeat_type == OP_EXACT)
10599     rmax_label = LABEL();
10600   }
10601 
10602 if (ket == OP_KETRMIN)
10603   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10604 
10605 if (ket == OP_KETRMAX)
10606   {
10607   rmax_label = LABEL();
10608   if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
10609     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
10610   }
10611 
10612 /* Handling capturing brackets and alternatives. */
10613 if (opcode == OP_ONCE)
10614   {
10615   stacksize = 0;
10616   if (needs_control_head)
10617     {
10618     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10619     stacksize++;
10620     }
10621 
10622   if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10623     {
10624     /* Neither capturing brackets nor recursions are found in the block. */
10625     if (ket == OP_KETRMIN)
10626       {
10627       stacksize += 2;
10628       if (!needs_control_head)
10629         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10630       }
10631     else
10632       {
10633       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10634         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
10635       if (ket == OP_KETRMAX || has_alternatives)
10636         stacksize++;
10637       }
10638 
10639     if (stacksize > 0)
10640       allocate_stack(common, stacksize);
10641 
10642     stacksize = 0;
10643     if (needs_control_head)
10644       {
10645       stacksize++;
10646       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10647       }
10648 
10649     if (ket == OP_KETRMIN)
10650       {
10651       if (needs_control_head)
10652         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10653       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10654       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10655         OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
10656       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10657       }
10658     else if (ket == OP_KETRMAX || has_alternatives)
10659       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10660     }
10661   else
10662     {
10663     if (ket != OP_KET || has_alternatives)
10664       stacksize++;
10665 
10666     stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
10667     allocate_stack(common, stacksize);
10668 
10669     if (needs_control_head)
10670       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10671 
10672     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10673     OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10674 
10675     stacksize = needs_control_head ? 1 : 0;
10676     if (ket != OP_KET || has_alternatives)
10677       {
10678       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10679       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10680       stacksize++;
10681       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10682       }
10683     else
10684       {
10685       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10686       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10687       }
10688     init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
10689     }
10690   }
10691 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
10692   {
10693   /* Saving the previous values. */
10694   if (common->optimized_cbracket[offset >> 1] != 0)
10695     {
10696     SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
10697     allocate_stack(common, 2);
10698     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10699     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
10700     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10701     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
10702     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
10703     }
10704   else
10705     {
10706     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10707     allocate_stack(common, 1);
10708     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10709     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10710     }
10711   }
10712 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10713   {
10714   /* Saving the previous value. */
10715   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10716   allocate_stack(common, 1);
10717   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10718   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10719   }
10720 else if (has_alternatives)
10721   {
10722   /* Pushing the starting string pointer. */
10723   allocate_stack(common, 1);
10724   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10725   }
10726 
10727 /* Generating code for the first alternative. */
10728 if (opcode == OP_COND || opcode == OP_SCOND)
10729   {
10730   if (*matchingpath == OP_CREF)
10731     {
10732     SLJIT_ASSERT(has_alternatives);
10733     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
10734       CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
10735     matchingpath += 1 + IMM2_SIZE;
10736     }
10737   else if (*matchingpath == OP_DNCREF)
10738     {
10739     SLJIT_ASSERT(has_alternatives);
10740 
10741     i = GET2(matchingpath, 1 + IMM2_SIZE);
10742     slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10743     OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
10744     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
10745     OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
10746     slot += common->name_entry_size;
10747     i--;
10748     while (i-- > 0)
10749       {
10750       OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
10751       OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
10752       slot += common->name_entry_size;
10753       }
10754     OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
10755     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
10756     matchingpath += 1 + 2 * IMM2_SIZE;
10757     }
10758   else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
10759     {
10760     /* Never has other case. */
10761     BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
10762     SLJIT_ASSERT(!has_alternatives);
10763 
10764     if (*matchingpath == OP_TRUE)
10765       {
10766       stacksize = 1;
10767       matchingpath++;
10768       }
10769     else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
10770       stacksize = 0;
10771     else if (*matchingpath == OP_RREF)
10772       {
10773       stacksize = GET2(matchingpath, 1);
10774       if (common->currententry == NULL)
10775         stacksize = 0;
10776       else if (stacksize == RREF_ANY)
10777         stacksize = 1;
10778       else if (common->currententry->start == 0)
10779         stacksize = stacksize == 0;
10780       else
10781         stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10782 
10783       if (stacksize != 0)
10784         matchingpath += 1 + IMM2_SIZE;
10785       }
10786     else
10787       {
10788       if (common->currententry == NULL || common->currententry->start == 0)
10789         stacksize = 0;
10790       else
10791         {
10792         stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
10793         slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10794         i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10795         while (stacksize > 0)
10796           {
10797           if ((int)GET2(slot, 0) == i)
10798             break;
10799           slot += common->name_entry_size;
10800           stacksize--;
10801           }
10802         }
10803 
10804       if (stacksize != 0)
10805         matchingpath += 1 + 2 * IMM2_SIZE;
10806       }
10807 
10808       /* The stacksize == 0 is a common "else" case. */
10809       if (stacksize == 0)
10810         {
10811         if (*cc == OP_ALT)
10812           {
10813           matchingpath = cc + 1 + LINK_SIZE;
10814           cc += GET(cc, 1);
10815           }
10816         else
10817           matchingpath = cc;
10818         }
10819     }
10820   else
10821     {
10822     SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
10823     /* Similar code as PUSH_BACKTRACK macro. */
10824     assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
10825     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10826       return NULL;
10827     memset(assert, 0, sizeof(assert_backtrack));
10828     assert->common.cc = matchingpath;
10829     BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
10830     matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
10831     }
10832   }
10833 
10834 compile_matchingpath(common, matchingpath, cc, backtrack);
10835 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10836   return NULL;
10837 
10838 if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
10839   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10840 
10841 if (opcode == OP_ONCE)
10842   match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
10843 
10844 if (opcode == OP_SCRIPT_RUN)
10845   match_script_run_common(common, private_data_ptr, backtrack);
10846 
10847 stacksize = 0;
10848 if (repeat_type == OP_MINUPTO)
10849   {
10850   /* We need to preserve the counter. TMP2 will be used below. */
10851   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10852   stacksize++;
10853   }
10854 if (ket != OP_KET || bra != OP_BRA)
10855   stacksize++;
10856 if (offset != 0)
10857   {
10858   if (common->capture_last_ptr != 0)
10859     stacksize++;
10860   if (common->optimized_cbracket[offset >> 1] == 0)
10861     stacksize += 2;
10862   }
10863 if (has_alternatives && opcode != OP_ONCE)
10864   stacksize++;
10865 
10866 if (stacksize > 0)
10867   allocate_stack(common, stacksize);
10868 
10869 stacksize = 0;
10870 if (repeat_type == OP_MINUPTO)
10871   {
10872   /* TMP2 was set above. */
10873   OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
10874   stacksize++;
10875   }
10876 
10877 if (ket != OP_KET || bra != OP_BRA)
10878   {
10879   if (ket != OP_KET)
10880     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10881   else
10882     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10883   stacksize++;
10884   }
10885 
10886 if (offset != 0)
10887   stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
10888 
10889 /* Skip and count the other alternatives. */
10890 i = 1;
10891 while (*cc == OP_ALT)
10892   {
10893   cc += GET(cc, 1);
10894   i++;
10895   }
10896 
10897 if (has_alternatives)
10898   {
10899   if (opcode != OP_ONCE)
10900     {
10901     if (i <= 3)
10902       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10903     else
10904       BACKTRACK_AS(bracket_backtrack)->u.matching_put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
10905     }
10906   if (ket != OP_KETRMAX)
10907     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10908   }
10909 
10910 /* Must be after the matchingpath label. */
10911 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
10912   {
10913   SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
10914   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10915   }
10916 
10917 if (ket == OP_KETRMAX)
10918   {
10919   if (repeat_type != 0)
10920     {
10921     if (has_alternatives)
10922       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10923     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10924     JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10925     /* Drop STR_PTR for greedy plus quantifier. */
10926     if (opcode != OP_ONCE)
10927       free_stack(common, 1);
10928     }
10929   else if (opcode < OP_BRA || opcode >= OP_SBRA)
10930     {
10931     if (has_alternatives)
10932       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10933 
10934     /* Checking zero-length iteration. */
10935     if (opcode != OP_ONCE)
10936       {
10937       /* This case includes opcodes such as OP_SCRIPT_RUN. */
10938       CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
10939       /* Drop STR_PTR for greedy plus quantifier. */
10940       if (bra != OP_BRAZERO)
10941         free_stack(common, 1);
10942       }
10943     else
10944       /* TMP2 must contain the starting STR_PTR. */
10945       CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
10946     }
10947   else
10948     JUMPTO(SLJIT_JUMP, rmax_label);
10949   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10950   }
10951 
10952 if (repeat_type == OP_EXACT)
10953   {
10954   count_match(common);
10955   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10956   JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10957   }
10958 else if (repeat_type == OP_UPTO)
10959   {
10960   /* We need to preserve the counter. */
10961   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10962   allocate_stack(common, 1);
10963   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10964   }
10965 
10966 if (bra == OP_BRAZERO)
10967   BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
10968 
10969 if (bra == OP_BRAMINZERO)
10970   {
10971   /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
10972   JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
10973   if (braminzero != NULL)
10974     {
10975     JUMPHERE(braminzero);
10976     /* We need to release the end pointer to perform the
10977     backtrack for the zero-length iteration. When
10978     framesize is < 0, OP_ONCE will do the release itself. */
10979     if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
10980       {
10981       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10982       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10983       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
10984       }
10985     else if (ket == OP_KETRMIN && opcode != OP_ONCE)
10986       free_stack(common, 1);
10987     }
10988   /* Continue to the normal backtrack. */
10989   }
10990 
10991 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
10992   count_match(common);
10993 
10994 cc += 1 + LINK_SIZE;
10995 
10996 if (opcode == OP_ONCE)
10997   {
10998   /* We temporarily encode the needs_control_head in the lowest bit.
10999      Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
11000      the same value for small signed numbers (including negative numbers). */
11001   BACKTRACK_AS(bracket_backtrack)->u.framesize = (int)((unsigned)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
11002   }
11003 return cc + repeat_length;
11004 }
11005 
/* Generates the matching path of a bracket that starts with OP_BRAPOS,
OP_SBRAPOS, OP_CBRAPOS or OP_SCBRAPOS (optionally preceded by
OP_BRAPOSZERO) and is closed by OP_KETRPOS.

Arguments:
  common    the compiler state
  cc        points to OP_BRAPOSZERO or to the bracket opcode itself
  parent    the parent backtrack descriptor (consumed by PUSH_BACKTRACK)

Returns: the byte code position right after the closing OP_KETRPOS,
         or NULL when the sljit compiler reports an error.

The emitted code runs the alternatives in a loop: whenever an alternative
matches, the saved position (and capture data for the capturing variants)
is updated and control jumps back to the loop label. The backtracking path
of each alternative is emitted inline, directly after its matching path.
Unless OP_BRAPOSZERO was present, a flag slot is initialized to 1 and
cleared to 0 after a completed iteration; when it is still non-zero after
all alternatives failed, a jump is added to backtrack->topbacktracks. */
static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode;
int private_data_ptr;
int cbraprivptr = 0;
BOOL needs_control_head;
int framesize;
int stacksize;
int offset = 0;
BOOL zero = FALSE;
PCRE2_SPTR ccbegin = NULL;
int stack; /* Also contains the offset of control head. */
struct sljit_label *loop = NULL;
struct jump_list *emptymatch = NULL;

/* NOTE(review): PUSH_BACKTRACK is expected to set up the local 'backtrack'
variable, which is used below without any other assignment. */
PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
if (*cc == OP_BRAPOSZERO)
  {
  zero = TRUE;
  cc++;
  }

opcode = *cc;
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
switch(opcode)
  {
  case OP_BRAPOS:
  case OP_SBRAPOS:
  ccbegin = cc + 1 + LINK_SIZE;
  break;

  case OP_CBRAPOS:
  case OP_SCBRAPOS:
  offset = GET2(cc, 1 + LINK_SIZE);
  /* This case cannot be optimized in the same way as
  normal capturing brackets. */
  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
  cbraprivptr = OVECTOR_PRIV(offset);
  /* From here on, offset is the index of the ovector pair. */
  offset <<= 1;
  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
if (framesize < 0)
  {
  /* No frame is created. Slots allocated here, in order:
       capturing:     saved OVECTOR(offset), saved OVECTOR(offset + 1),
                      [saved capture_last_ptr value]
       non-capturing: STR_PTR
       [control head] (when needs_control_head)
       [flag]         (when !zero, always the last slot) */
  if (offset != 0)
    {
    stacksize = 2;
    if (common->capture_last_ptr != 0)
      stacksize++;
    }
  else
    stacksize = 1;

  if (needs_control_head)
    stacksize++;
  if (!zero)
    stacksize++;

  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
  allocate_stack(common, stacksize);
  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  stack = 0;
  if (offset != 0)
    {
    stack = 2;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    if (common->capture_last_ptr != 0)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
      stack = 3;
      }
    }
  else
    {
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    stack = 1;
    }

  /* The flag is stored after the control head slot, so step over the
  control head slot temporarily, then step back to store the control head
  itself. Afterwards 'stack' is the offset of the control head slot. */
  if (needs_control_head)
    stack++;
  if (!zero)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
  if (needs_control_head)
    {
    stack--;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    }
  }
else
  {
  /* A frame is created. Slots allocated here, in order:
       [flag]          (when !zero, always STACK(0))
       [control head]  (when needs_control_head)
       [STR_PTR]       (non-capturing variants only)
       previous value stored at private_data_ptr
       the frame written by init_frame */
  stacksize = framesize + 1;
  if (!zero)
    stacksize++;
  if (needs_control_head)
    stacksize++;
  if (offset == 0)
    stacksize++;
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;

  allocate_stack(common, stacksize);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

  stack = 0;
  if (!zero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
    stack = 1;
    }
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    stack++;
    }
  if (offset == 0)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
    stack++;
    }
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
  /* Step back over the slots written after the control head, so that
  'stack' is the offset of the control head slot again. */
  stack -= 1 + (offset == 0);
  }

if (offset != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);

loop = LABEL();
while (*cc != OP_KETRPOS)
  {
  backtrack->top = NULL;
  backtrack->topbacktracks = NULL;
  /* Move cc to the end of the current alternative. */
  cc += GET(cc, 1);

  compile_matchingpath(common, ccbegin, cc, backtrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  /* Code executed when the current alternative has matched. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

    if (offset != 0)
      {
      /* TMP1 is the value stored at cbraprivptr before this iteration;
      it becomes the start of the capture. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      /* TMP1 (previous saved position) is only needed for the empty
      match check below. */
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    /* Leave the loop when the iteration did not advance STR_PTR. */
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* Clear the flag: at least one iteration has completed. */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
    }
  else
    {
    if (offset != 0)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    /* Leave the loop when the iteration did not advance STR_PTR. */
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* Clear the flag: at least one iteration has completed. Since
    framesize >= 0 in this branch, the flag is always in STACK(0). */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    }

  JUMPTO(SLJIT_JUMP, loop);
  flush_stubs(common);

  /* The backtracking path of the alternative is emitted right here. */
  compile_backtrackingpath(common, backtrack->top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;
  set_jumps(backtrack->topbacktracks, LABEL());

  /* The alternative failed: reload STR_PTR from its saved location. */
  if (framesize < 0)
    {
    if (offset != 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
    else
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    }
  else
    {
    if (offset != 0)
      {
      /* Last alternative. */
      if (*cc == OP_KETRPOS)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      }
    }

  if (*cc == OP_KETRPOS)
    break;
  /* Continue with the next alternative (skip the OP_ALT and its link). */
  ccbegin = cc + 1 + LINK_SIZE;
  }

/* We don't have to restore the control head in case of a failed match. */

backtrack->topbacktracks = NULL;
if (!zero)
  {
  /* A non-zero flag means that no iteration has completed. */
  if (framesize < 0)
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
  else /* TMP2 is set to [private_data_ptr] above. */
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
  }

/* None of them matched. */
set_jumps(emptymatch, LABEL());
count_match(common);
return cc + 1 + LINK_SIZE;
}
11288 
get_iterator_parameters(compiler_common * common,PCRE2_SPTR cc,PCRE2_UCHAR * opcode,PCRE2_UCHAR * type,sljit_u32 * max,sljit_u32 * exact,PCRE2_SPTR * end)11289 static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
11290 {
11291 int class_len;
11292 
11293 *opcode = *cc;
11294 *exact = 0;
11295 
11296 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
11297   {
11298   cc++;
11299   *type = OP_CHAR;
11300   }
11301 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
11302   {
11303   cc++;
11304   *type = OP_CHARI;
11305   *opcode -= OP_STARI - OP_STAR;
11306   }
11307 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
11308   {
11309   cc++;
11310   *type = OP_NOT;
11311   *opcode -= OP_NOTSTAR - OP_STAR;
11312   }
11313 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
11314   {
11315   cc++;
11316   *type = OP_NOTI;
11317   *opcode -= OP_NOTSTARI - OP_STAR;
11318   }
11319 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
11320   {
11321   cc++;
11322   *opcode -= OP_TYPESTAR - OP_STAR;
11323   *type = OP_END;
11324   }
11325 else
11326   {
11327   SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
11328   *type = *opcode;
11329   cc++;
11330   class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
11331   *opcode = cc[class_len - 1];
11332 
11333   if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
11334     {
11335     *opcode -= OP_CRSTAR - OP_STAR;
11336     *end = cc + class_len;
11337 
11338     if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
11339       {
11340       *exact = 1;
11341       *opcode -= OP_PLUS - OP_STAR;
11342       }
11343     }
11344   else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
11345     {
11346     *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
11347     *end = cc + class_len;
11348 
11349     if (*opcode == OP_POSPLUS)
11350       {
11351       *exact = 1;
11352       *opcode = OP_POSSTAR;
11353       }
11354     }
11355   else
11356     {
11357     SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
11358     *max = GET2(cc, (class_len + IMM2_SIZE));
11359     *exact = GET2(cc, class_len);
11360 
11361     if (*max == 0)
11362       {
11363       if (*opcode == OP_CRPOSRANGE)
11364         *opcode = OP_POSSTAR;
11365       else
11366         *opcode -= OP_CRRANGE - OP_STAR;
11367       }
11368     else
11369       {
11370       *max -= *exact;
11371       if (*max == 0)
11372         *opcode = OP_EXACT;
11373       else if (*max == 1)
11374         {
11375         if (*opcode == OP_CRPOSRANGE)
11376           *opcode = OP_POSQUERY;
11377         else
11378           *opcode -= OP_CRRANGE - OP_QUERY;
11379         }
11380       else
11381         {
11382         if (*opcode == OP_CRPOSRANGE)
11383           *opcode = OP_POSUPTO;
11384         else
11385           *opcode -= OP_CRRANGE - OP_UPTO;
11386         }
11387       }
11388     *end = cc + class_len + 2 * IMM2_SIZE;
11389     }
11390   return cc;
11391   }
11392 
11393 switch(*opcode)
11394   {
11395   case OP_EXACT:
11396   *exact = GET2(cc, 0);
11397   cc += IMM2_SIZE;
11398   break;
11399 
11400   case OP_PLUS:
11401   case OP_MINPLUS:
11402   *exact = 1;
11403   *opcode -= OP_PLUS - OP_STAR;
11404   break;
11405 
11406   case OP_POSPLUS:
11407   *exact = 1;
11408   *opcode = OP_POSSTAR;
11409   break;
11410 
11411   case OP_UPTO:
11412   case OP_MINUPTO:
11413   case OP_POSUPTO:
11414   *max = GET2(cc, 0);
11415   cc += IMM2_SIZE;
11416   break;
11417   }
11418 
11419 if (*type == OP_END)
11420   {
11421   *type = *cc;
11422   *end = next_opcode(common, cc);
11423   cc++;
11424   return cc;
11425   }
11426 
11427 *end = cc + 1;
11428 #ifdef SUPPORT_UNICODE
11429 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
11430 #endif
11431 return cc;
11432 }
11433 
compile_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11434 static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11435 {
11436 DEFINE_COMPILER;
11437 backtrack_common *backtrack;
11438 PCRE2_UCHAR opcode;
11439 PCRE2_UCHAR type;
11440 sljit_u32 max = 0, exact;
11441 sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
11442 sljit_s32 early_fail_type;
11443 BOOL charpos_enabled;
11444 PCRE2_UCHAR charpos_char;
11445 unsigned int charpos_othercasebit;
11446 PCRE2_SPTR end;
11447 jump_list *no_match = NULL;
11448 jump_list *no_char1_match = NULL;
11449 struct sljit_jump *jump = NULL;
11450 struct sljit_label *label;
11451 int private_data_ptr = PRIVATE_DATA(cc);
11452 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
11453 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
11454 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
11455 int tmp_base, tmp_offset;
11456 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11457 BOOL use_tmp;
11458 #endif
11459 
11460 PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
11461 
11462 early_fail_type = (early_fail_ptr & 0x7);
11463 early_fail_ptr >>= 3;
11464 
11465 /* During recursion, these optimizations are disabled. */
11466 if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL)
11467   {
11468   early_fail_ptr = 0;
11469   early_fail_type = type_skip;
11470   }
11471 
11472 SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
11473   || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));
11474 
11475 if (early_fail_type == type_fail)
11476   add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));
11477 
11478 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
11479 
11480 if (type != OP_EXTUNI)
11481   {
11482   tmp_base = TMP3;
11483   tmp_offset = 0;
11484   }
11485 else
11486   {
11487   tmp_base = SLJIT_MEM1(SLJIT_SP);
11488   tmp_offset = POSSESSIVE0;
11489   }
11490 
11491 /* Handle fixed part first. */
11492 if (exact > 1)
11493   {
11494   SLJIT_ASSERT(early_fail_ptr == 0);
11495 
11496   if (common->mode == PCRE2_JIT_COMPLETE
11497 #ifdef SUPPORT_UNICODE
11498       && !common->utf
11499 #endif
11500       && type != OP_ANYNL && type != OP_EXTUNI)
11501     {
11502     OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
11503     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
11504     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11505     label = LABEL();
11506     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11507     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11508     JUMPTO(SLJIT_NOT_ZERO, label);
11509     }
11510   else
11511     {
11512     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11513     label = LABEL();
11514     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11515     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11516     JUMPTO(SLJIT_NOT_ZERO, label);
11517     }
11518   }
11519 else if (exact == 1)
11520   compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11521 
11522 if (early_fail_type == type_fail_range)
11523   {
11524   /* Range end first, followed by range start. */
11525   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
11526   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw));
11527   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
11528   OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
11529   add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
11530 
11531   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11532   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw), STR_PTR, 0);
11533   }
11534 
11535 switch(opcode)
11536   {
11537   case OP_STAR:
11538   case OP_UPTO:
11539   SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR);
11540 
11541   if (type == OP_ANYNL || type == OP_EXTUNI)
11542     {
11543     SLJIT_ASSERT(private_data_ptr == 0);
11544     SLJIT_ASSERT(early_fail_ptr == 0);
11545 
11546     allocate_stack(common, 2);
11547     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11548     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
11549 
11550     if (opcode == OP_UPTO)
11551       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
11552 
11553     label = LABEL();
11554     compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11555     if (opcode == OP_UPTO)
11556       {
11557       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
11558       OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11559       jump = JUMP(SLJIT_ZERO);
11560       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
11561       }
11562 
11563     /* We cannot use TMP3 because of allocate_stack. */
11564     allocate_stack(common, 1);
11565     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11566     JUMPTO(SLJIT_JUMP, label);
11567     if (jump != NULL)
11568       JUMPHERE(jump);
11569     BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11570     break;
11571     }
11572 #ifdef SUPPORT_UNICODE
11573   else if (type == OP_ALLANY && !common->invalid_utf)
11574 #else
11575   else if (type == OP_ALLANY)
11576 #endif
11577     {
11578     if (opcode == OP_STAR)
11579       {
11580       if (private_data_ptr == 0)
11581         allocate_stack(common, 2);
11582 
11583       OP1(SLJIT_MOV, base, offset0, STR_END, 0);
11584       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11585 
11586       OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11587       process_partial_match(common);
11588 
11589       if (early_fail_ptr != 0)
11590         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11591       BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11592       break;
11593       }
11594 #ifdef SUPPORT_UNICODE
11595     else if (!common->utf)
11596 #else
11597     else
11598 #endif
11599       {
11600       if (private_data_ptr == 0)
11601         allocate_stack(common, 2);
11602 
11603       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11604       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11605 
11606       if (common->mode == PCRE2_JIT_COMPLETE)
11607         {
11608         OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
11609         CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11610         }
11611       else
11612         {
11613         jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11614         process_partial_match(common);
11615         JUMPHERE(jump);
11616         }
11617 
11618       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11619 
11620       if (early_fail_ptr != 0)
11621         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11622       BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11623       break;
11624       }
11625     }
11626 
11627   charpos_enabled = FALSE;
11628   charpos_char = 0;
11629   charpos_othercasebit = 0;
11630 
11631   if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
11632     {
11633 #ifdef SUPPORT_UNICODE
11634     charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
11635 #else
11636     charpos_enabled = TRUE;
11637 #endif
11638     if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
11639       {
11640       charpos_othercasebit = char_get_othercase_bit(common, end + 1);
11641       if (charpos_othercasebit == 0)
11642         charpos_enabled = FALSE;
11643       }
11644 
11645     if (charpos_enabled)
11646       {
11647       charpos_char = end[1];
11648       /* Consume the OP_CHAR opcode. */
11649       end += 2;
11650 #if PCRE2_CODE_UNIT_WIDTH == 8
11651       SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
11652 #elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
11653       SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
11654       if ((charpos_othercasebit & 0x100) != 0)
11655         charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
11656 #endif
11657       if (charpos_othercasebit != 0)
11658         charpos_char |= charpos_othercasebit;
11659 
11660       BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
11661       BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
11662       BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
11663       }
11664     }
11665 
11666   if (charpos_enabled)
11667     {
11668     if (opcode == OP_UPTO)
11669       OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
11670 
11671     /* Search the first instance of charpos_char. */
11672     jump = JUMP(SLJIT_JUMP);
11673     label = LABEL();
11674     if (opcode == OP_UPTO)
11675       {
11676       OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11677       add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
11678       }
11679     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11680     if (early_fail_ptr != 0)
11681       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11682     JUMPHERE(jump);
11683 
11684     detect_partial_match(common, &backtrack->topbacktracks);
11685     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11686     if (charpos_othercasebit != 0)
11687       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11688     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11689 
11690     if (private_data_ptr == 0)
11691       allocate_stack(common, 2);
11692     OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11693     OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11694 
11695     if (opcode == OP_UPTO)
11696       {
11697       OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11698       add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11699       }
11700 
11701     /* Search the last instance of charpos_char. */
11702     label = LABEL();
11703     compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11704     if (early_fail_ptr != 0)
11705       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11706     detect_partial_match(common, &no_match);
11707     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11708     if (charpos_othercasebit != 0)
11709       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11710 
11711     if (opcode == OP_STAR)
11712       {
11713       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11714       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11715       JUMPTO(SLJIT_JUMP, label);
11716       }
11717     else
11718       {
11719       jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
11720       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11721       JUMPHERE(jump);
11722       OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11723       JUMPTO(SLJIT_NOT_ZERO, label);
11724       }
11725 
11726     set_jumps(no_match, LABEL());
11727     OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1));
11728     OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11729     }
11730   else
11731     {
11732     if (private_data_ptr == 0)
11733       allocate_stack(common, 2);
11734 
11735     OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11736 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11737     use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR);
11738     SLJIT_ASSERT(!use_tmp || tmp_base == TMP3);
11739 
11740     if (common->utf)
11741       OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11742 #endif
11743     if (opcode == OP_UPTO)
11744       OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11745 
11746     detect_partial_match(common, &no_match);
11747     label = LABEL();
11748     compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11749 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11750     if (common->utf)
11751       OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11752 #endif
11753 
11754     if (opcode == OP_UPTO)
11755       {
11756       OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11757       add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11758       }
11759 
11760     detect_partial_match_to(common, label);
11761     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11762 
11763     set_jumps(no_char1_match, LABEL());
11764 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11765     if (common->utf)
11766       {
11767       set_jumps(no_match, LABEL());
11768       if (use_tmp)
11769         {
11770         OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
11771         OP1(SLJIT_MOV, base, offset0, TMP3, 0);
11772         }
11773       else
11774         OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11775       }
11776     else
11777 #endif
11778       {
11779       OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11780       set_jumps(no_match, LABEL());
11781       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11782       }
11783 
11784     if (early_fail_ptr != 0)
11785       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11786     }
11787 
11788   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11789   break;
11790 
11791   case OP_MINSTAR:
11792   if (private_data_ptr == 0)
11793     allocate_stack(common, 1);
11794   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11795   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11796   if (early_fail_ptr != 0)
11797     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11798   break;
11799 
11800   case OP_MINUPTO:
11801   SLJIT_ASSERT(early_fail_ptr == 0);
11802   if (private_data_ptr == 0)
11803     allocate_stack(common, 2);
11804   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11805   OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
11806   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11807   break;
11808 
11809   case OP_QUERY:
11810   case OP_MINQUERY:
11811   SLJIT_ASSERT(early_fail_ptr == 0);
11812   if (private_data_ptr == 0)
11813     allocate_stack(common, 1);
11814   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11815   if (opcode == OP_QUERY)
11816     compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11817   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11818   break;
11819 
11820   case OP_EXACT:
11821   break;
11822 
11823   case OP_POSSTAR:
11824 #if defined SUPPORT_UNICODE
11825   if (type == OP_ALLANY && !common->invalid_utf)
11826 #else
11827   if (type == OP_ALLANY)
11828 #endif
11829     {
11830     OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11831     process_partial_match(common);
11832     if (early_fail_ptr != 0)
11833       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11834     break;
11835     }
11836 
11837 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11838   if (common->utf)
11839     {
11840     OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11841     detect_partial_match(common, &no_match);
11842     label = LABEL();
11843     compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11844     OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11845     detect_partial_match_to(common, label);
11846 
11847     set_jumps(no_match, LABEL());
11848     OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11849     if (early_fail_ptr != 0)
11850       {
11851       if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3)
11852         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0);
11853       else
11854         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11855       }
11856     break;
11857     }
11858 #endif
11859 
11860   detect_partial_match(common, &no_match);
11861   label = LABEL();
11862   compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11863   detect_partial_match_to(common, label);
11864   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11865 
11866   set_jumps(no_char1_match, LABEL());
11867   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11868   set_jumps(no_match, LABEL());
11869   if (early_fail_ptr != 0)
11870     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11871   break;
11872 
11873   case OP_POSUPTO:
11874   SLJIT_ASSERT(early_fail_ptr == 0);
11875 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11876   if (common->utf)
11877     {
11878     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11879     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11880 
11881     detect_partial_match(common, &no_match);
11882     label = LABEL();
11883     compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11884     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11885     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11886     add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11887     detect_partial_match_to(common, label);
11888 
11889     set_jumps(no_match, LABEL());
11890     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
11891     break;
11892     }
11893 #endif
11894 
11895   if (type == OP_ALLANY)
11896     {
11897     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11898 
11899     if (common->mode == PCRE2_JIT_COMPLETE)
11900       {
11901       OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
11902       CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11903       }
11904     else
11905       {
11906       jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11907       process_partial_match(common);
11908       JUMPHERE(jump);
11909       }
11910     break;
11911     }
11912 
11913   OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11914 
11915   detect_partial_match(common, &no_match);
11916   label = LABEL();
11917   compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11918   OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11919   add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11920   detect_partial_match_to(common, label);
11921   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11922 
11923   set_jumps(no_char1_match, LABEL());
11924   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11925   set_jumps(no_match, LABEL());
11926   break;
11927 
11928   case OP_POSQUERY:
11929   SLJIT_ASSERT(early_fail_ptr == 0);
11930   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11931   compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11932   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11933   set_jumps(no_match, LABEL());
11934   OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11935   break;
11936 
11937   default:
11938   SLJIT_UNREACHABLE();
11939   break;
11940   }
11941 
11942 count_match(common);
11943 return end;
11944 }
11945 
compile_fail_accept_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11946 static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11947 {
11948 DEFINE_COMPILER;
11949 backtrack_common *backtrack;
11950 
11951 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
11952 
11953 if (*cc == OP_FAIL)
11954   {
11955   add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11956   return cc + 1;
11957   }
11958 
11959 if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
11960   add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
11961 
11962 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
11963   {
11964   /* No need to check notempty conditions. */
11965   if (common->accept_label == NULL)
11966     add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
11967   else
11968     JUMPTO(SLJIT_JUMP, common->accept_label);
11969   return cc + 1;
11970   }
11971 
11972 if (common->accept_label == NULL)
11973   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
11974 else
11975   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
11976 
11977 if (HAS_VIRTUAL_REGISTERS)
11978   {
11979   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11980   OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
11981   }
11982 else
11983   OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options));
11984 
11985 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
11986 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO));
11987 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
11988 if (common->accept_label == NULL)
11989   add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
11990 else
11991   JUMPTO(SLJIT_ZERO, common->accept_label);
11992 
11993 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
11994 if (common->accept_label == NULL)
11995   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
11996 else
11997   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
11998 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11999 return cc + 1;
12000 }
12001 
compile_close_matchingpath(compiler_common * common,PCRE2_SPTR cc)12002 static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
12003 {
12004 DEFINE_COMPILER;
12005 int offset = GET2(cc, 1);
12006 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
12007 
12008 /* Data will be discarded anyway... */
12009 if (common->currententry != NULL)
12010   return cc + 1 + IMM2_SIZE;
12011 
12012 if (!optimized_cbracket)
12013   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
12014 offset <<= 1;
12015 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
12016 if (!optimized_cbracket)
12017   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12018 return cc + 1 + IMM2_SIZE;
12019 }
12020 
compile_control_verb_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)12021 static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
12022 {
12023 DEFINE_COMPILER;
12024 backtrack_common *backtrack;
12025 PCRE2_UCHAR opcode = *cc;
12026 PCRE2_SPTR ccend = cc + 1;
12027 
12028 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
12029     opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
12030   ccend += 2 + cc[1];
12031 
12032 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
12033 
12034 if (opcode == OP_SKIP)
12035   {
12036   allocate_stack(common, 1);
12037   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
12038   return ccend;
12039   }
12040 
12041 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
12042   {
12043   if (HAS_VIRTUAL_REGISTERS)
12044     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12045   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
12046   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
12047   OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
12048   }
12049 
12050 return ccend;
12051 }
12052 
/* One-opcode pseudo pattern holding OP_THEN_TRAP. The then-trap backtrack
entries created by compile_then_trap_matchingpath() and compile_matchingpath()
point their common.cc field at this array. */
static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
12054 
compile_then_trap_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)12055 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
12056 {
12057 DEFINE_COMPILER;
12058 backtrack_common *backtrack;
12059 BOOL needs_control_head;
12060 int size;
12061 
12062 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
12063 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
12064 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
12065 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
12066 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
12067 
12068 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
12069 size = 3 + (size < 0 ? 0 : size);
12070 
12071 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12072 allocate_stack(common, size);
12073 if (size > 3)
12074   OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
12075 else
12076   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
12077 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
12078 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
12079 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
12080 
12081 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
12082 if (size >= 0)
12083   init_frame(common, cc, ccend, size - 1, 0);
12084 }
12085 
/* Emits the matching path for the opcode sequence [cc, ccend).

Every opcode is dispatched to the compile_*_matchingpath helper that handles
it (a few simple opcodes are handled inline). Helpers return the address of
the next opcode; when a helper returns NULL the function returns immediately
(presumably a compiler error was detected - the helpers visible in this file
return NULL from PUSH_BACKTRACK).

Backtrack entries created here are pushed onto parent. Helpers which take a
jump list instead of a parent receive the nextbacktracks list of the most
recently pushed entry (parent->top) if there is one, and parent's
topbacktracks list otherwise.

When the pattern contains THEN and common->then_offsets is non-zero for the
starting position, the whole sequence is enclosed in a then-trap: a tail item
is pushed before the first opcode and a head item after the last one.

Arguments:
  common      compiler state
  cc          first opcode of the sequence
  ccend       end of the sequence; must point to OP_END or to an opcode in
                the OP_ALT .. OP_KETRPOS range
  parent      backtrack entry that receives the new backtrack items
*/
static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
BOOL has_then_trap = FALSE;
then_trap_backtrack *save_then_trap = NULL;

SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));

if (common->has_then && common->then_offsets[cc - common->start] != 0)
  {
  SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
  has_then_trap = TRUE;
  /* The previous trap is restored after the head item is pushed below. */
  save_then_trap = common->then_trap;
  /* Tail item on backtrack. */
  compile_then_trap_matchingpath(common, cc, ccend, parent);
  }

while (cc < ccend)
  {
  switch(*cc)
    {
    /* Simple assertions. */
    case OP_SOD:
    case OP_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_REVERSE:
    cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    break;

    /* Single character types without a repeat. */
    case OP_NOT_DIGIT:
    case OP_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
    case OP_ANYBYTE:
    case OP_NOTPROP:
    case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE:
    case OP_HSPACE:
    case OP_NOT_VSPACE:
    case OP_VSPACE:
    case OP_EXTUNI:
    case OP_NOT:
    case OP_NOTI:
    cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;

    /* The current value of OVECTOR(0) is saved on the stack, then
    OVECTOR(0) is replaced by STR_PTR. */
    case OP_SET_SOM:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    cc++;
    break;

    /* Literal characters. In PCRE2_JIT_COMPLETE mode the charn compiler is
    used, which also receives ccend; in the other modes the character is
    compiled by the single character compiler. */
    case OP_CHAR:
    case OP_CHARI:
    if (common->mode == PCRE2_JIT_COMPLETE)
      cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;

    /* Repeated single characters and character types. */
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    cc = compile_iterator_matchingpath(common, cc, parent);
    break;

    /* The code unit found 32 bytes after the opcode tells whether the
    class is followed by a repeat (OP_CRSTAR .. OP_CRPOSRANGE). */
    case OP_CLASS:
    case OP_NCLASS:
    if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
      cc = compile_iterator_matchingpath(common, cc, parent);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    /* Same as above, but the position of the opcode that follows the
    class is given by the value returned by GET(cc, 1). */
    case OP_XCLASS:
    if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
      cc = compile_iterator_matchingpath(common, cc, parent);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;
#endif

    /* Back references, with or without a repeat. */
    case OP_REF:
    case OP_REFI:
    if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    else
      {
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
      cc += 1 + IMM2_SIZE;
      }
    break;

    /* These opcodes carry two IMM2 arguments. Without a repeat,
    compile_dnref_search() is run before the reference is compiled. */
    case OP_DNREF:
    case OP_DNREFI:
    if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    else
      {
      compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
      cc += 1 + 2 * IMM2_SIZE;
      }
    break;

    case OP_RECURSE:
    cc = compile_recurse_matchingpath(common, cc, parent);
    break;

    case OP_CALLOUT:
    case OP_CALLOUT_STR:
    cc = compile_callout_matchingpath(common, cc, parent);
    break;

    /* The assert compiler receives the backtrack entry pushed here. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
    cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
    break;

    /* The bracket that follows is not compiled here: cc is moved to its
    end and only STR_PTR is saved on the stack. When the bracket is closed
    by OP_KETRMIN, two stack slots are used and the first one is set to
    zero. */
    case OP_BRAMINZERO:
    PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
    cc = bracketend(cc + 1);
    if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
      {
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else
      {
      allocate_stack(common, 2);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
      }
    BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
    count_match(common);
    break;

    /* All bracket types handled by the generic bracket compiler. */
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    cc = compile_bracket_matchingpath(common, cc, parent);
    break;

    /* When the next opcode is above OP_ASSERTBACK_NOT the bracket compiler
    is used, otherwise the item is compiled by the assert compiler. */
    case OP_BRAZERO:
    if (cc[1] > OP_ASSERTBACK_NOT)
      cc = compile_bracket_matchingpath(common, cc, parent);
    else
      {
      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
      }
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    cc = compile_bracketpos_matchingpath(common, cc, parent);
    break;

    /* The previous mark pointer is saved on the stack, then the address of
    the mark name (cc + 2) is stored both in the mark_ptr local and in the
    mark_ptr field of the arguments. When common->has_skip_arg is set, five
    stack slots are allocated instead of one and the control head is set to
    the new stack top:
      STACK(0)  previous control head
      STACK(1)  type_mark
      STACK(2)  address of the mark name
      STACK(3)  STR_PTR
      STACK(4)  previous mark pointer */
    case OP_MARK:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    SLJIT_ASSERT(common->mark_ptr != 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
    allocate_stack(common, common->has_skip_arg ? 5 : 1);
    if (HAS_VIRTUAL_REGISTERS)
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
    if (common->has_skip_arg)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
      }
    /* Step over the opcode and its argument; the argument length is taken
    from cc[1]. */
    cc += 1 + 2 + cc[1];
    break;

    /* Backtracking control verbs. */
    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    case OP_THEN:
    case OP_THEN_ARG:
    case OP_COMMIT:
    case OP_COMMIT_ARG:
    cc = compile_control_verb_matchingpath(common, cc, parent);
    break;

    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    cc = compile_fail_accept_matchingpath(common, cc, parent);
    break;

    case OP_CLOSE:
    cc = compile_close_matchingpath(common, cc);
    break;

    /* The bracket after OP_SKIPZERO is stepped over; no code is emitted
    for it. */
    case OP_SKIPZERO:
    cc = bracketend(cc + 1);
    break;

    default:
    SLJIT_UNREACHABLE();
    return;
    }
  /* A helper signalled failure by returning NULL. */
  if (cc == NULL)
    return;
  }

if (has_then_trap)
  {
  /* Head item on backtrack. */
  PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
  BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
  BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
  common->then_trap = save_then_trap;
  }
SLJIT_ASSERT(cc == ccend);
}
12401 
/* The backtrack-record helper macros are retired here; code below this point
cannot use them any more. */
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS

/* Calls compile_backtrackingpath() for the given backtrack record and then
executes a bare "return;" in the enclosing function when the sljit compiler
reports an error. The expansion refers to the names `common` and `compiler`,
so both must be in scope where the macro is used, and the enclosing function
must return void. */
#define COMPILE_BACKTRACKINGPATH(current) \
  do \
    { \
    compile_backtrackingpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Casts the variable named `current` (taken from the expansion site, it is
not a macro argument) to a pointer to the requested backtrack type. */
#define CURRENT_AS(type) ((type *)current)
12416 
/* Emits the backtracking path of the single character iterator recorded in
`current` (viewed as a char_iterator_backtrack).

The iterator state lives in two machine-word slots. When the item has no
private data (PRIVATE_DATA(cc) == 0) the slots are STACK(0) and STACK(1) of
the backtrack stack and are released with free_stack() once the iterator is
exhausted; otherwise they are two consecutive words at private_data_ptr
relative to SLJIT_SP and nothing is released.

Every opcode branch either jumps back to backtrack->matchingpath to retry, or
falls through to the final label where current->topbacktracks is bound. */
static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
char_iterator_backtrack *backtrack = CURRENT_AS(char_iterator_backtrack);
PCRE2_SPTR cc = current->cc;
PCRE2_UCHAR opcode;
PCRE2_UCHAR type;
sljit_u32 max = 0, exact;
struct sljit_label *retry = NULL;
struct sljit_jump *skip = NULL;
jump_list *failed = NULL;
PCRE2_SPTR end;
int private_data_ptr = PRIVATE_DATA(cc);
int base, offset0, offset1;

/* Select where the two state words are kept. */
if (private_data_ptr == 0)
  {
  base = SLJIT_MEM1(STACK_TOP);
  offset0 = STACK(0);
  offset1 = STACK(1);
  }
else
  {
  base = SLJIT_MEM1(SLJIT_SP);
  offset0 = private_data_ptr;
  offset1 = private_data_ptr + (int)sizeof(sljit_sw);
  }

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);

switch(opcode)
  {
  case OP_STAR:
  case OP_UPTO:
  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    /* These types always keep their saved positions on the stack: pop one
    and retry from it unless the popped value is zero. */
    SLJIT_ASSERT(private_data_ptr == 0);
    set_jumps(backtrack->u.backtracks, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, backtrack->matchingpath);
    break;
    }

  if (backtrack->u.charpos.enabled)
    {
    /* Walk STR_PTR backwards towards the limit held in the second slot and
    resume matching only where the preceding character equals charpos.chr
    (after OR-ing in othercasebit when it is set). */
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    OP1(SLJIT_MOV, TMP2, 0, base, offset1);
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

    skip = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
    retry = LABEL();
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    if (backtrack->u.charpos.othercasebit != 0)
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, backtrack->u.charpos.othercasebit);
    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, backtrack->u.charpos.chr, backtrack->matchingpath);
    move_back(common, NULL, TRUE);
    CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, retry);
    }
  else
    {
    /* Give back one character per backtrack until the position stored in
    the second slot is reached. */
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    skip = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
    move_back(common, NULL, TRUE);
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    }

  JUMPHERE(skip);
  if (private_data_ptr == 0)
    free_stack(common, 2);
  break;

  case OP_MINSTAR:
  /* Try to consume one more character from the saved position. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  compile_char1_matchingpath(common, type, cc, &failed, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
  set_jumps(failed, LABEL());
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  case OP_MINUPTO:
  /* Same as OP_MINSTAR, but the second slot holds a counter that is
  decremented first; reaching zero counts as a failure. */
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  add_jump(compiler, &failed, JUMP(SLJIT_ZERO));

  OP1(SLJIT_MOV, base, offset1, TMP1, 0);
  compile_char1_matchingpath(common, type, cc, &failed, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, backtrack->matchingpath);

  set_jumps(failed, LABEL());
  if (private_data_ptr == 0)
    free_stack(common, 2);
  break;

  case OP_QUERY:
  /* Reload the saved position and clear the slot; a non-zero position is
  retried. The u.backtracks jumps enter at the second copy of the sequence,
  which always resumes the matching path. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, backtrack->matchingpath);
  skip = JUMP(SLJIT_JUMP);
  set_jumps(backtrack->u.backtracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
  JUMPHERE(skip);
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  case OP_MINQUERY:
  /* Reload the saved position and clear the slot; unless it was zero, try
  to match the character once and resume the matching path. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  skip = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  compile_char1_matchingpath(common, type, cc, &failed, TRUE);
  JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
  set_jumps(failed, LABEL());
  JUMPHERE(skip);
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSQUERY:
  case OP_POSUPTO:
  /* Nothing is emitted for these opcodes. */
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

set_jumps(current->topbacktracks, LABEL());
}
12544 
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)12545 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12546 {
12547 DEFINE_COMPILER;
12548 PCRE2_SPTR cc = current->cc;
12549 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
12550 PCRE2_UCHAR type;
12551 
12552 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
12553 
12554 if ((type & 0x1) == 0)
12555   {
12556   /* Maximize case. */
12557   set_jumps(current->topbacktracks, LABEL());
12558   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12559   free_stack(common, 1);
12560   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12561   return;
12562   }
12563 
12564 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12565 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12566 set_jumps(current->topbacktracks, LABEL());
12567 free_stack(common, ref ? 2 : 3);
12568 }
12569 
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)12570 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12571 {
12572 DEFINE_COMPILER;
12573 recurse_entry *entry;
12574 
12575 if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
12576   {
12577   entry = CURRENT_AS(recurse_backtrack)->entry;
12578   if (entry->backtrack_label == NULL)
12579     add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
12580   else
12581     JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
12582   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
12583   }
12584 else
12585   compile_backtrackingpath(common, current->top);
12586 
12587 set_jumps(current->topbacktracks, LABEL());
12588 }
12589 
/* Emits the backtracking path of an assertion, optionally preceded by
OP_BRAZERO (OP_BRAMINZERO is never expected here).

With OP_BRAZERO the word at STACK(0) is loaded into STR_PTR first; a non-zero
value sends control back to the matching path (or, for positive assertions
with a frame, selects the frame revert code), while zero skips it.

For positive assertions (OP_ASSERT / OP_ASSERTBACK) with framesize >= 0 the
saved frame is reverted through common->revertframes and the previous
private data value is restored. */
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
assert_backtrack *backtrack = CURRENT_AS(assert_backtrack);
PCRE2_SPTR cc = current->cc;
BOOL is_brazero = FALSE;
BOOL is_positive;
struct sljit_jump *brajump = NULL;

SLJIT_ASSERT(*cc != OP_BRAMINZERO);
if (*cc == OP_BRAZERO)
  {
  is_brazero = TRUE;
  cc++;
  SLJIT_ASSERT(current->topbacktracks == NULL);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

if (backtrack->framesize < 0)
  {
  /* No frame to revert. */
  set_jumps(current->topbacktracks, LABEL());
  if (!is_brazero)
    return;

  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, backtrack->matchingpath);
  free_stack(common, 1);
  return;
  }

if (is_brazero)
  {
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, backtrack->matchingpath);
    free_stack(common, 1);
    return;
    }
  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

is_positive = (*cc == OP_ASSERT || *cc == OP_ASSERTBACK);
if (is_positive)
  {
  /* Revert the frame and restore the previous private data value. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), backtrack->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (backtrack->framesize - 1) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), backtrack->private_data_ptr, TMP1, 0);
  }

/* Pending backtracks land here for both positive and negative assertions. */
set_jumps(current->topbacktracks, LABEL());

if (!is_brazero)
  return;

/* We know there is enough place on the stack. */
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
JUMPHERE(brajump);
}
12658 
/* Emits the backtracking path of a bracketed group whose backtrack record is
`current` (viewed as a bracket_backtrack). In emission order the code:
  - pops the repeat state of a repeated group (OP_KETRMAX, OP_KETRMIN, or a
    counted repeat recorded in the private data of the closing OP_KET);
  - restores the saved capture pair of a capturing bracket (and the
    capture_last_ptr value when that is in use);
  - pops the alternative selector and dispatches on it, compiles the
    backtracking path of current->top, then for every following OP_ALT
    compiles both the matching path and the backtracking path of that
    alternative;
  - releases the remaining saved words and, for repeated groups, jumps back
    to recursive_matchingpath / zero_matchingpath or to a local loop label.
The function returns early, leaving emission unfinished, whenever the sljit
compiler reports an error. */
static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
int opcode, stacksize, alt_count, alt_max;
int offset = 0;
int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
PCRE2_SPTR cc = current->cc;
PCRE2_SPTR ccbegin;
PCRE2_SPTR ccprev;
PCRE2_UCHAR bra = OP_BRA;
PCRE2_UCHAR ket;
assert_backtrack *assert;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
struct sljit_jump *brazero = NULL;
struct sljit_jump *next_alt = NULL;
struct sljit_jump *once = NULL;
struct sljit_jump *cond = NULL;
struct sljit_label *rmin_label = NULL;
struct sljit_label *exact_label = NULL;
struct sljit_put_label *put_label = NULL;

/* Step over an optional zero-repeat prefix, remembering which one it was. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  }

opcode = *cc;
/* ccbegin temporarily addresses the closing ket opcode of the group. */
ccbegin = bracketend(cc) - 1 - LINK_SIZE;
ket = *ccbegin;
/* A plain OP_KET with private data carries a counted repeat. OP_UPTO and
OP_MINUPTO repeats are handled below as OP_KETRMAX and OP_KETRMIN. */
if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
  {
  repeat_ptr = PRIVATE_DATA(ccbegin);
  repeat_type = PRIVATE_DATA(ccbegin + 2);
  repeat_count = PRIVATE_DATA(ccbegin + 3);
  SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }
/* From here on ccbegin is the start of the group and cc is advanced by the
link value stored after the opening opcode. */
ccbegin = cc;
cc += GET(cc, 1);
has_alternatives = *cc == OP_ALT;
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
if (opcode == OP_CBRA || opcode == OP_SCBRA)
  offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;

alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;

/* Decoding the needs_control_head in framesize. */
if (opcode == OP_ONCE)
  {
  needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
  CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
  }

/* Counted repeat mapped to KETRMAX / KETRMIN: pop the saved counter and
write it back to repeat_ptr (plus one for OP_UPTO). */
if (ket != OP_KET && repeat_type != 0)
  {
  /* TMP1 is used in OP_KETRMIN below. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  if (repeat_type == OP_UPTO)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  }

if (ket == OP_KETRMAX)
  {
  if (bra == OP_BRAZERO)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
    }
  }
else if (ket == OP_KETRMIN)
  {
  if (bra != OP_BRAMINZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    if (repeat_type != 0)
      {
      /* TMP1 was set a few lines above. */
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else if (opcode >= OP_SBRA || opcode == OP_ONCE)
      {
      /* Checking zero-length iteration. */
      if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
        }
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else
      JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
    }
  /* Target of the loop-back jump emitted at the end of this function. */
  rmin_label = LABEL();
  if (repeat_type != 0)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  }
else if (repeat_type == OP_EXACT)
  {
  /* The counter restarts at 1; exact_label is the loop head used by the
  OP_EXACT code at the end of this function. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  exact_label = LABEL();
  }

/* Capturing bracket: restore the saved ovector pair from the stack. */
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    {
    SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
    free_stack(common, 3);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
    }
  else if (common->optimized_cbracket[offset >> 1] == 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  }

if (SLJIT_UNLIKELY(opcode == OP_ONCE))
  {
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
    }
  /* This jump is bound in the OP_ONCE cleanup code near the end. */
  once = JUMP(SLJIT_JUMP);
  }
else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  if (has_alternatives)
    {
    /* Always exactly one alternative. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);

    alt_max = 2;
    next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
    }
  }
else if (has_alternatives)
  {
  /* Pop the alternative selector. With more than three alternatives it is
  used as an indirect jump target, otherwise it is compared with small
  integers. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);

  if (alt_max > 3)
    {
    sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);

    SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_put_label);
    sljit_set_put_label(CURRENT_AS(bracket_backtrack)->u.matching_put_label, LABEL());
    sljit_emit_op0(compiler, SLJIT_ENDBR);
    }
  else
    next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  }

COMPILE_BACKTRACKINGPATH(current->top);
if (current->topbacktracks)
  set_jumps(current->topbacktracks, LABEL());

if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  /* Conditional block always has at most one alternative. */
  if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
    {
    SLJIT_ASSERT(has_alternatives);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
      }
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
    }
  else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
    {
    SLJIT_ASSERT(has_alternatives);
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
    }
  else
    SLJIT_ASSERT(!has_alternatives);
  }

if (has_alternatives)
  {
  alt_count = 1;
  do
    {
    /* The backtrack record is reused for each following alternative. */
    current->top = NULL;
    current->topbacktracks = NULL;
    current->nextbacktracks = NULL;
    /* Conditional blocks always have an additional alternative, even if it is empty. */
    if (*cc == OP_ALT)
      {
      ccprev = cc + 1 + LINK_SIZE;
      cc += GET(cc, 1);
      if (opcode != OP_COND && opcode != OP_SCOND)
        {
        if (opcode != OP_ONCE)
          {
          if (private_data_ptr != 0)
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
          else
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
          }
        else
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
        }
      compile_matchingpath(common, ccprev, cc, current);
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
        return;

      if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

      if (opcode == OP_SCRIPT_RUN)
        match_script_run_common(common, private_data_ptr, current);
      }

    /* Instructions after the current alternative is successfully matched. */
    /* There is a similar code in compile_bracket_matchingpath. */
    if (opcode == OP_ONCE)
      match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

    /* First pass: count the stack words needed, then allocate them at once. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* We need to preserve the counter. TMP2 will be used below. */
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
      stacksize++;
      }
    if (ket != OP_KET || bra != OP_BRA)
      stacksize++;
    if (offset != 0)
      {
      if (common->capture_last_ptr != 0)
        stacksize++;
      if (common->optimized_cbracket[offset >> 1] == 0)
        stacksize += 2;
      }
    if (opcode != OP_ONCE)
      stacksize++;

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    /* Second pass: fill the allocated words; stacksize is now the index of
    the next word to write. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* TMP2 was set above. */
      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
      stacksize++;
      }

    if (ket != OP_KET || bra != OP_BRA)
      {
      if (ket != OP_KET)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      else
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
      stacksize++;
      }

    if (offset != 0)
      stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

    /* Store the selector of this alternative: a small integer, or the
    address of the label set further below. */
    if (opcode != OP_ONCE)
      {
      if (alt_max <= 3)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
      else
        put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
      }

    if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
      {
      /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
      SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      }

    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);

    /* Entry point of the backtracking code of this alternative. */
    if (opcode != OP_ONCE)
      {
      if (alt_max <= 3)
        {
        JUMPHERE(next_alt);
        alt_count++;
        if (alt_count < alt_max)
          {
          SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
          next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
          }
        }
      else
        {
        sljit_set_put_label(put_label, LABEL());
        sljit_emit_op0(compiler, SLJIT_ENDBR);
        }
      }

    COMPILE_BACKTRACKINGPATH(current->top);
    if (current->topbacktracks)
      set_jumps(current->topbacktracks, LABEL());
    SLJIT_ASSERT(!current->nextbacktracks);
    }
  while (*cc == OP_ALT);

  if (cond != NULL)
    {
    SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
      }
    JUMPHERE(cond);
    }

  /* Free the STR_PTR. */
  if (private_data_ptr == 0)
    free_stack(common, 1);
  }

/* Final cleanup of the words saved when the group was entered. */
if (offset != 0)
  {
  /* Using both tmp register is better for instruction scheduling. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  }
else if (opcode == OP_ONCE)
  {
  cc = ccbegin + GET(ccbegin, 1);
  stacksize = needs_control_head ? 1 : 0;

  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    /* Reset head and drop saved frame. */
    stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
    }
  else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
    {
    /* The STR_PTR must be released. */
    stacksize++;
    }

  if (stacksize > 0)
    free_stack(common, stacksize);

  JUMPHERE(once);
  /* Restore previous private_data_ptr */
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
  else if (ket == OP_KETRMIN)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    /* See the comment below. */
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }

/* Repeat handling: loop back for another attempt where applicable. */
if (repeat_type == OP_EXACT)
  {
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
  }
else if (ket == OP_KETRMAX)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (bra != OP_BRAZERO)
    free_stack(common, 1);

  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
  if (bra == OP_BRAZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
    JUMPHERE(brazero);
    free_stack(common, 1);
    }
  }
else if (ket == OP_KETRMIN)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  /* OP_ONCE removes everything in case of a backtrack, so we don't
  need to explicitly release the STR_PTR. The extra release would
  affect badly the free_stack(2) above. */
  if (opcode != OP_ONCE)
    free_stack(common, 1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
  if (opcode == OP_ONCE)
    free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
  else if (bra == OP_BRAMINZERO)
    free_stack(common, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
  JUMPHERE(brazero);
  }
}
13125 
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)13126 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13127 {
13128 DEFINE_COMPILER;
13129 int offset;
13130 struct sljit_jump *jump;
13131 
13132 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
13133   {
13134   if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
13135     {
13136     offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
13137     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13138     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13139     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13140     if (common->capture_last_ptr != 0)
13141       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
13142     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13143     if (common->capture_last_ptr != 0)
13144       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
13145     }
13146   set_jumps(current->topbacktracks, LABEL());
13147   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
13148   return;
13149   }
13150 
13151 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
13152 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13153 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
13154 
13155 if (current->topbacktracks)
13156   {
13157   jump = JUMP(SLJIT_JUMP);
13158   set_jumps(current->topbacktracks, LABEL());
13159   /* Drop the stack frame. */
13160   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
13161   JUMPHERE(jump);
13162   }
13163 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
13164 }
13165 
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)13166 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13167 {
13168 assert_backtrack backtrack;
13169 
13170 current->top = NULL;
13171 current->topbacktracks = NULL;
13172 current->nextbacktracks = NULL;
13173 if (current->cc[1] > OP_ASSERTBACK_NOT)
13174   {
13175   /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
13176   compile_bracket_matchingpath(common, current->cc, current);
13177   compile_bracket_backtrackingpath(common, current->top);
13178   }
13179 else
13180   {
13181   memset(&backtrack, 0, sizeof(backtrack));
13182   backtrack.common.cc = current->cc;
13183   backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
13184   /* Manual call of compile_assert_matchingpath. */
13185   compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
13186   }
13187 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
13188 }
13189 
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)13190 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13191 {
13192 DEFINE_COMPILER;
13193 PCRE2_UCHAR opcode = *current->cc;
13194 struct sljit_label *loop;
13195 struct sljit_jump *jump;
13196 
13197 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
13198   {
13199   if (common->then_trap != NULL)
13200     {
13201     SLJIT_ASSERT(common->control_head_ptr != 0);
13202 
13203     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13204     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
13205     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
13206     jump = JUMP(SLJIT_JUMP);
13207 
13208     loop = LABEL();
13209     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13210     JUMPHERE(jump);
13211     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
13212     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
13213     add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
13214     return;
13215     }
13216   else if (!common->local_quit_available && common->in_positive_assertion)
13217     {
13218     add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
13219     return;
13220     }
13221   }
13222 
13223 if (common->local_quit_available)
13224   {
13225   /* Abort match with a fail. */
13226   if (common->quit_label == NULL)
13227     add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
13228   else
13229     JUMPTO(SLJIT_JUMP, common->quit_label);
13230   return;
13231   }
13232 
13233 if (opcode == OP_SKIP_ARG)
13234   {
13235   SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13236   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13237   OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
13238   sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_search_mark));
13239 
13240   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
13241   add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
13242   return;
13243   }
13244 
13245 if (opcode == OP_SKIP)
13246   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13247 else
13248   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
13249 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
13250 }
13251 
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)13252 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13253 {
13254 DEFINE_COMPILER;
13255 struct sljit_jump *jump;
13256 int size;
13257 
13258 if (CURRENT_AS(then_trap_backtrack)->then_trap)
13259   {
13260   common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
13261   return;
13262   }
13263 
13264 size = CURRENT_AS(then_trap_backtrack)->framesize;
13265 size = 3 + (size < 0 ? 0 : size);
13266 
13267 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
13268 free_stack(common, size);
13269 jump = JUMP(SLJIT_JUMP);
13270 
13271 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
13272 /* STACK_TOP is set by THEN. */
13273 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
13274   {
13275   add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13276   OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
13277   }
13278 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13279 free_stack(common, 3);
13280 
13281 JUMPHERE(jump);
13282 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
13283 }
13284 
/* Generates the backtracking paths of a chain of backtrack descriptors. The
chain is walked through the prev links, starting at current. For every
descriptor the pending nextbacktracks jumps are bound to the current position,
then the generator which belongs to the opcode at current->cc is invoked.
common->then_trap is saved on entry and restored before returning.

Arguments:
  common    the compiler state
  current   the first descriptor to process (may be NULL)
*/
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *saved_then_trap = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->nextbacktracks != NULL)
    set_jumps(current->nextbacktracks, LABEL());

  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Pop the saved value and store it back to OVECTOR(0). */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Single character repeats. */
    case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
    case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
    /* Their caseless forms. */
    case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
    case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
    /* Negated single character repeats. */
    case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
    case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
    /* Their caseless forms. */
    case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
    case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
    /* Character type repeats. */
    case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPLUS: case OP_TYPEMINPLUS:
    case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEUPTO: case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
    /* Character classes. */
    case OP_CLASS: case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF: case OP_REFI: case OP_DNREF: case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT: case OP_ASSERT_NOT:
    case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ASSERT_NA: case OP_ASSERTBACK_NA:
    case OP_ONCE: case OP_SCRIPT_RUN:
    case OP_BRA: case OP_CBRA: case OP_COND:
    case OP_SBRA: case OP_SCBRA: case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The generator is chosen by the opcode which follows BRAZERO. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS: case OP_CBRAPOS:
    case OP_SBRAPOS: case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    if (common->has_skip_arg)
      {
      /* Five words are dropped: the value for mark_ptr is in the last one,
      the value for control_head_ptr in the first one. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN: case OP_THEN_ARG:
    case OP_PRUNE: case OP_PRUNE_ARG:
    case OP_SKIP: case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT: case OP_COMMIT_ARG:
    /* Without a local quit the return value is set to no match first. */
    if (!common->local_quit_available)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
    if (common->quit_label != NULL)
      JUMPTO(SLJIT_JUMP, common->quit_label);
    else
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    break;

    case OP_CALLOUT: case OP_CALLOUT_STR:
    case OP_FAIL:
    case OP_ACCEPT: case OP_ASSERT_ACCEPT:
    /* Only the pending jumps are bound, no code is generated. */
    set_jumps(current->topbacktracks, LABEL());
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  }

common->then_trap = saved_then_trap;
}
13475 
/* Generates the code of one recursion target: the group which starts at
common->currententry->start. Two entry points are emitted, both are entered
with sljit_emit_fast_enter and left with SLJIT_FAST_RETURN:
  - entry_label: the matching path of the group
  - backtrack_label: re-entry for backtracking into the group
TMP1 is set to 1 before the return which follows a successful alternative and
to 0 before the other returns. The function returns early, leaving the code
incomplete, when the sljit compiler reports an error.

Argument:
  common    the compiler state; currententry selects the group
*/
static SLJIT_INLINE void compile_recurse(compiler_common *common)
{
DEFINE_COMPILER;
PCRE2_SPTR cc = common->start + common->currententry->start;
/* First opcode inside the group: IMM2_SIZE more units are skipped unless the group is OP_BRA. */
PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
/* NOTE(review): presumably the closing KET of the group - confirm against bracketend(). */
PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
uint32_t recurse_flags = 0;
int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &recurse_flags);
int alt_count, alt_max, local_size;
backtrack_common altbacktrack;
/* Jumps taken after an alternative has matched. */
jump_list *match = NULL;
/* Pending compare of the alternative index (used when alt_max is 2 or 3). */
struct sljit_jump *next_alt = NULL;
/* Jump taken by the backtracking entry when the stored index is -1. */
struct sljit_jump *accept_exit = NULL;
struct sljit_label *quit;
/* Address of the backtracking code of an alternative (used when alt_max > 3). */
struct sljit_put_label *put_label = NULL;

/* Recurse captures then. */
common->then_trap = NULL;

SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);

alt_max = no_alternatives(cc);
alt_count = 0;

/* Matching path. */
SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
common->currententry->entry_label = LABEL();
set_jumps(common->currententry->entry_calls, common->currententry->entry_label);

/* The return address is received in TMP2. */
sljit_emit_fast_enter(compiler, TMP2, 0);
count_match(common);

/* One word for the return address, one more for STR_PTR when there are several alternatives. */
local_size = (alt_max > 1) ? 2 : 1;

/* (Reversed) stack layout:
   [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */

allocate_stack(common, private_data_size + local_size);
/* Save return address. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);

copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, recurse_flags);

/* This variable is saved and restored all time when we enter or exit from a recursive context. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);

if (recurse_flags & recurse_flag_control_head_found)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);

/* STR_PTR is kept so that the later alternatives can start from the same position. */
if (alt_max > 1)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

/* The quit / accept labels and lists collected so far are dropped for this group. */
memset(&altbacktrack, 0, sizeof(backtrack_common));
common->quit_label = NULL;
common->accept_label = NULL;
common->quit = NULL;
common->accept = NULL;
altbacktrack.cc = ccbegin;
cc += GET(cc, 1);
/* One iteration per alternative: altbacktrack.cc is its start, cc is its end. */
while (1)
  {
  altbacktrack.top = NULL;
  altbacktrack.topbacktracks = NULL;

  /* Every alternative except the first reloads the saved STR_PTR. */
  if (altbacktrack.cc != ccbegin)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;

  /* The alternative has matched: allocate the trailing word(s) of the layout above. */
  allocate_stack(common, (alt_max > 1 || (recurse_flags & recurse_flag_accept_found)) ? 2 : 1);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);

  if (alt_max > 1 || (recurse_flags & recurse_flag_accept_found))
    {
    /* STACK(1) identifies the alternative: a code address when alt_max > 3, its index otherwise. */
    if (alt_max > 3)
      put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
    else
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
    }

  add_jump(compiler, &match, JUMP(SLJIT_JUMP));

  if (alt_count == 0)
    {
    /* Backtracking path entry. */
    SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
    common->currententry->backtrack_label = LABEL();
    set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);

    /* The return address is received in TMP1. */
    sljit_emit_fast_enter(compiler, TMP1, 0);

    /* -1 is the value stored in STACK(1) by the accept path at the end of this function. */
    if (recurse_flags & recurse_flag_accept_found)
      accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);

    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    /* Save return address. */
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);

    copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);

    if (alt_max > 1)
      {
      /* TMP1 receives the identifier of the alternative to continue with. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
      free_stack(common, 2);

      if (alt_max > 3)
        {
        /* Indirect jump to the stored address; the code of the first alternative follows. */
        sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
        sljit_set_put_label(put_label, LABEL());
        sljit_emit_op0(compiler, SLJIT_ENDBR);
        }
      else
        next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
      }
    else
      free_stack(common, (recurse_flags & recurse_flag_accept_found) ? 2 : 1);
    }
  else if (alt_max > 3)
    {
    /* Target of the indirect jump for this alternative. */
    sljit_set_put_label(put_label, LABEL());
    sljit_emit_op0(compiler, SLJIT_ENDBR);
    }
  else
    {
    /* Target of the previous index compare; one more compare is needed only for the middle one of three alternatives. */
    JUMPHERE(next_alt);
    if (alt_count + 1 < alt_max)
      {
      SLJIT_ASSERT(alt_count == 1 && alt_max == 3);
      next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
      }
    }

  alt_count++;

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;
  set_jumps(altbacktrack.topbacktracks, LABEL());

  if (*cc != OP_ALT)
    break;

  /* Step to the next alternative. */
  altbacktrack.cc = cc + 1 + LINK_SIZE;
  cc += GET(cc, 1);
  }

/* No alternative is matched. */

quit = LABEL();

copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, recurse_flags);

/* Reload the return address, drop the whole frame and return with TMP1 = 0. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
free_stack(common, private_data_size + local_size);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);

if (common->quit != NULL)
  {
  SLJIT_ASSERT(recurse_flags & recurse_flag_quit_found);

  /* Target of the quit jumps: STACK_TOP is reset to the recursive head, then the exit above is reused. */
  set_jumps(common->quit, LABEL());
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, recurse_flags);
  JUMPTO(SLJIT_JUMP, quit);
  }

if (recurse_flags & recurse_flag_accept_found)
  {
  /* Backtracking entry with -1 in STACK(1): return with TMP1 = 0 after dropping the frame. */
  JUMPHERE(accept_exit);
  free_stack(common, 2);

  /* Save return address. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);

  copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, recurse_flags);

  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
  free_stack(common, private_data_size + local_size);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
  }

if (common->accept != NULL)
  {
  SLJIT_ASSERT(recurse_flags & recurse_flag_accept_found);

  /* Target of the accept jumps: continues into the match exit below with -1 stored as alternative identifier. */
  set_jumps(common->accept, LABEL());

  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);

  allocate_stack(common, 2);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
  }

/* Match exit: TMP2 holds the recursive head here. */
set_jumps(match, LABEL());

OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);

copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);

/* Fetch the return address through the recursive head and return with TMP1 = 1. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
}
13684 
/* Remove helper macros of the code generators. CURRENT_AS is used by the
backtracking path generators above. */
#undef COMPILE_BACKTRACKINGPATH
#undef CURRENT_AS

/* Bits of the jit_compile() mode argument which are configuration options.
jit_compile() reads them and then clears them from mode. */
#define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS \
  (PCRE2_JIT_INVALID_UTF)
13690 
jit_compile(pcre2_code * code,sljit_u32 mode)13691 static int jit_compile(pcre2_code *code, sljit_u32 mode)
13692 {
13693 pcre2_real_code *re = (pcre2_real_code *)code;
13694 struct sljit_compiler *compiler;
13695 backtrack_common rootbacktrack;
13696 compiler_common common_data;
13697 compiler_common *common = &common_data;
13698 const sljit_u8 *tables = re->tables;
13699 void *allocator_data = &re->memctl;
13700 int private_data_size;
13701 PCRE2_SPTR ccend;
13702 executable_functions *functions;
13703 void *executable_func;
13704 sljit_uw executable_size;
13705 sljit_uw total_length;
13706 struct sljit_label *mainloop_label = NULL;
13707 struct sljit_label *continue_match_label;
13708 struct sljit_label *empty_match_found_label = NULL;
13709 struct sljit_label *empty_match_backtrack_label = NULL;
13710 struct sljit_label *reset_match_label;
13711 struct sljit_label *quit_label;
13712 struct sljit_jump *jump;
13713 struct sljit_jump *minlength_check_failed = NULL;
13714 struct sljit_jump *empty_match = NULL;
13715 struct sljit_jump *end_anchor_failed = NULL;
13716 jump_list *reqcu_not_found = NULL;
13717 
13718 SLJIT_ASSERT(tables);
13719 
13720 #if HAS_VIRTUAL_REGISTERS == 1
13721 SLJIT_ASSERT(sljit_get_register_index(TMP3) < 0 && sljit_get_register_index(ARGUMENTS) < 0 && sljit_get_register_index(RETURN_ADDR) < 0);
13722 #elif HAS_VIRTUAL_REGISTERS == 0
13723 SLJIT_ASSERT(sljit_get_register_index(TMP3) >= 0 && sljit_get_register_index(ARGUMENTS) >= 0 && sljit_get_register_index(RETURN_ADDR) >= 0);
13724 #else
13725 #error "Invalid value for HAS_VIRTUAL_REGISTERS"
13726 #endif
13727 
13728 memset(&rootbacktrack, 0, sizeof(backtrack_common));
13729 memset(common, 0, sizeof(compiler_common));
13730 common->re = re;
13731 common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
13732 rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
13733 
13734 #ifdef SUPPORT_UNICODE
13735 common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
13736 #endif /* SUPPORT_UNICODE */
13737 mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;
13738 
13739 common->start = rootbacktrack.cc;
13740 common->read_only_data_head = NULL;
13741 common->fcc = tables + fcc_offset;
13742 common->lcc = (sljit_sw)(tables + lcc_offset);
13743 common->mode = mode;
13744 common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
13745 common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
13746 common->nltype = NLTYPE_FIXED;
13747 switch(re->newline_convention)
13748   {
13749   case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
13750   case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
13751   case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
13752   case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
13753   case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
13754   case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
13755   default: return PCRE2_ERROR_INTERNAL;
13756   }
13757 common->nlmax = READ_CHAR_MAX;
13758 common->nlmin = 0;
13759 if (re->bsr_convention == PCRE2_BSR_UNICODE)
13760   common->bsr_nltype = NLTYPE_ANY;
13761 else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
13762   common->bsr_nltype = NLTYPE_ANYCRLF;
13763 else
13764   {
13765 #ifdef BSR_ANYCRLF
13766   common->bsr_nltype = NLTYPE_ANYCRLF;
13767 #else
13768   common->bsr_nltype = NLTYPE_ANY;
13769 #endif
13770   }
13771 common->bsr_nlmax = READ_CHAR_MAX;
13772 common->bsr_nlmin = 0;
13773 common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
13774 common->ctypes = (sljit_sw)(tables + ctypes_offset);
13775 common->name_count = re->name_count;
13776 common->name_entry_size = re->name_entry_size;
13777 common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
13778 common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
13779 #ifdef SUPPORT_UNICODE
13780 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
13781 common->utf = (re->overall_options & PCRE2_UTF) != 0;
13782 common->ucp = (re->overall_options & PCRE2_UCP) != 0;
13783 if (common->utf)
13784   {
13785   if (common->nltype == NLTYPE_ANY)
13786     common->nlmax = 0x2029;
13787   else if (common->nltype == NLTYPE_ANYCRLF)
13788     common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13789   else
13790     {
13791     /* We only care about the first newline character. */
13792     common->nlmax = common->newline & 0xff;
13793     }
13794 
13795   if (common->nltype == NLTYPE_FIXED)
13796     common->nlmin = common->newline & 0xff;
13797   else
13798     common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13799 
13800   if (common->bsr_nltype == NLTYPE_ANY)
13801     common->bsr_nlmax = 0x2029;
13802   else
13803     common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13804   common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13805   }
13806 else
13807   common->invalid_utf = FALSE;
13808 #endif /* SUPPORT_UNICODE */
13809 ccend = bracketend(common->start);
13810 
13811 /* Calculate the local space size on the stack. */
13812 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
13813 common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
13814 if (!common->optimized_cbracket)
13815   return PCRE2_ERROR_NOMEMORY;
13816 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
13817 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13818 #else
13819 memset(common->optimized_cbracket, 1, re->top_bracket + 1);
13820 #endif
13821 
13822 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
13823 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
13824 common->capture_last_ptr = common->ovector_start;
13825 common->ovector_start += sizeof(sljit_sw);
13826 #endif
13827 if (!check_opcode_types(common, common->start, ccend))
13828   {
13829   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13830   return PCRE2_ERROR_NOMEMORY;
13831   }
13832 
13833 /* Checking flags and updating ovector_start. */
13834 if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13835   {
13836   common->req_char_ptr = common->ovector_start;
13837   common->ovector_start += sizeof(sljit_sw);
13838   }
13839 if (mode != PCRE2_JIT_COMPLETE)
13840   {
13841   common->start_used_ptr = common->ovector_start;
13842   common->ovector_start += sizeof(sljit_sw);
13843   if (mode == PCRE2_JIT_PARTIAL_SOFT)
13844     {
13845     common->hit_start = common->ovector_start;
13846     common->ovector_start += sizeof(sljit_sw);
13847     }
13848   }
13849 if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
13850   {
13851   common->match_end_ptr = common->ovector_start;
13852   common->ovector_start += sizeof(sljit_sw);
13853   }
13854 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
13855 common->control_head_ptr = 1;
13856 #endif
13857 if (common->control_head_ptr != 0)
13858   {
13859   common->control_head_ptr = common->ovector_start;
13860   common->ovector_start += sizeof(sljit_sw);
13861   }
13862 if (common->has_set_som)
13863   {
13864   /* Saving the real start pointer is necessary. */
13865   common->start_ptr = common->ovector_start;
13866   common->ovector_start += sizeof(sljit_sw);
13867   }
13868 
13869 /* Aligning ovector to even number of sljit words. */
13870 if ((common->ovector_start & sizeof(sljit_sw)) != 0)
13871   common->ovector_start += sizeof(sljit_sw);
13872 
13873 if (common->start_ptr == 0)
13874   common->start_ptr = OVECTOR(0);
13875 
13876 /* Capturing brackets cannot be optimized if callouts are allowed. */
13877 if (common->capture_last_ptr != 0)
13878   memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13879 
13880 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
13881 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
13882 
13883 total_length = ccend - common->start;
13884 common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
13885 if (!common->private_data_ptrs)
13886   {
13887   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13888   return PCRE2_ERROR_NOMEMORY;
13889   }
13890 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
13891 
13892 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
13893 
13894 if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
13895   detect_early_fail(common, common->start, &private_data_size, 0, 0, TRUE);
13896 
13897 set_private_data_ptrs(common, &private_data_size, ccend);
13898 
13899 SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
13900 
13901 if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
13902   {
13903   SLJIT_FREE(common->private_data_ptrs, allocator_data);
13904   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13905   return PCRE2_ERROR_NOMEMORY;
13906   }
13907 
13908 if (common->has_then)
13909   {
13910   common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
13911   memset(common->then_offsets, 0, total_length);
13912   set_then_offsets(common, common->start, NULL);
13913   }
13914 
13915 compiler = sljit_create_compiler(allocator_data, NULL);
13916 if (!compiler)
13917   {
13918   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13919   SLJIT_FREE(common->private_data_ptrs, allocator_data);
13920   return PCRE2_ERROR_NOMEMORY;
13921   }
13922 common->compiler = compiler;
13923 
13924 /* Main pcre2_jit_exec entry. */
13925 SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0);
13926 sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5, 5, 0, 0, private_data_size);
13927 
13928 /* Register init. */
13929 reset_ovector(common, (re->top_bracket + 1) * 2);
13930 if (common->req_char_ptr != 0)
13931   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
13932 
13933 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
13934 OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
13935 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13936 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
13937 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
13938 OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
13939 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
13940 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
13941 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
13942 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
13943 
13944 if (common->early_fail_start_ptr < common->early_fail_end_ptr)
13945   reset_early_fail(common);
13946 
13947 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13948   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13949 if (common->mark_ptr != 0)
13950   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
13951 if (common->control_head_ptr != 0)
13952   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13953 
13954 /* Main part of the matching */
13955 if ((re->overall_options & PCRE2_ANCHORED) == 0)
13956   {
13957   mainloop_label = mainloop_entry(common);
13958   continue_match_label = LABEL();
13959   /* Forward search if possible. */
13960   if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13961     {
13962     if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
13963       ;
13964     else if ((re->flags & PCRE2_FIRSTSET) != 0)
13965       fast_forward_first_char(common);
13966     else if ((re->flags & PCRE2_STARTLINE) != 0)
13967       fast_forward_newline(common);
13968     else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
13969       fast_forward_start_bits(common);
13970     }
13971   }
13972 else
13973   continue_match_label = LABEL();
13974 
13975 if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13976   {
13977   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13978   OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
13979   minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
13980   }
13981 if (common->req_char_ptr != 0)
13982   reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
13983 
13984 /* Store the current STR_PTR in OVECTOR(0). */
13985 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
13986 /* Copy the limit of allowed recursions. */
13987 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
13988 if (common->capture_last_ptr != 0)
13989   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
13990 if (common->fast_forward_bc_ptr != NULL)
13991   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);
13992 
13993 if (common->start_ptr != OVECTOR(0))
13994   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
13995 
13996 /* Copy the beginning of the string. */
13997 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13998   {
13999   jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
14000   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
14001   JUMPHERE(jump);
14002   }
14003 else if (mode == PCRE2_JIT_PARTIAL_HARD)
14004   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
14005 
14006 compile_matchingpath(common, common->start, ccend, &rootbacktrack);
14007 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14008   {
14009   sljit_free_compiler(compiler);
14010   SLJIT_FREE(common->optimized_cbracket, allocator_data);
14011   SLJIT_FREE(common->private_data_ptrs, allocator_data);
14012   PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14013   return PCRE2_ERROR_NOMEMORY;
14014   }
14015 
14016 if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
14017   end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
14018 
14019 if (common->might_be_empty)
14020   {
14021   empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
14022   empty_match_found_label = LABEL();
14023   }
14024 
14025 common->accept_label = LABEL();
14026 if (common->accept != NULL)
14027   set_jumps(common->accept, common->accept_label);
14028 
14029 /* This means we have a match. Update the ovector. */
14030 copy_ovector(common, re->top_bracket + 1);
14031 common->quit_label = common->abort_label = LABEL();
14032 if (common->quit != NULL)
14033   set_jumps(common->quit, common->quit_label);
14034 if (common->abort != NULL)
14035   set_jumps(common->abort, common->abort_label);
14036 if (minlength_check_failed != NULL)
14037   SET_LABEL(minlength_check_failed, common->abort_label);
14038 
14039 sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
14040 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
14041 
14042 if (common->failed_match != NULL)
14043   {
14044   SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
14045   set_jumps(common->failed_match, LABEL());
14046   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14047   JUMPTO(SLJIT_JUMP, common->abort_label);
14048   }
14049 
14050 if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
14051   JUMPHERE(end_anchor_failed);
14052 
14053 if (mode != PCRE2_JIT_COMPLETE)
14054   {
14055   common->partialmatchlabel = LABEL();
14056   set_jumps(common->partialmatch, common->partialmatchlabel);
14057   return_with_partial_match(common, common->quit_label);
14058   }
14059 
14060 if (common->might_be_empty)
14061   empty_match_backtrack_label = LABEL();
14062 compile_backtrackingpath(common, rootbacktrack.top);
14063 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14064   {
14065   sljit_free_compiler(compiler);
14066   SLJIT_FREE(common->optimized_cbracket, allocator_data);
14067   SLJIT_FREE(common->private_data_ptrs, allocator_data);
14068   PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14069   return PCRE2_ERROR_NOMEMORY;
14070   }
14071 
14072 SLJIT_ASSERT(rootbacktrack.prev == NULL);
14073 reset_match_label = LABEL();
14074 
14075 if (mode == PCRE2_JIT_PARTIAL_SOFT)
14076   {
14077   /* Update hit_start only in the first time. */
14078   jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
14079   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
14080   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
14081   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
14082   JUMPHERE(jump);
14083   }
14084 
14085 /* Check we have remaining characters. */
14086 if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
14087   {
14088   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
14089   }
14090 
14091 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
14092     (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);
14093 
14094 if ((re->overall_options & PCRE2_ANCHORED) == 0)
14095   {
14096   if (common->ff_newline_shortcut != NULL)
14097     {
14098     /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
14099     if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
14100       {
14101       if (common->match_end_ptr != 0)
14102         {
14103         OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
14104         OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
14105         CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
14106         OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
14107         }
14108       else
14109         CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
14110       }
14111     }
14112   else
14113     CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
14114   }
14115 
14116 /* No more remaining characters. */
14117 if (reqcu_not_found != NULL)
14118   set_jumps(reqcu_not_found, LABEL());
14119 
14120 if (mode == PCRE2_JIT_PARTIAL_SOFT)
14121   CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
14122 
14123 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14124 JUMPTO(SLJIT_JUMP, common->quit_label);
14125 
14126 flush_stubs(common);
14127 
14128 if (common->might_be_empty)
14129   {
14130   JUMPHERE(empty_match);
14131   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
14132   OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
14133   OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
14134   JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
14135   OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
14136   JUMPTO(SLJIT_ZERO, empty_match_found_label);
14137   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
14138   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
14139   JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
14140   }
14141 
14142 common->fast_forward_bc_ptr = NULL;
14143 common->early_fail_start_ptr = 0;
14144 common->early_fail_end_ptr = 0;
14145 common->currententry = common->entries;
14146 common->local_quit_available = TRUE;
14147 quit_label = common->quit_label;
14148 if (common->currententry != NULL)
14149   {
14150   /* A free bit for each private data. */
14151   common->recurse_bitset_size = ((private_data_size / (int)sizeof(sljit_sw)) + 7) >> 3;
14152   SLJIT_ASSERT(common->recurse_bitset_size > 0);
14153   common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);;
14154 
14155   if (common->recurse_bitset != NULL)
14156     {
14157     do
14158       {
14159       /* Might add new entries. */
14160       compile_recurse(common);
14161       if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14162         break;
14163       flush_stubs(common);
14164       common->currententry = common->currententry->next;
14165       }
14166     while (common->currententry != NULL);
14167 
14168     SLJIT_FREE(common->recurse_bitset, allocator_data);
14169     }
14170 
14171   if (common->currententry != NULL)
14172     {
14173     /* The common->recurse_bitset has been freed. */
14174     SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL);
14175 
14176     sljit_free_compiler(compiler);
14177     SLJIT_FREE(common->optimized_cbracket, allocator_data);
14178     SLJIT_FREE(common->private_data_ptrs, allocator_data);
14179     PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14180     return PCRE2_ERROR_NOMEMORY;
14181     }
14182   }
14183 common->local_quit_available = FALSE;
14184 common->quit_label = quit_label;
14185 
14186 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
14187 /* This is a (really) rare case. */
14188 set_jumps(common->stackalloc, LABEL());
14189 /* RETURN_ADDR is not a saved register. */
14190 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14191 
14192 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
14193 
14194 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
14195 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
14196 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
14197 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
14198 OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
14199 
14200 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(sljit_stack_resize));
14201 
14202 jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
14203 OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
14204 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
14205 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14206 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
14207 OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
14208 
14209 /* Allocation failed. */
14210 JUMPHERE(jump);
14211 /* We break the return address cache here, but this is a really rare case. */
14212 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
14213 JUMPTO(SLJIT_JUMP, common->quit_label);
14214 
14215 /* Call limit reached. */
14216 set_jumps(common->calllimit, LABEL());
14217 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
14218 JUMPTO(SLJIT_JUMP, common->quit_label);
14219 
14220 if (common->revertframes != NULL)
14221   {
14222   set_jumps(common->revertframes, LABEL());
14223   do_revertframes(common);
14224   }
14225 if (common->wordboundary != NULL)
14226   {
14227   set_jumps(common->wordboundary, LABEL());
14228   check_wordboundary(common);
14229   }
14230 if (common->anynewline != NULL)
14231   {
14232   set_jumps(common->anynewline, LABEL());
14233   check_anynewline(common);
14234   }
14235 if (common->hspace != NULL)
14236   {
14237   set_jumps(common->hspace, LABEL());
14238   check_hspace(common);
14239   }
14240 if (common->vspace != NULL)
14241   {
14242   set_jumps(common->vspace, LABEL());
14243   check_vspace(common);
14244   }
14245 if (common->casefulcmp != NULL)
14246   {
14247   set_jumps(common->casefulcmp, LABEL());
14248   do_casefulcmp(common);
14249   }
14250 if (common->caselesscmp != NULL)
14251   {
14252   set_jumps(common->caselesscmp, LABEL());
14253   do_caselesscmp(common);
14254   }
14255 if (common->reset_match != NULL)
14256   {
14257   set_jumps(common->reset_match, LABEL());
14258   do_reset_match(common, (re->top_bracket + 1) * 2);
14259   CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
14260   OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
14261   JUMPTO(SLJIT_JUMP, reset_match_label);
14262   }
14263 #ifdef SUPPORT_UNICODE
14264 #if PCRE2_CODE_UNIT_WIDTH == 8
14265 if (common->utfreadchar != NULL)
14266   {
14267   set_jumps(common->utfreadchar, LABEL());
14268   do_utfreadchar(common);
14269   }
14270 if (common->utfreadtype8 != NULL)
14271   {
14272   set_jumps(common->utfreadtype8, LABEL());
14273   do_utfreadtype8(common);
14274   }
14275 if (common->utfpeakcharback != NULL)
14276   {
14277   set_jumps(common->utfpeakcharback, LABEL());
14278   do_utfpeakcharback(common);
14279   }
14280 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
14281 #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
14282 if (common->utfreadchar_invalid != NULL)
14283   {
14284   set_jumps(common->utfreadchar_invalid, LABEL());
14285   do_utfreadchar_invalid(common);
14286   }
14287 if (common->utfreadnewline_invalid != NULL)
14288   {
14289   set_jumps(common->utfreadnewline_invalid, LABEL());
14290   do_utfreadnewline_invalid(common);
14291   }
14292 if (common->utfmoveback_invalid)
14293   {
14294   set_jumps(common->utfmoveback_invalid, LABEL());
14295   do_utfmoveback_invalid(common);
14296   }
14297 if (common->utfpeakcharback_invalid)
14298   {
14299   set_jumps(common->utfpeakcharback_invalid, LABEL());
14300   do_utfpeakcharback_invalid(common);
14301   }
14302 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
14303 if (common->getucd != NULL)
14304   {
14305   set_jumps(common->getucd, LABEL());
14306   do_getucd(common);
14307   }
14308 if (common->getucdtype != NULL)
14309   {
14310   set_jumps(common->getucdtype, LABEL());
14311   do_getucdtype(common);
14312   }
14313 #endif /* SUPPORT_UNICODE */
14314 
14315 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14316 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14317 
14318 executable_func = sljit_generate_code(compiler);
14319 executable_size = sljit_get_generated_code_size(compiler);
14320 sljit_free_compiler(compiler);
14321 
14322 if (executable_func == NULL)
14323   {
14324   PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14325   return PCRE2_ERROR_NOMEMORY;
14326   }
14327 
14328 /* Reuse the function descriptor if possible. */
14329 if (re->executable_jit != NULL)
14330   functions = (executable_functions *)re->executable_jit;
14331 else
14332   {
14333   functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
14334   if (functions == NULL)
14335     {
14336     /* This case is highly unlikely since we just recently
14337     freed a lot of memory. Not impossible though. */
14338     sljit_free_code(executable_func, NULL);
14339     PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14340     return PCRE2_ERROR_NOMEMORY;
14341     }
14342   memset(functions, 0, sizeof(executable_functions));
14343   functions->top_bracket = re->top_bracket + 1;
14344   functions->limit_match = re->limit_match;
14345   re->executable_jit = functions;
14346   }
14347 
14348 /* Turn mode into an index. */
14349 if (mode == PCRE2_JIT_COMPLETE)
14350   mode = 0;
14351 else
14352   mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
14353 
14354 SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
14355 functions->executable_funcs[mode] = executable_func;
14356 functions->read_only_data_heads[mode] = common->read_only_data_head;
14357 functions->executable_sizes[mode] = executable_size;
14358 return 0;
14359 }
14360 
14361 #endif
14362 
14363 /*************************************************
14364 *        JIT compile a Regular Expression        *
14365 *************************************************/
14366 
/* This function uses JIT to convert a previously-compiled pattern into machine
code.
14369 
14370 Arguments:
14371   code          a compiled pattern
14372   options       JIT option bits
14373 
14374 Returns:        0: success or (*NOJIT) was used
14375                <0: an error code
14376 */
14377 
14378 #define PUBLIC_JIT_COMPILE_OPTIONS \
14379   (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_INVALID_UTF)
14380 
14381 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_compile(pcre2_code * code,uint32_t options)14382 pcre2_jit_compile(pcre2_code *code, uint32_t options)
14383 {
14384 pcre2_real_code *re = (pcre2_real_code *)code;
14385 #ifdef SUPPORT_JIT
14386 executable_functions *functions;
14387 static int executable_allocator_is_working = 0;
14388 #endif
14389 
14390 if (code == NULL)
14391   return PCRE2_ERROR_NULL;
14392 
14393 if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14394   return PCRE2_ERROR_JIT_BADOPTION;
14395 
14396 /* Support for invalid UTF was first introduced in JIT, with the option
14397 PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
14398 compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
14399 preferred feature, with the earlier option deprecated. However, for backward
14400 compatibility, if the earlier option is set, it forces the new option so that
14401 if JIT matching falls back to the interpreter, there is still support for
14402 invalid UTF. However, if this function has already been successfully called
14403 without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
14404 non-invalid-supporting JIT code was compiled), give an error.
14405 
14406 If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
14407 actions are needed:
14408 
14409   1. Remove the definition from pcre2.h.in and from the list in
14410      PUBLIC_JIT_COMPILE_OPTIONS above.
14411 
14412   2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
14413 
14414   3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
14415 
14416   4. Delete the following short block of code. The setting of "re" and
14417      "functions" can be moved into the JIT-only block below, but if that is
14418      done, (void)re and (void)functions will be needed in the non-JIT case, to
14419      avoid compiler warnings.
14420 */
14421 
14422 #ifdef SUPPORT_JIT
14423 functions = (executable_functions *)re->executable_jit;
14424 #endif
14425 
14426 if ((options & PCRE2_JIT_INVALID_UTF) != 0)
14427   {
14428   if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
14429     {
14430 #ifdef SUPPORT_JIT
14431     if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
14432 #endif
14433     re->overall_options |= PCRE2_MATCH_INVALID_UTF;
14434     }
14435   }
14436 
14437 /* The above tests are run with and without JIT support. This means that
14438 PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
14439 interpreter support) even in the absence of JIT. But now, if there is no JIT
14440 support, give an error return. */
14441 
14442 #ifndef SUPPORT_JIT
14443 return PCRE2_ERROR_JIT_BADOPTION;
14444 #else  /* SUPPORT_JIT */
14445 
14446 /* There is JIT support. Do the necessary. */
14447 
14448 if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14449 
14450 if (executable_allocator_is_working == 0)
14451   {
14452   /* Checks whether the executable allocator is working. This check
14453      might run multiple times in multi-threaded environments, but the
14454      result should not be affected by it. */
14455   void *ptr = SLJIT_MALLOC_EXEC(32, NULL);
14456 
14457   executable_allocator_is_working = -1;
14458 
14459   if (ptr != NULL)
14460     {
14461     SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL);
14462     executable_allocator_is_working = 1;
14463     }
14464   }
14465 
14466 if (executable_allocator_is_working < 0)
14467   return PCRE2_ERROR_NOMEMORY;
14468 
14469 if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
14470   options |= PCRE2_JIT_INVALID_UTF;
14471 
14472 if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14473     || functions->executable_funcs[0] == NULL)) {
14474   uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14475   int result = jit_compile(code, options & ~excluded_options);
14476   if (result != 0)
14477     return result;
14478   }
14479 
14480 if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14481     || functions->executable_funcs[1] == NULL)) {
14482   uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14483   int result = jit_compile(code, options & ~excluded_options);
14484   if (result != 0)
14485     return result;
14486   }
14487 
14488 if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14489     || functions->executable_funcs[2] == NULL)) {
14490   uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14491   int result = jit_compile(code, options & ~excluded_options);
14492   if (result != 0)
14493     return result;
14494   }
14495 
14496 return 0;
14497 
14498 #endif  /* SUPPORT_JIT */
14499 }
14500 
14501 /* JIT compiler uses an all-in-one approach. This improves security,
14502    since the code generator functions are not exported. */
14503 
14504 #define INCLUDED_FROM_PCRE2_JIT_COMPILE
14505 
14506 #include "pcre2_jit_match.c"
14507 #include "pcre2_jit_misc.c"
14508 
14509 /* End of pcre2_jit_compile.c */
14510