1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9                     This module by Zoltan Herczeg
10      Original API code Copyright (c) 1997-2012 University of Cambridge
11           New API code Copyright (c) 2016-2021 University of Cambridge
12 
13 -----------------------------------------------------------------------------
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
16 
17     * Redistributions of source code must retain the above copyright notice,
18       this list of conditions and the following disclaimer.
19 
20     * Redistributions in binary form must reproduce the above copyright
21       notice, this list of conditions and the following disclaimer in the
22       documentation and/or other materials provided with the distribution.
23 
24     * Neither the name of the University of Cambridge nor the names of its
25       contributors may be used to endorse or promote products derived from
26       this software without specific prior written permission.
27 
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 POSSIBILITY OF SUCH DAMAGE.
39 -----------------------------------------------------------------------------
40 */
41 
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45 
46 #if defined(__has_feature)
47 #if __has_feature(memory_sanitizer)
48 #include <sanitizer/msan_interface.h>
49 #endif /* __has_feature(memory_sanitizer) */
50 #endif /* defined(__has_feature) */
51 
52 #include "pcre2_internal.h"
53 
54 #ifdef SUPPORT_JIT
55 
56 /* All-in-one: Since we use the JIT compiler only from here,
57 we just include it. This way we don't need to touch the build
58 system files. */
59 
60 #define SLJIT_CONFIG_AUTO 1
61 #define SLJIT_CONFIG_STATIC 1
62 #define SLJIT_VERBOSE 0
63 
64 #ifdef PCRE2_DEBUG
65 #define SLJIT_DEBUG 1
66 #else
67 #define SLJIT_DEBUG 0
68 #endif
69 
70 #define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
71 #define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
72 
pcre2_jit_malloc(size_t size,void * allocator_data)73 static void * pcre2_jit_malloc(size_t size, void *allocator_data)
74 {
75 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
76 return allocator->malloc(size, allocator->memory_data);
77 }
78 
pcre2_jit_free(void * ptr,void * allocator_data)79 static void pcre2_jit_free(void *ptr, void *allocator_data)
80 {
81 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
82 allocator->free(ptr, allocator->memory_data);
83 }
84 
85 #include "sljit/sljitLir.c"
86 
87 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
88 #error Unsupported architecture
89 #endif
90 
91 /* Defines for debugging purposes. */
92 
93 /* 1 - Use unoptimized capturing brackets.
94    2 - Enable capture_last_ptr (includes option 1). */
95 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
96 
97 /* 1 - Always have a control head. */
98 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
99 
100 /* Allocate memory for the regex stack on the real machine stack.
101 Fast, but limited size. */
102 #define MACHINE_STACK_SIZE 32768
103 
/* Growth rate for stack allocated by the OS. Should be a multiple
of the page size. */
106 #define STACK_GROWTH_RATE 8192
107 
108 /* Enable to check that the allocation could destroy temporaries. */
109 #if defined SLJIT_DEBUG && SLJIT_DEBUG
110 #define DESTROY_REGISTERS 1
111 #endif
112 
113 /*
Short summary about the backtracking mechanism employed by the jit code generator:
115 
The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
that execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).
121 
122   'ab' - 'a' and 'b' regexps are concatenated
123   'a+' - 'a' is the sub-expression of the '+' operator
124 
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
130 
131  Greedy star operator (*) :
132    Matching path: match happens.
133    Backtrack path: match failed.
134  Non-greedy star operator (*?) :
135    Matching path: no need to perform a match.
136    Backtrack path: match is required.
137 
The following example shows how the code is generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:
141 
142    A(B|C)D
143 
144 The generated code will be the following:
145 
146  A matching path
147  '(' matching path (pushing arguments to the stack)
148  B matching path
149  ')' matching path (pushing arguments to the stack)
150  D matching path
151  return with successful match
152 
153  D backtrack path
154  ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
155  B backtrack path
156  C expected path
157  jump to D matching path
158  C backtrack path
159  A backtrack path
160 
 Notice that the order of the backtrack code paths is the opposite of the fast
 code paths. In this way the topmost value on the stack always belongs
 to the current backtrack code path. The backtrack path must check
 whether there is a next alternative. If so, it needs to jump back to
 the matching path eventually. Otherwise it needs to clear out its own stack
 frame and continue the execution on the backtrack code paths.
167 */
168 
169 /*
170 Saved stack frames:
171 
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
176 
177 The stack frames are stored in a chain list, and have the following format:
178 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
179 
180 Thus we can restore the private data to a particular point in the stack.
181 */
182 
/* Argument block of the generated code: a jit_function (see the typedef
further down) receives a pointer to one of these as its only parameter. */
typedef struct jit_arguments {
  /* Pointers first. */
  struct sljit_stack *stack;
  PCRE2_SPTR str;
  PCRE2_SPTR begin;
  PCRE2_SPTR end;
  pcre2_match_data *match_data;
  PCRE2_SPTR startchar_ptr;
  PCRE2_UCHAR *mark_ptr;
  /* Callout function pointer and the untyped data pointer kept next to it. */
  int (*callout)(pcre2_callout_block *, void *);
  void *callout_data;
  /* Everything else after. */
  sljit_uw offset_limit;
  sljit_u32 limit_match;
  sljit_u32 oveccount;
  sljit_u32 options;
} jit_arguments;
200 
201 #define JIT_NUMBER_OF_COMPILE_MODES 3
202 
/* Bookkeeping for generated code. Each of the three arrays has
JIT_NUMBER_OF_COMPILE_MODES slots, i.e. one slot per compile mode. */
typedef struct executable_functions {
  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
  void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
  sljit_u32 top_bracket;
  sljit_u32 limit_match;
} executable_functions;
210 
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
215 
/* Node of a singly linked list that pairs a jump (start) with a label (quit).
NOTE(review): how the pair is consumed is not visible in this part of the file. */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
221 
/* Negative sentinel values. NOTE(review): presumably used in place of a frame
size (the framesize members of the backtrack structures are documented as
"less than 0 if not needed") - confirm against get_framesize. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
226 
/* Item types of the control verb management chain (see the comment on
control_head_ptr in compiler_common). */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
231 
/* NOTE(review): presumably the kinds handled by the early fail optimization
(see early_fail_start_ptr / early_fail_end_ptr in compiler_common); the users
of these values are not visible in this part of the file. */
enum  early_fail_types {
  type_skip = 0,
  type_fail = 1,
  type_fail_range = 2
};
237 
/* Function pointer type: takes a jit_arguments block and returns an int.
NOTE(review): presumably the type through which the generated code is called. */
typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
239 
/* The following structure is the key data type for the recursive
code generator. It is allocated by compile_matchingpath, and contains
the arguments for compile_backtrackingpath. It must be the first member
of its descendants (every *_backtrack structure below starts with a
backtrack_common member named common). */
typedef struct backtrack_common {
  /* Backtracking path of an opcode, which falls back
     to our opcode, if it cannot resume matching. */
  struct backtrack_common *prev;
  /* Backtracks for opcodes without backtracking path.
     These opcodes are between 'prev' and the current
     opcode, and they never resume the match. */
  jump_list *simple_backtracks;
  /* Internal backtracking list for block constructs
     which contain other opcodes, such as brackets,
     asserts, conditionals, etc. */
  struct backtrack_common *top;
  /* Backtracks used internally by the opcode. For component
     opcodes, this list is also used by those opcodes without
     backtracking path which follow the 'top' backtrack. */
  jump_list *own_backtracks;
  /* Opcode pointer. */
  PCRE2_SPTR cc;
} backtrack_common;
263 
/* Backtrack record that extends backtrack_common.
NOTE(review): presumably used for assertion opcodes - confirm at the users. */
typedef struct assert_backtrack {
  backtrack_common common;
  jump_list *condfailed;
  /* Less than 0 if a frame is not needed. */
  int framesize;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* For iterators. */
  struct sljit_label *matchingpath;
} assert_backtrack;
274 
/* Backtrack record that extends backtrack_common.
NOTE(review): presumably used for bracket opcodes - confirm at the users.
Only one member of the union u is meaningful at a time; which one depends
on the opcode, as noted on each member. */
typedef struct bracket_backtrack {
  backtrack_common common;
  /* Where to continue if an alternative is successfully matched. */
  struct sljit_label *alternative_matchingpath;
  /* For rmin and rmax iterators. */
  struct sljit_label *recursive_matchingpath;
  /* For greedy ? operator. */
  struct sljit_label *zero_matchingpath;
  /* Contains the branches of a failed condition. */
  union {
    /* Both for OP_COND, OP_SCOND. */
    jump_list *condfailed;
    assert_backtrack *assert;
    /* For OP_ONCE. Less than 0 if not needed. */
    int framesize;
    /* For brackets with >3 alternatives. */
    struct sljit_put_label *matching_put_label;
  } u;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
} bracket_backtrack;
296 
/* Backtrack record that extends backtrack_common.
NOTE(review): presumably used for the possessive (*POS) bracket opcodes -
confirm at the users. */
typedef struct bracketpos_backtrack {
  backtrack_common common;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* Reverting stack is needed. */
  int framesize;
  /* Allocated stack size. */
  int stacksize;
} bracketpos_backtrack;
306 
/* Backtrack record that extends backtrack_common with one label.
NOTE(review): presumably used for OP_BRAMINZERO - confirm at the users. */
typedef struct braminzero_backtrack {
  backtrack_common common;
  struct sljit_label *matchingpath;
} braminzero_backtrack;
311 
/* Backtrack record that extends backtrack_common.
NOTE(review): presumably used for single character iterators - confirm at
the users. The union u holds either a jump list or the charpos data;
charpos.enabled is the only visible discriminator. */
typedef struct char_iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
  union {
    jump_list *backtracks;
    struct {
      unsigned int othercasebit;
      PCRE2_UCHAR chr;
      BOOL enabled;
    } charpos;
  } u;
} char_iterator_backtrack;
325 
/* Backtrack record that extends backtrack_common with one label.
NOTE(review): presumably used for repeated back references - confirm at
the users. */
typedef struct ref_iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
} ref_iterator_backtrack;
331 
/* Node of a singly linked list describing one function generated for a
recursion; compiler_common keeps the list head in 'entries'. */
typedef struct recurse_entry {
  struct recurse_entry *next;
  /* Contains the function entry label. */
  struct sljit_label *entry_label;
  /* Contains the function backtrack label.
     NOTE(review): the previous comment repeated the entry label text;
     reworded from the member name - confirm. */
  struct sljit_label *backtrack_label;
  /* Collects the entry calls until the function is created. */
  jump_list *entry_calls;
  /* Collects the backtrack calls until the function is created. */
  jump_list *backtrack_calls;
  /* Points to the starting opcode. */
  sljit_sw start;
} recurse_entry;
345 
/* Backtrack record that extends backtrack_common and refers to a
recurse_entry. NOTE(review): presumably used for OP_RECURSE - confirm at
the users. */
typedef struct recurse_backtrack {
  backtrack_common common;
  /* Return to the matching path. */
  struct sljit_label *matchingpath;
  /* Recursive pattern. */
  recurse_entry *entry;
  /* Pattern is inlined. */
  BOOL inlined_pattern;
} recurse_backtrack;
355 
/* Backtrack record that extends backtrack_common with one label.
NOTE(review): presumably used for OP_VREVERSE - confirm at the users. */
typedef struct vreverse_backtrack {
  backtrack_common common;
  /* Return to the matching path. */
  struct sljit_label *matchingpath;
} vreverse_backtrack;
361 
362 #define OP_THEN_TRAP OP_TABLE_LENGTH
363 
/* Backtrack record that extends backtrack_common; compiler_common keeps a
pointer to the current one in its then_trap member ("current position where
a THEN must jump"). */
typedef struct then_trap_backtrack {
  backtrack_common common;
  /* If then_trap is not NULL, this structure contains the real
  then_trap for the backtracking path. */
  struct then_trap_backtrack *then_trap;
  /* Points to the starting opcode. */
  sljit_sw start;
  /* Exit point for the then opcodes of this alternative. */
  jump_list *quit;
  /* Frame size of the current alternative. */
  int framesize;
} then_trap_backtrack;
376 
/* MAX_DIFF_CHARS is the capacity of the chars array below.
NOTE(review): MAX_N_CHARS is not used in this part of the file; presumably it
bounds the number of positions scanned - confirm at its users. */
#define MAX_N_CHARS 12
#define MAX_DIFF_CHARS 5

/* Set of characters that may occur at one position. */
typedef struct fast_forward_char_data {
  /* Number of characters in the chars array, 255 for any character. */
  sljit_u8 count;
  /* Number of last UTF-8 characters in the chars array. */
  sljit_u8 last_count;
  /* Available characters in the current position. */
  PCRE2_UCHAR chars[MAX_DIFF_CHARS];
} fast_forward_char_data;
388 
389 #define MAX_CLASS_RANGE_SIZE 4
390 #define MAX_CLASS_CHARS_SIZE 3
391 
/* State shared by the code generator routines. The shortcut macros of this
file (DEFINE_COMPILER, OVECTOR, PRIVATE_DATA, ...) expect a pointer to this
structure in a variable named 'common'. */
typedef struct compiler_common {
  /* The sljit generic compiler. */
  struct sljit_compiler *compiler;
  /* Compiled regular expression. */
  pcre2_real_code *re;
  /* First byte code. */
  PCRE2_SPTR start;
  /* Maps private data offset to each opcode. */
  sljit_s32 *private_data_ptrs;
  /* Chain list of read-only data ptrs. */
  void *read_only_data_head;
  /* Tells whether the capturing bracket is optimized. */
  sljit_u8 *optimized_cbracket;
  /* Tells whether the starting offset is a target of then. */
  sljit_u8 *then_offsets;
  /* Current position where a THEN must jump. */
  then_trap_backtrack *then_trap;
  /* Starting offset of private data for capturing brackets. */
  sljit_s32 cbra_ptr;
  /* Output vector starting point. Must be divisible by 2. */
  sljit_s32 ovector_start;
  /* Points to the starting character of the current match. */
  sljit_s32 start_ptr;
  /* Last known position of the requested byte. */
  sljit_s32 req_char_ptr;
  /* Head of the last recursion. */
  sljit_s32 recursive_head_ptr;
  /* First inspected character for partial matching.
     (Needed for avoiding zero length partial matches.) */
  sljit_s32 start_used_ptr;
  /* Starting pointer for partial soft matches. */
  sljit_s32 hit_start;
  /* Pointer of the match end position. */
  sljit_s32 match_end_ptr;
  /* Points to the marked string. */
  sljit_s32 mark_ptr;
  /* Head of the recursive control verb management chain.
     Each item must have a previous offset and type
     (see control_types) values. See do_search_mark. */
  sljit_s32 control_head_ptr;
  /* Points to the last matched capture block index. */
  sljit_s32 capture_last_ptr;
  /* Fast forward skipping byte code pointer. */
  PCRE2_SPTR fast_forward_bc_ptr;
  /* Locals used by fast fail optimization. */
  sljit_s32 early_fail_start_ptr;
  sljit_s32 early_fail_end_ptr;
  /* Variables used by recursive call generator. */
  sljit_s32 recurse_bitset_size;
  uint8_t *recurse_bitset;

  /* Flipped and lower case tables. */
  const sljit_u8 *fcc;
  sljit_sw lcc;
  /* Mode can be PCRE2_JIT_COMPLETE and others. */
  int mode;
  /* TRUE, when empty match is accepted for partial matching. */
  BOOL allow_empty_partial;
  /* TRUE, when minlength is greater than 0.
     NOTE(review): the member name suggests the opposite sense (a match
     might be empty) - confirm against the place where it is assigned. */
  BOOL might_be_empty;
  /* \K is found in the pattern. */
  BOOL has_set_som;
  /* (*SKIP:arg) is found in the pattern. */
  BOOL has_skip_arg;
  /* (*THEN) is found in the pattern. */
  BOOL has_then;
  /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
  BOOL has_skip_in_assert_back;
  /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
  BOOL local_quit_available;
  /* Currently in a positive assertion. */
  BOOL in_positive_assertion;
  /* Newline control. */
  int nltype;
  sljit_u32 nlmax;
  sljit_u32 nlmin;
  int newline;
  int bsr_nltype;
  sljit_u32 bsr_nlmax;
  sljit_u32 bsr_nlmin;
  /* Dollar endonly. */
  int endonly;
  /* Tables. */
  sljit_sw ctypes;
  /* Named capturing brackets. */
  PCRE2_SPTR name_table;
  sljit_sw name_count;
  sljit_sw name_entry_size;

  /* Labels and jump lists. */
  struct sljit_label *partialmatchlabel;
  struct sljit_label *quit_label;
  struct sljit_label *abort_label;
  struct sljit_label *accept_label;
  struct sljit_label *ff_newline_shortcut;
  stub_list *stubs;
  recurse_entry *entries;
  recurse_entry *currententry;
  jump_list *partialmatch;
  jump_list *quit;
  jump_list *positive_assertion_quit;
  jump_list *abort;
  jump_list *failed_match;
  jump_list *accept;
  jump_list *calllimit;
  jump_list *stackalloc;
  jump_list *revertframes;
  jump_list *wordboundary;
  jump_list *ucp_wordboundary;
  jump_list *anynewline;
  jump_list *hspace;
  jump_list *vspace;
  jump_list *casefulcmp;
  jump_list *caselesscmp;
  jump_list *reset_match;
  /* Same as reset_match, but resets the STR_PTR as well. */
  jump_list *restart_match;
  BOOL unset_backref;
  BOOL alt_circumflex;
#ifdef SUPPORT_UNICODE
  BOOL utf;
  BOOL invalid_utf;
  BOOL ucp;
  /* Points to saving area for iref. */
  sljit_s32 iref_ptr;
  jump_list *getucd;
  jump_list *getucdtype;
#if PCRE2_CODE_UNIT_WIDTH == 8
  /* Jump lists that exist only for 8-bit code units. */
  jump_list *utfreadchar;
  jump_list *utfreadtype8;
  jump_list *utfpeakcharback;
#endif
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
  /* Jump lists that exist only for 8-bit and 16-bit code units. */
  jump_list *utfreadchar_invalid;
  jump_list *utfreadnewline_invalid;
  jump_list *utfmoveback_invalid;
  jump_list *utfpeakcharback_invalid;
#endif
#endif /* SUPPORT_UNICODE */
} compiler_common;
532 
/* For byte_sequence_compare. */

/* The ucharptr member and the two unions exist only when SLJIT_UNALIGNED is
defined and non-zero. The unions c and oc have the same layout: the same
storage viewed as one sljit_s32, one sljit_u16, or as an array of code units
whose element count depends on PCRE2_CODE_UNIT_WIDTH (4, 2 or 1). */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } c;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
566 
567 /* Undefine sljit macros. */
568 #undef CMP
569 
/* Used for accessing the elements of the stack: converts the element index i
into an offset of i machine words (i * SSIZE_OF(sw)). */
#define STACK(i)      ((i) * SSIZE_OF(sw))
572 
/* Register aliases. When sljit reports a preferred shift register, only
SLJIT_R2 and SLJIT_R3 are accepted here; TMP2 and TMP3 are swapped in the
SLJIT_R3 case so that TMP2 is always the preferred shift register. */
#ifdef SLJIT_PREF_SHIFT_REG
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
/* Nothing. */
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
#define SHIFT_REG_IS_R3
#else
#error "Unsupported shift register"
#endif
#endif

#define TMP1          SLJIT_R0
#ifdef SHIFT_REG_IS_R3
#define TMP2          SLJIT_R3
#define TMP3          SLJIT_R2
#else
#define TMP2          SLJIT_R2
#define TMP3          SLJIT_R3
#endif
#define STR_PTR       SLJIT_R1
#define STR_END       SLJIT_S0
#define STACK_TOP     SLJIT_S1
#define STACK_LIMIT   SLJIT_S2
#define COUNT_MATCH   SLJIT_S3
#define ARGUMENTS     SLJIT_S4
#define RETURN_ADDR   SLJIT_R4
598 
/* HAS_VIRTUAL_REGISTERS is 1 only for the 32-bit x86 sljit configuration and
0 everywhere else, so it can be tested with a plain #if or if (). */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#define HAS_VIRTUAL_REGISTERS 1
#else
#define HAS_VIRTUAL_REGISTERS 0
#endif
604 
/* Local space layout. The first five machine words have fixed roles. */
/* These two locals can be used by the current opcode. */
#define LOCALS0          (0 * sizeof(sljit_sw))
#define LOCALS1          (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_sw))
#define POSSESSIVE1      (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH      (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet been reached.
All four macros below expand to expressions on a variable named 'common', so
they can only be used where a compiler_common pointer of that name is in
scope. PRIVATE_DATA indexes private_data_ptrs by the distance of cc from
common->start. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * SSIZE_OF(sw))
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * SSIZE_OF(sw))
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
622 
/* Code unit width dependent helpers. MOV_UCHAR is the sljit move opcode for
one code unit and IN_UCHARS(x) multiplies x by the size of a code unit in
bytes (1, 2 or 4). UCHAR_SHIFT is the matching shift amount; note that it is
NOT defined for 8-bit code units, so its users must be guarded. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define MOV_UCHAR  SLJIT_MOV_U8
#define IN_UCHARS(x) (x)
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define MOV_UCHAR  SLJIT_MOV_U16
#define UCHAR_SHIFT (1)
#define IN_UCHARS(x) ((x) * 2)
#elif PCRE2_CODE_UNIT_WIDTH == 32
#define MOV_UCHAR  SLJIT_MOV_U32
#define UCHAR_SHIFT (2)
#define IN_UCHARS(x) ((x) * 4)
#else
#error Unsupported compiling mode
#endif
637 
/* Shortcuts. Every macro except DEFINE_COMPILER and SET_LABEL expands to an
sljit call whose first argument is a variable named 'compiler'.
DEFINE_COMPILER declares that variable from common->compiler, so it has to
appear first in any function that uses the other shortcuts (and a variable
named 'common' must be in scope for it).
JUMPTO and CMPTO emit a jump / compare-and-jump and bind it to an existing
label; JUMPHERE binds an earlier jump to a label emitted at the current
position. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define OP2U(op, src1, src1w, src2, src2w) \
  sljit_emit_op2u(compiler, (op), (src1), (src1w), (src2), (src2w))
#define OP_SRC(op, src, srcw) \
  sljit_emit_op_src(compiler, (op), (src), (srcw))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define SELECT(type, dst_reg, src1, src1w, src2_reg) \
  sljit_emit_select(compiler, (type), (dst_reg), (src1), (src1w), (src2_reg))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
669 
/* Largest value accepted as a character limit (all 31 low bits set). */
#define READ_CHAR_MAX 0x7fffffff

/* Special character values. The negative constant is parenthesized so that
the macro stays a single operand wherever it is expanded. */
#define INVALID_UTF_CHAR (-1)
#define UNASSIGNED_UTF_CHAR 888
674 
675 #if defined SUPPORT_UNICODE
676 #if PCRE2_CODE_UNIT_WIDTH == 8
677 
/* Forward UTF-8 reader that tolerates invalid input (8-bit code units).

On success the decoded code point is stored in c and ptr is advanced by the
length of the sequence (1 to 4 bytes). invalid_action is executed when the
sequence is truncated by end, has a bad lead or continuation byte, is
overlong (lead bytes 0xc0/0xc1, 3-byte values below 0x800, 4-byte values
below 0x10000), encodes a surrogate (0xd800-0xdfff) or exceeds 0x10ffff.

Things to keep in mind at the call site:
 - ptr[0] is read without being compared to end; only the following bytes
   are bounds checked.
 - When the failure is detected by the final value check of a 3 or 4 byte
   sequence, ptr has already been advanced; in every other failure case ptr
   is unchanged and c may hold a partial value.
 - The body is a plain brace block rather than do { } while (0), so an
   invalid_action such as break or continue acts on the caller's loop.
 - The arguments are evaluated several times and are not parenthesized. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] <= 0x7f) \
    c = *ptr++; \
  else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \
    { \
    c = ptr[1] - 0x80; \
    \
    if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \
      { \
      c |= (ptr[0] - 0xc0) << 6; \
      ptr += 2; \
      } \
    else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] < 0xc0) \
      { \
      c = c << 6 | (ptr[2] - 0x80); \
      \
      if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \
        { \
        c |= (ptr[0] - 0xe0) << 12; \
        ptr += 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \
        { \
        c = c << 6 | (ptr[3] - 0x80); \
        \
        if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \
          { \
          c |= (ptr[0] - 0xf0) << 18; \
          ptr += 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
739 
/* Backward UTF-8 reader that tolerates invalid input (8-bit code units).

Decodes the character that ends just before ptr. On success the code point is
stored in c and ptr is moved back to the first byte of the sequence (1 to 4
bytes). invalid_action is executed when the sequence would start before
start, has a bad lead or continuation byte, is overlong, encodes a surrogate
(0xd800-0xdfff) or exceeds 0x10ffff.

Walking backwards, the bytes are met from the least significant payload to
the most significant one: ptr[-1] supplies bits 0-5, ptr[-2] bits 6-11,
ptr[-3] bits 12-17 and a 4-byte lead bits 18-20. Each byte is therefore OR-ed
in at its own shift. (Shifting the accumulated value left by 6 for every
further byte, as the forward reader does, would reverse the payload order and
run the validity checks on a scrambled value.)

Things to keep in mind at the call site:
 - ptr[-1] is read unconditionally, so ptr must be greater than start.
 - When the failure is detected by the final value check of a 3 or 4 byte
   sequence, ptr has already been moved; in every other failure case ptr is
   unchanged and c may hold a partial value.
 - The body is a plain brace block rather than do { } while (0), so an
   invalid_action such as break or continue acts on the caller's loop.
 - The arguments are evaluated several times and are not parenthesized. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c <= 0x7f) \
    ptr--; \
  else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \
    { \
    c -= 0x80; \
    \
    if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \
      { \
      c |= (ptr[-2] - 0xc0) << 6; \
      ptr -= 2; \
      } \
    else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \
      { \
      c |= (ptr[-2] - 0x80) << 6; \
      \
      if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \
        { \
        c |= (ptr[-3] - 0xe0) << 12; \
        ptr -= 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \
        { \
        c |= (ptr[-3] - 0x80) << 12; \
        \
        if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \
          { \
          c |= (ptr[-4] - 0xf0) << 18; \
          ptr -= 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
802 
803 #elif PCRE2_CODE_UNIT_WIDTH == 16
804 
/* Forward UTF-16 reader that tolerates invalid input (16-bit code units).

A code unit outside 0xd800-0xdfff is stored in c as is and ptr advances by
one. A high surrogate (0xd800-0xdbff) followed, before end, by a low surrogate
(0xdc00-0xdfff) is combined into a code point at or above 0x10000 and ptr
advances by two. Anything else runs invalid_action with ptr unchanged.
ptr[0] is read without being compared to end. The body is a plain brace
block, so a break or continue passed as invalid_action acts on the caller's
loop. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] < 0xd800 || ptr[0] >= 0xe000) \
    c = *ptr++; \
  else if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \
    { \
    c = (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)) + 0x10000; \
    ptr += 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
819 
/* Backward UTF-16 reader that tolerates invalid input (16-bit code units).

c is always loaded from ptr[-1] first. A code unit outside 0xd800-0xdfff is
kept and ptr moves back by one. A low surrogate (0xdc00-0xdfff) preceded,
after start, by a high surrogate (0xd800-0xdbff) is combined into a code
point at or above 0x10000 and ptr moves back by two. Anything else runs
invalid_action with ptr unchanged and c holding the raw code unit. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c < 0xd800 || c >= 0xe000) \
    ptr--; \
  else if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \
    { \
    c = (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)) + 0x10000; \
    ptr -= 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
835 
836 
837 #elif PCRE2_CODE_UNIT_WIDTH == 32
838 
/* Forward UTF-32 reader that tolerates invalid input (32-bit code units).

A code unit below 0xd800, or in the range 0xe000-0x10ffff, is stored in c and
ptr advances by one. Surrogates and values of 0x110000 or more run
invalid_action with ptr and c unchanged. The end argument is not used. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] < 0xd800 || (ptr[0] >= 0xe000 && ptr[0] < 0x110000)) \
    c = *ptr++; \
  else \
    { \
    invalid_action; \
    } \
  }
848 
/* Backward UTF-32 reader that tolerates invalid input (32-bit code units).

c is always loaded from ptr[-1]. When that value is below 0xd800, or in the
range 0xe000-0x10ffff, ptr moves back by one; otherwise invalid_action runs
with ptr unchanged and c holding the rejected value. The start argument is
not used. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (ptr[-1] < 0xd800 || (ptr[-1] >= 0xe000 && ptr[-1] < 0x110000)) \
    ptr--; \
  else \
    { \
    invalid_action; \
    } \
  }
859 
860 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
861 #endif /* SUPPORT_UNICODE */
862 
/* Returns the position right after the closing KET of the group that starts
at cc. cc must point to an assertion opcode (OP_ASSERT .. OP_ASSERTBACK_NA)
or to an opcode in the OP_ONCE .. OP_SCOND range. */
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
PCRE2_SPTR branch = cc;

SLJIT_ASSERT((*branch >= OP_ASSERT && *branch <= OP_ASSERTBACK_NA) || (*branch >= OP_ONCE && *branch <= OP_SCOND));

/* Follow the link stored after the opening opcode, then the link of every
OP_ALT that comes after it. */
branch += GET(branch, 1);
while (*branch == OP_ALT)
  branch += GET(branch, 1);

SLJIT_ASSERT(*branch >= OP_KET && *branch <= OP_KETRPOS);

/* Step over the KET opcode and its link field. */
return branch + 1 + LINK_SIZE;
}
871 
/* Counts the alternatives of the group that starts at cc: one for the
opening opcode plus one for every OP_ALT reached by following the links.
cc must point to an assertion opcode (OP_ASSERT .. OP_ASSERTBACK_NA) or to
an opcode in the OP_ONCE .. OP_SCOND range. */
static int no_alternatives(PCRE2_SPTR cc)
{
int alternatives = 1;

SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

for (cc += GET(cc, 1); *cc == OP_ALT; cc += GET(cc, 1))
  alternatives++;

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return alternatives;
}
885 
find_vreverse(PCRE2_SPTR cc)886 static BOOL find_vreverse(PCRE2_SPTR cc)
887 {
888   SLJIT_ASSERT(*cc == OP_ASSERTBACK || *cc == OP_ASSERTBACK_NOT ||  *cc == OP_ASSERTBACK_NA);
889 
890   do
891     {
892     if (cc[1 + LINK_SIZE] == OP_VREVERSE)
893       return TRUE;
894     cc += GET(cc, 1);
895     }
896   while (*cc == OP_ALT);
897 
898   return FALSE;
899 }
900 
/* Functions which might need modification for all new supported opcodes:
902  next_opcode
903  check_opcode_types
904  set_private_data_ptrs
905  get_framesize
906  init_frame
907  get_recurse_data_length
908  copy_recurse_data
909  compile_matchingpath
910  compile_backtrackingpath
911 */
912 
/* Returns the address of the opcode which follows the one at cc.

Arguments:
  common      compiler state; only its utf flag is read, and only when
              SUPPORT_UNICODE is defined
  cc          points to the current opcode

Returns:      pointer to the next opcode, or NULL when the opcode is
              OP_ANYBYTE in UTF mode or is not listed here
*/
static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
PCRE2_SPTR next;

SLJIT_UNUSED_ARG(common);

switch(*cc)
  {
  /* Opcodes whose length is taken directly from the OP_lengths table. */
  case OP_SOD: case OP_SOM: case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT: case OP_DIGIT:
  case OP_NOT_WHITESPACE: case OP_WHITESPACE:
  case OP_NOT_WORDCHAR: case OP_WORDCHAR:
  case OP_ANY: case OP_ALLANY:
  case OP_NOTPROP: case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE: case OP_HSPACE:
  case OP_NOT_VSPACE: case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN: case OP_EOD:
  case OP_CIRC: case OP_CIRCM:
  case OP_DOLL: case OP_DOLLM:
  case OP_CRSTAR: case OP_CRMINSTAR:
  case OP_CRPLUS: case OP_CRMINPLUS:
  case OP_CRQUERY: case OP_CRMINQUERY:
  case OP_CRRANGE: case OP_CRMINRANGE:
  case OP_CRPOSSTAR: case OP_CRPOSPLUS:
  case OP_CRPOSQUERY: case OP_CRPOSRANGE:
  case OP_CLASS: case OP_NCLASS:
  case OP_REF: case OP_REFI:
  case OP_DNREF: case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_KETRPOS:
  case OP_REVERSE: case OP_VREVERSE:
  case OP_ASSERT: case OP_ASSERT_NOT:
  case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
  case OP_ASSERT_NA: case OP_ASSERTBACK_NA:
  case OP_ONCE:
  case OP_SCRIPT_RUN:
  case OP_BRA: case OP_BRAPOS:
  case OP_CBRA: case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA: case OP_SBRAPOS:
  case OP_SCBRA: case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF: case OP_DNCREF:
  case OP_RREF: case OP_DNRREF:
  case OP_FALSE: case OP_TRUE:
  case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO:
  case OP_PRUNE: case OP_SKIP: case OP_THEN: case OP_COMMIT:
  case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  case OP_NOT_UCP_WORD_BOUNDARY: case OP_UCP_WORD_BOUNDARY:
  return cc + PRIV(OP_lengths)[*cc];

  /* Opcodes which end with a character: in UTF mode the character may
  occupy additional code units beyond the table length. */
  case OP_CHAR: case OP_CHARI:
  case OP_NOT: case OP_NOTI:
  case OP_STAR: case OP_MINSTAR:
  case OP_PLUS: case OP_MINPLUS:
  case OP_QUERY: case OP_MINQUERY:
  case OP_UPTO: case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
  case OP_STARI: case OP_MINSTARI:
  case OP_PLUSI: case OP_MINPLUSI:
  case OP_QUERYI: case OP_MINQUERYI:
  case OP_UPTOI: case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
  case OP_NOTSTAR: case OP_NOTMINSTAR:
  case OP_NOTPLUS: case OP_NOTMINPLUS:
  case OP_NOTQUERY: case OP_NOTMINQUERY:
  case OP_NOTUPTO: case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR: case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
  case OP_NOTSTARI: case OP_NOTMINSTARI:
  case OP_NOTPLUSI: case OP_NOTMINPLUSI:
  case OP_NOTQUERYI: case OP_NOTMINQUERYI:
  case OP_NOTUPTOI: case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
  next = cc + PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(next[-1]))
    next += GET_EXTRALEN(next[-1]);
#endif
  return next;

  /* Type repeats: stop one code unit before the table length, so that the
  item which follows the repeat is returned as the next opcode. */
  case OP_TYPESTAR: case OP_TYPEMINSTAR:
  case OP_TYPEPLUS: case OP_TYPEMINPLUS:
  case OP_TYPEQUERY: case OP_TYPEMINQUERY:
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[*cc] - 1;

  /* NULL is returned for OP_ANYBYTE in UTF mode; check_opcode_types treats
  a NULL result as an unsupported pattern. */
  case OP_ANYBYTE:
#ifdef SUPPORT_UNICODE
  if (common->utf)
    return NULL;
#endif
  return cc + 1;

  /* The total length of the item is stored inside the item itself. */
  case OP_CALLOUT_STR:
  return cc + GET(cc, 1 + 2*LINK_SIZE);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  /* The total length of the item follows the opcode. */
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  /* Verbs with an argument: 1 + 2 fixed code units plus the cc[1] code
  units of the argument. */
  case OP_MARK:
  case OP_COMMIT_ARG:
  case OP_PRUNE_ARG:
  case OP_SKIP_ARG:
  case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  default:
  SLJIT_UNREACHABLE();
  return NULL;
  }
}
1116 
check_opcode_types(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend)1117 static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
1118 {
1119 int count;
1120 PCRE2_SPTR slot;
1121 PCRE2_SPTR assert_back_end = cc - 1;
1122 PCRE2_SPTR assert_na_end = cc - 1;
1123 
1124 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
1125 while (cc < ccend)
1126   {
1127   switch(*cc)
1128     {
1129     case OP_SET_SOM:
1130     common->has_set_som = TRUE;
1131     common->might_be_empty = TRUE;
1132     cc += 1;
1133     break;
1134 
1135     case OP_REFI:
1136 #ifdef SUPPORT_UNICODE
1137     if (common->iref_ptr == 0)
1138       {
1139       common->iref_ptr = common->ovector_start;
1140       common->ovector_start += 3 * sizeof(sljit_sw);
1141       }
1142 #endif /* SUPPORT_UNICODE */
1143     /* Fall through. */
1144     case OP_REF:
1145     common->optimized_cbracket[GET2(cc, 1)] = 0;
1146     cc += 1 + IMM2_SIZE;
1147     break;
1148 
1149     case OP_ASSERT_NA:
1150     case OP_ASSERTBACK_NA:
1151     slot = bracketend(cc);
1152     if (slot > assert_na_end)
1153       assert_na_end = slot;
1154     cc += 1 + LINK_SIZE;
1155     break;
1156 
1157     case OP_CBRAPOS:
1158     case OP_SCBRAPOS:
1159     common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
1160     cc += 1 + LINK_SIZE + IMM2_SIZE;
1161     break;
1162 
1163     case OP_COND:
1164     case OP_SCOND:
1165     /* Only AUTO_CALLOUT can insert this opcode. We do
1166        not intend to support this case. */
1167     if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
1168       return FALSE;
1169     cc += 1 + LINK_SIZE;
1170     break;
1171 
1172     case OP_CREF:
1173     common->optimized_cbracket[GET2(cc, 1)] = 0;
1174     cc += 1 + IMM2_SIZE;
1175     break;
1176 
1177     case OP_DNREF:
1178     case OP_DNREFI:
1179     case OP_DNCREF:
1180     count = GET2(cc, 1 + IMM2_SIZE);
1181     slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
1182     while (count-- > 0)
1183       {
1184       common->optimized_cbracket[GET2(slot, 0)] = 0;
1185       slot += common->name_entry_size;
1186       }
1187     cc += 1 + 2 * IMM2_SIZE;
1188     break;
1189 
1190     case OP_RECURSE:
1191     /* Set its value only once. */
1192     if (common->recursive_head_ptr == 0)
1193       {
1194       common->recursive_head_ptr = common->ovector_start;
1195       common->ovector_start += sizeof(sljit_sw);
1196       }
1197     cc += 1 + LINK_SIZE;
1198     break;
1199 
1200     case OP_CALLOUT:
1201     case OP_CALLOUT_STR:
1202     if (common->capture_last_ptr == 0)
1203       {
1204       common->capture_last_ptr = common->ovector_start;
1205       common->ovector_start += sizeof(sljit_sw);
1206       }
1207     cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
1208     break;
1209 
1210     case OP_ASSERTBACK:
1211     slot = bracketend(cc);
1212     if (slot > assert_back_end)
1213       assert_back_end = slot;
1214     cc += 1 + LINK_SIZE;
1215     break;
1216 
1217     case OP_THEN_ARG:
1218     common->has_then = TRUE;
1219     common->control_head_ptr = 1;
1220     /* Fall through. */
1221 
1222     case OP_COMMIT_ARG:
1223     case OP_PRUNE_ARG:
1224     if (cc < assert_na_end)
1225       return FALSE;
1226     /* Fall through */
1227     case OP_MARK:
1228     if (common->mark_ptr == 0)
1229       {
1230       common->mark_ptr = common->ovector_start;
1231       common->ovector_start += sizeof(sljit_sw);
1232       }
1233     cc += 1 + 2 + cc[1];
1234     break;
1235 
1236     case OP_THEN:
1237     common->has_then = TRUE;
1238     common->control_head_ptr = 1;
1239     cc += 1;
1240     break;
1241 
1242     case OP_SKIP:
1243     if (cc < assert_back_end)
1244       common->has_skip_in_assert_back = TRUE;
1245     if (cc < assert_na_end)
1246       return FALSE;
1247     cc += 1;
1248     break;
1249 
1250     case OP_SKIP_ARG:
1251     common->control_head_ptr = 1;
1252     common->has_skip_arg = TRUE;
1253     if (cc < assert_back_end)
1254       common->has_skip_in_assert_back = TRUE;
1255     if (cc < assert_na_end)
1256       return FALSE;
1257     cc += 1 + 2 + cc[1];
1258     break;
1259 
1260     case OP_PRUNE:
1261     case OP_COMMIT:
1262     case OP_ASSERT_ACCEPT:
1263     if (cc < assert_na_end)
1264       return FALSE;
1265     cc++;
1266     break;
1267 
1268     default:
1269     cc = next_opcode(common, cc);
1270     if (cc == NULL)
1271       return FALSE;
1272     break;
1273     }
1274   }
1275 return TRUE;
1276 }
1277 
/* Exclusive upper bound of the early fail state used by detect_early_fail:
the scan of an alternative stops once its count reaches this value, and the
same value is returned when the scan has to be abandoned. Presumably written
as 3 + 3 to mean the three special states plus three ranged early fails -
confirm against the description of "start" below. */
#define EARLY_FAIL_ENHANCE_MAX (3 + 3)
1279 
/*
  Start represents the number of allowed early fail enhancements.

  The values 0-3 have a special meaning:
    0 - skip is allowed for all iterators
    1 - fail is allowed for all iterators
    2 - fail is allowed for greedy iterators
    3 - only ranged early fail is allowed
  >3 - (start - 3) number of remaining ranged early fails allowed

return: the updated value of start
*/
static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc,
   int *private_data_start, sljit_s32 depth, int start)
{
/* Scans every alternative of the bracket at cc and tags the single character
iterators found there with a private data slot of kind type_skip, type_fail
or type_fail_range. Nested OP_BRA / OP_CBRA brackets are scanned recursively
while depth is below 4.

  common              compiler state; private_data_ptrs, fast_forward_bc_ptr
                      and the early_fail_*_ptr fields are updated
  cc                  points to an OP_ONCE, OP_BRA or OP_CBRA opcode
  private_data_start  in/out: next free offset in the private data area
  depth               current nesting depth of the recursion
  start               state on entry (must be < EARLY_FAIL_ENHANCE_MAX)

The result is the largest state reached by any alternative. It is
EARLY_FAIL_ENHANCE_MAX when a scan stopped at something other than OP_ALT or
OP_KET, or when the private data area grew beyond SLJIT_MAX_LOCAL_SIZE. */
PCRE2_SPTR begin = cc;
PCRE2_SPTR next_alt;
PCRE2_SPTR end;
PCRE2_SPTR accelerated_start;
int result = 0;
int count, prev_count;

SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);

/* State 0 (skip) is not used when the bracket has more than one alternative. */
next_alt = cc + GET(cc, 1);
if (*next_alt == OP_ALT && start < 1)
  start = 1;

do
  {
  /* Every alternative starts from the same state. */
  count = start;
  /* Step over the bracket / alternative header. */
  cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);

  while (TRUE)
    {
    /* Inside the switch, "continue" goes on with the next opcode, while
    "break" leaves the switch: with accelerated_start set an iterator was
    found, with accelerated_start == NULL the scan of this alternative ends. */
    accelerated_start = NULL;

    switch(*cc)
      {
      case OP_SOD:
      case OP_SOM:
      case OP_SET_SOM:
      case OP_NOT_WORD_BOUNDARY:
      case OP_WORD_BOUNDARY:
      case OP_EODN:
      case OP_EOD:
      case OP_CIRC:
      case OP_CIRCM:
      case OP_DOLL:
      case OP_DOLLM:
      case OP_NOT_UCP_WORD_BOUNDARY:
      case OP_UCP_WORD_BOUNDARY:
      /* Zero width assertions. */
      cc++;
      continue;

      case OP_NOT_DIGIT:
      case OP_DIGIT:
      case OP_NOT_WHITESPACE:
      case OP_WHITESPACE:
      case OP_NOT_WORDCHAR:
      case OP_WORDCHAR:
      case OP_ANY:
      case OP_ALLANY:
      case OP_ANYBYTE:
      case OP_NOT_HSPACE:
      case OP_HSPACE:
      case OP_NOT_VSPACE:
      case OP_VSPACE:
      /* A single character type raises the state to at least 1. */
      if (count < 1)
        count = 1;
      cc++;
      continue;

      case OP_ANYNL:
      case OP_EXTUNI:
      /* These raise the state to at least 3. */
      if (count < 3)
        count = 3;
      cc++;
      continue;

      case OP_NOTPROP:
      case OP_PROP:
      if (count < 1)
        count = 1;
      cc += 1 + 2;
      continue;

      case OP_CHAR:
      case OP_CHARI:
      case OP_NOT:
      case OP_NOTI:
      if (count < 1)
        count = 1;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      case OP_TYPEMINSTAR:
      case OP_TYPEMINPLUS:
      /* State 2 only covers greedy iterators, so a minimizing one moves on to 3. */
      if (count == 2)
        count = 3;
      /* Fall through */

      case OP_TYPESTAR:
      case OP_TYPEPLUS:
      case OP_TYPEPOSSTAR:
      case OP_TYPEPOSPLUS:
      /* The type or prop opcode is skipped in the next iteration. */
      cc += 1;

      if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)
        {
        /* Candidate iterator: remember the address of the repeat opcode. */
        accelerated_start = cc - 1;
        break;
        }

      if (count < 3)
        count = 3;
      continue;

      case OP_TYPEEXACT:
      if (count < 1)
        count = 1;
      cc += 1 + IMM2_SIZE;
      continue;

      case OP_TYPEUPTO:
      case OP_TYPEMINUPTO:
      case OP_TYPEPOSUPTO:
      cc += IMM2_SIZE;
      /* Fall through */

      case OP_TYPEQUERY:
      case OP_TYPEMINQUERY:
      case OP_TYPEPOSQUERY:
      /* The type or prop opcode is skipped in the next iteration. */
      if (count < 3)
        count = 3;
      cc += 1;
      continue;

      case OP_MINSTAR:
      case OP_MINPLUS:
      case OP_MINSTARI:
      case OP_MINPLUSI:
      case OP_NOTMINSTAR:
      case OP_NOTMINPLUS:
      case OP_NOTMINSTARI:
      case OP_NOTMINPLUSI:
      /* Same rule as for the minimizing type repeats above. */
      if (count == 2)
        count = 3;
      /* Fall through */

      case OP_STAR:
      case OP_PLUS:
      case OP_POSSTAR:
      case OP_POSPLUS:

      case OP_STARI:
      case OP_PLUSI:
      case OP_POSSTARI:
      case OP_POSPLUSI:

      case OP_NOTSTAR:
      case OP_NOTPLUS:
      case OP_NOTPOSSTAR:
      case OP_NOTPOSPLUS:

      case OP_NOTSTARI:
      case OP_NOTPLUSI:
      case OP_NOTPOSSTARI:
      case OP_NOTPOSPLUSI:
      /* Candidate iterator; step over the opcode and its character. */
      accelerated_start = cc;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      case OP_EXACT:
      if (count < 1)
        count = 1;
      cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      case OP_UPTO:
      case OP_MINUPTO:
      case OP_POSUPTO:
      case OP_UPTOI:
      case OP_MINUPTOI:
      case OP_EXACTI:
      case OP_POSUPTOI:
      case OP_NOTUPTO:
      case OP_NOTMINUPTO:
      case OP_NOTEXACT:
      case OP_NOTPOSUPTO:
      case OP_NOTUPTOI:
      case OP_NOTMINUPTOI:
      case OP_NOTEXACTI:
      case OP_NOTPOSUPTOI:
      /* Step over the repeat count, then share the code of the query forms. */
      cc += IMM2_SIZE;
      /* Fall through */

      case OP_QUERY:
      case OP_MINQUERY:
      case OP_POSQUERY:
      case OP_QUERYI:
      case OP_MINQUERYI:
      case OP_POSQUERYI:
      case OP_NOTQUERY:
      case OP_NOTMINQUERY:
      case OP_NOTPOSQUERY:
      case OP_NOTQUERYI:
      case OP_NOTMINQUERYI:
      case OP_NOTPOSQUERYI:
      if (count < 3)
        count = 3;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      case OP_CLASS:
      case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
      case OP_XCLASS:
      accelerated_start = cc;
      cc += ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR))));
#else
      accelerated_start = cc;
      cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
#endif

      /* cc now points to the optional repeat opcode which follows the class. */
      switch (*cc)
        {
        case OP_CRMINSTAR:
        case OP_CRMINPLUS:
        if (count == 2)
          count = 3;
        /* Fall through */

        case OP_CRSTAR:
        case OP_CRPLUS:
        case OP_CRPOSSTAR:
        case OP_CRPOSPLUS:
        cc++;
        /* Leaves the inner switch only; the break which follows the inner
        switch then leaves the outer one with accelerated_start set. */
        break;

        case OP_CRRANGE:
        case OP_CRMINRANGE:
        case OP_CRPOSRANGE:
        if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
          {
          /* Exact repeat. */
          cc += 1 + 2 * IMM2_SIZE;
          if (count < 1)
            count = 1;
          continue;
          }

        cc += 2 * IMM2_SIZE;
        /* Fall through */
        case OP_CRQUERY:
        case OP_CRMINQUERY:
        case OP_CRPOSQUERY:
        cc++;
        if (count < 3)
          count = 3;
        continue;

        default:
        /* No repeat. */
        if (count < 1)
          count = 1;
        continue;
        }
      break;

      case OP_BRA:
      case OP_CBRA:
      /* prev_count keeps the state before the bracket; it is passed to the
      recursive call as its starting state. */
      prev_count = count;
      if (count < 1)
        count = 1;

      /* Nested brackets are not followed beyond this depth. */
      if (depth >= 4)
        break;

      if (count < 3 && cc[GET(cc, 1)] == OP_ALT)
        count = 3;

      /* Only brackets closed by a plain OP_KET are followed, and capturing
      brackets only while their optimized_cbracket entry is non-zero. */
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0))
        break;

      prev_count = detect_early_fail(common, cc, private_data_start, depth + 1, prev_count);

      if (prev_count > count)
        count = prev_count;

      /* A mark on the nested bracket is propagated to the enclosing one. */
      if (PRIVATE_DATA(cc) != 0)
        common->private_data_ptrs[begin - common->start] = 1;

      if (count < EARLY_FAIL_ENHANCE_MAX)
        {
        /* Go on with the opcode which follows the nested bracket. */
        cc = end;
        continue;
        }
      break;

      case OP_KET:
      SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
      /* Reaching next_alt means that the current alternative has ended. */
      if (cc >= next_alt)
        break;
      cc += 1 + LINK_SIZE;
      continue;
      }

    /* No iterator was found: an unhandled opcode, the end of the
    alternative or a bracket which is not followed. */
    if (accelerated_start == NULL)
      break;

    if (count == 0)
      {
      /* State 0: a skip slot of one word; the iterator is also recorded in
      fast_forward_bc_ptr. */
      common->fast_forward_bc_ptr = accelerated_start;
      common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
      *private_data_start += sizeof(sljit_sw);
      count = 4;
      }
    else if (count < 3)
      {
      /* States 1 and 2: a fail slot of one word. */
      common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;

      if (common->early_fail_start_ptr == 0)
        common->early_fail_start_ptr = *private_data_start;

      *private_data_start += sizeof(sljit_sw);
      common->early_fail_end_ptr = *private_data_start;

      if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
        return EARLY_FAIL_ENHANCE_MAX;

      count = 4;
      }
    else
      {
      /* State 3 and above: a ranged fail slot of two words; each one
      advances the state by one. */
      common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;

      if (common->early_fail_start_ptr == 0)
        common->early_fail_start_ptr = *private_data_start;

      *private_data_start += 2 * sizeof(sljit_sw);
      common->early_fail_end_ptr = *private_data_start;

      if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
        return EARLY_FAIL_ENHANCE_MAX;

      count++;
      }

    /* Cannot be part of a repeat. */
    common->private_data_ptrs[begin - common->start] = 1;

    if (count >= EARLY_FAIL_ENHANCE_MAX)
      break;
    }

  /* A scan which stopped in the middle of an alternative yields the
  maximum; otherwise the largest state of all alternatives is kept. */
  if (*cc != OP_ALT && *cc != OP_KET)
    result = EARLY_FAIL_ENHANCE_MAX;
  else if (result < count)
    result = count;

  cc = next_alt;
  next_alt = cc + GET(cc, 1);
  }
while (*cc == OP_ALT);

return result;
}
1666 
/* Returns the number of private data words (0, 1 or 2) for the repeat
opcode which follows a character class.

Arguments:
  cc          points to the opcode which follows the class data

Returns:      2 for OP_CRSTAR / OP_CRPLUS, 1 for the minimizing and query
              forms, a value derived from the limits for ranges, and 0 for
              any other opcode
*/
static int get_class_iterator_size(PCRE2_SPTR cc)
{
const PCRE2_UCHAR repeat = *cc;
sljit_u32 lower;
sljit_u32 upper;
sljit_u32 span;

if (repeat == OP_CRSTAR || repeat == OP_CRPLUS)
  return 2;

if (repeat == OP_CRMINSTAR || repeat == OP_CRMINPLUS
    || repeat == OP_CRQUERY || repeat == OP_CRMINQUERY)
  return 1;

if (repeat != OP_CRRANGE && repeat != OP_CRMINRANGE)
  return 0;

lower = GET2(cc, 1);
upper = GET2(cc, 1 + IMM2_SIZE);

/* A zero upper limit (presumably "no upper limit") is sized like the
corresponding star form. */
if (upper == 0)
  return (repeat == OP_CRRANGE) ? 2 : 1;

/* Unsigned difference of the limits, capped at two words. */
span = upper - lower;
return (span > 2) ? 2 : (int)span;
}
1698 
detect_repeat(compiler_common * common,PCRE2_SPTR begin)1699 static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
1700 {
1701 PCRE2_SPTR end = bracketend(begin);
1702 PCRE2_SPTR next;
1703 PCRE2_SPTR next_end;
1704 PCRE2_SPTR max_end;
1705 PCRE2_UCHAR type;
1706 sljit_sw length = end - begin;
1707 sljit_s32 min, max, i;
1708 
1709 /* Detect fixed iterations first. */
1710 if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)
1711   return FALSE;
1712 
1713 /* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?AB){1,3}/
1714  * Skip the check of the second part. */
1715 if (PRIVATE_DATA(end - LINK_SIZE) != 0)
1716   return TRUE;
1717 
1718 next = end;
1719 min = 1;
1720 while (1)
1721   {
1722   if (*next != *begin)
1723     break;
1724   next_end = bracketend(next);
1725   if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1726     break;
1727   next = next_end;
1728   min++;
1729   }
1730 
1731 if (min == 2)
1732   return FALSE;
1733 
1734 max = 0;
1735 max_end = next;
1736 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1737   {
1738   type = *next;
1739   while (1)
1740     {
1741     if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1742       break;
1743     next_end = bracketend(next + 2 + LINK_SIZE);
1744     if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1745       break;
1746     next = next_end;
1747     max++;
1748     }
1749 
1750   if (next[0] == type && next[1] == *begin && max >= 1)
1751     {
1752     next_end = bracketend(next + 1);
1753     if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1754       {
1755       for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1756         if (*next_end != OP_KET)
1757           break;
1758 
1759       if (i == max)
1760         {
1761         common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1762         common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1763         /* +2 the original and the last. */
1764         common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1765         if (min == 1)
1766           return TRUE;
1767         min--;
1768         max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1769         }
1770       }
1771     }
1772   }
1773 
1774 if (min >= 3)
1775   {
1776   common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1777   common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1778   common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1779   return TRUE;
1780   }
1781 
1782 return FALSE;
1783 }
1784 
/* Case label lists which group the single character iterator opcodes. The
numeric suffix appears to be the number of private data words the group
needs; see the "space" values assigned in set_private_data_ptrs. */

/* Character iterators: minimizing star / plus and both query forms. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINSTARI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINSTARI: \
    case OP_MINPLUS: \
    case OP_MINPLUSI: \
    case OP_NOTMINPLUS: \
    case OP_NOTMINPLUSI: \
    case OP_QUERY: \
    case OP_QUERYI: \
    case OP_NOTQUERY: \
    case OP_NOTQUERYI: \
    case OP_MINQUERY: \
    case OP_MINQUERYI: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINQUERYI:

/* Character iterators: greedy star / plus. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_STARI: \
    case OP_NOTSTAR: \
    case OP_NOTSTARI: \
    case OP_PLUS: \
    case OP_PLUSI: \
    case OP_NOTPLUS: \
    case OP_NOTPLUSI:

/* Character iterators: bounded repeats, which also carry a count. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_UPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTUPTOI: \
    case OP_MINUPTO: \
    case OP_MINUPTOI: \
    case OP_NOTMINUPTO: \
    case OP_NOTMINUPTOI:

/* Type iterators: minimizing star / plus and both query forms. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Type iterators: greedy star / plus. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* Type iterators: bounded repeats. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1836 
/* Walks the byte code from common->start to ccend and assigns an offset in
the private data area to every opcode which needs one. The offsets are stored
in common->private_data_ptrs, indexed by the position of the opcode.

Arguments:
  common              compiler state; private_data_ptrs is updated
  private_data_start  in/out: first free offset on entry, next free offset
                      on return
  ccend               end of the byte code (exclusive)
*/
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
PCRE2_SPTR cc = common->start;
PCRE2_SPTR alternative;
/* End of the region in which character iterators get no private slot;
NULL means that there is no such region at the moment. */
PCRE2_SPTR end = NULL;
int private_data_ptr = *private_data_start;
/* space: number of words an iterator needs; size: code units to advance
(a negative value also asks for the UTF extra length to be skipped);
bracketlen: length of a bracket header to step over. */
int space, size, bracketlen;
BOOL repeat_check = TRUE;

while (cc < ccend)
  {
  space = 0;
  size = 0;
  bracketlen = 0;
  /* Stop assigning once the private data area exceeds the limit. */
  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  /* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point). */
  if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
    {
    if (detect_repeat(common, cc))
      {
      /* These brackets are converted to repeats, so no global
      based single character repeat is allowed. */
      if (cc >= end)
        end = bracketend(cc);
      }
    }
  repeat_check = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry after the ket is the skip distance recorded by
    detect_repeat: the ket gets a slot and the copies are skipped. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    /* Brackets which always get one word. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_ASSERTBACK_NA:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);

    /* An alternative which starts with OP_VREVERSE costs one more word;
    the entry after the opcode is set to 1 as a flag. */
    if (find_vreverse(cc))
      {
      common->private_data_ptrs[cc + 1 - common->start] = 1;
      private_data_ptr += sizeof(sljit_sw);
      }

    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    common->private_data_ptrs[cc - common->start] = 0;
    alternative = cc + GET(cc, 1);
    /* A slot is needed only when the first link leads to a repeating ket. */
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    /* Brackets without a slot of their own; only the header is skipped. */
    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    size = 1;
    /* The bracket which follows must not be tested by detect_repeat. */
    repeat_check = FALSE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    size = -2;
    space = 1;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    size = -2;
    space = 2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    size = -(2 + IMM2_SIZE);
    space = 2;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    size = 1;
    space = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    size = 1;
    /* No slot when the repeated item is OP_ANYNL or OP_EXTUNI. */
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      space = 2;
    break;

    case OP_TYPEUPTO:
    size = 1 + IMM2_SIZE;
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    break;

    case OP_TYPEMINUPTO:
    size = 1 + IMM2_SIZE;
    space = 2;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    size = 1 + 32 / sizeof(PCRE2_UCHAR);
    /* The space depends on the repeat opcode which follows the class. */
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    default:
    /* Nothing to allocate: advance cc directly (size stays zero). */
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
     gets a private slot instead of allocating it on the stack. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw) * space;
    }

  if (size != 0)
    {
    if (size < 0)
      {
      /* The item ends with a character, which may be longer in UTF mode. */
      cc += -size;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  if (bracketlen > 0)
    {
    /* Outside of the current region: a bracket closed by a plain OP_KET
    starts no new region, any other bracket starts one up to its end. */
    if (cc >= end)
      {
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
/* Report the next free offset to the caller. */
*private_data_start = private_data_ptr;
}
2033 
2034 /* Returns with a frame_types (always < 0) if no need for frame. */
get_framesize(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,BOOL recursive,BOOL * needs_control_head)2035 static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
2036 {
2037 int length = 0;
2038 int possessive = 0;
2039 BOOL stack_restore = FALSE;
2040 BOOL setsom_found = recursive;
2041 BOOL setmark_found = recursive;
2042 /* The last capture is a local variable even for recursions. */
2043 BOOL capture_last_found = FALSE;
2044 
2045 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2046 SLJIT_ASSERT(common->control_head_ptr != 0);
2047 *needs_control_head = TRUE;
2048 #else
2049 *needs_control_head = FALSE;
2050 #endif
2051 
2052 if (ccend == NULL)
2053   {
2054   ccend = bracketend(cc) - (1 + LINK_SIZE);
2055   if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
2056     {
2057     possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
2058     /* This is correct regardless of common->capture_last_ptr. */
2059     capture_last_found = TRUE;
2060     }
2061   cc = next_opcode(common, cc);
2062   }
2063 
2064 SLJIT_ASSERT(cc != NULL);
2065 while (cc < ccend)
2066   switch(*cc)
2067     {
2068     case OP_SET_SOM:
2069     SLJIT_ASSERT(common->has_set_som);
2070     stack_restore = TRUE;
2071     if (!setsom_found)
2072       {
2073       length += 2;
2074       setsom_found = TRUE;
2075       }
2076     cc += 1;
2077     break;
2078 
2079     case OP_MARK:
2080     case OP_COMMIT_ARG:
2081     case OP_PRUNE_ARG:
2082     case OP_THEN_ARG:
2083     SLJIT_ASSERT(common->mark_ptr != 0);
2084     stack_restore = TRUE;
2085     if (!setmark_found)
2086       {
2087       length += 2;
2088       setmark_found = TRUE;
2089       }
2090     if (common->control_head_ptr != 0)
2091       *needs_control_head = TRUE;
2092     cc += 1 + 2 + cc[1];
2093     break;
2094 
2095     case OP_RECURSE:
2096     stack_restore = TRUE;
2097     if (common->has_set_som && !setsom_found)
2098       {
2099       length += 2;
2100       setsom_found = TRUE;
2101       }
2102     if (common->mark_ptr != 0 && !setmark_found)
2103       {
2104       length += 2;
2105       setmark_found = TRUE;
2106       }
2107     if (common->capture_last_ptr != 0 && !capture_last_found)
2108       {
2109       length += 2;
2110       capture_last_found = TRUE;
2111       }
2112     cc += 1 + LINK_SIZE;
2113     break;
2114 
2115     case OP_CBRA:
2116     case OP_CBRAPOS:
2117     case OP_SCBRA:
2118     case OP_SCBRAPOS:
2119     stack_restore = TRUE;
2120     if (common->capture_last_ptr != 0 && !capture_last_found)
2121       {
2122       length += 2;
2123       capture_last_found = TRUE;
2124       }
2125     length += 3;
2126     cc += 1 + LINK_SIZE + IMM2_SIZE;
2127     break;
2128 
2129     case OP_THEN:
2130     stack_restore = TRUE;
2131     if (common->control_head_ptr != 0)
2132       *needs_control_head = TRUE;
2133     cc ++;
2134     break;
2135 
2136     default:
2137     stack_restore = TRUE;
2138     /* Fall through. */
2139 
2140     case OP_NOT_WORD_BOUNDARY:
2141     case OP_WORD_BOUNDARY:
2142     case OP_NOT_DIGIT:
2143     case OP_DIGIT:
2144     case OP_NOT_WHITESPACE:
2145     case OP_WHITESPACE:
2146     case OP_NOT_WORDCHAR:
2147     case OP_WORDCHAR:
2148     case OP_ANY:
2149     case OP_ALLANY:
2150     case OP_ANYBYTE:
2151     case OP_NOTPROP:
2152     case OP_PROP:
2153     case OP_ANYNL:
2154     case OP_NOT_HSPACE:
2155     case OP_HSPACE:
2156     case OP_NOT_VSPACE:
2157     case OP_VSPACE:
2158     case OP_EXTUNI:
2159     case OP_EODN:
2160     case OP_EOD:
2161     case OP_CIRC:
2162     case OP_CIRCM:
2163     case OP_DOLL:
2164     case OP_DOLLM:
2165     case OP_CHAR:
2166     case OP_CHARI:
2167     case OP_NOT:
2168     case OP_NOTI:
2169 
2170     case OP_EXACT:
2171     case OP_POSSTAR:
2172     case OP_POSPLUS:
2173     case OP_POSQUERY:
2174     case OP_POSUPTO:
2175 
2176     case OP_EXACTI:
2177     case OP_POSSTARI:
2178     case OP_POSPLUSI:
2179     case OP_POSQUERYI:
2180     case OP_POSUPTOI:
2181 
2182     case OP_NOTEXACT:
2183     case OP_NOTPOSSTAR:
2184     case OP_NOTPOSPLUS:
2185     case OP_NOTPOSQUERY:
2186     case OP_NOTPOSUPTO:
2187 
2188     case OP_NOTEXACTI:
2189     case OP_NOTPOSSTARI:
2190     case OP_NOTPOSPLUSI:
2191     case OP_NOTPOSQUERYI:
2192     case OP_NOTPOSUPTOI:
2193 
2194     case OP_TYPEEXACT:
2195     case OP_TYPEPOSSTAR:
2196     case OP_TYPEPOSPLUS:
2197     case OP_TYPEPOSQUERY:
2198     case OP_TYPEPOSUPTO:
2199 
2200     case OP_CLASS:
2201     case OP_NCLASS:
2202     case OP_XCLASS:
2203 
2204     case OP_CALLOUT:
2205     case OP_CALLOUT_STR:
2206 
2207     case OP_NOT_UCP_WORD_BOUNDARY:
2208     case OP_UCP_WORD_BOUNDARY:
2209 
2210     cc = next_opcode(common, cc);
2211     SLJIT_ASSERT(cc != NULL);
2212     break;
2213     }
2214 
2215 /* Possessive quantifiers can use a special case. */
2216 if (SLJIT_UNLIKELY(possessive == length))
2217   return stack_restore ? no_frame : no_stack;
2218 
2219 if (length > 0)
2220   return length + 1;
2221 return stack_restore ? no_frame : no_stack;
2222 }
2223 
init_frame(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,int stackpos,int stacktop)2224 static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
2225 {
2226 DEFINE_COMPILER;
2227 BOOL setsom_found = FALSE;
2228 BOOL setmark_found = FALSE;
2229 /* The last capture is a local variable even for recursions. */
2230 BOOL capture_last_found = FALSE;
2231 int offset;
2232 
2233 /* >= 1 + shortest item size (2) */
2234 SLJIT_UNUSED_ARG(stacktop);
2235 SLJIT_ASSERT(stackpos >= stacktop + 2);
2236 
2237 stackpos = STACK(stackpos);
2238 if (ccend == NULL)
2239   {
2240   ccend = bracketend(cc) - (1 + LINK_SIZE);
2241   if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
2242     cc = next_opcode(common, cc);
2243   }
2244 
2245 SLJIT_ASSERT(cc != NULL);
2246 while (cc < ccend)
2247   switch(*cc)
2248     {
2249     case OP_SET_SOM:
2250     SLJIT_ASSERT(common->has_set_som);
2251     if (!setsom_found)
2252       {
2253       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2254       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2255       stackpos -= SSIZE_OF(sw);
2256       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2257       stackpos -= SSIZE_OF(sw);
2258       setsom_found = TRUE;
2259       }
2260     cc += 1;
2261     break;
2262 
2263     case OP_MARK:
2264     case OP_COMMIT_ARG:
2265     case OP_PRUNE_ARG:
2266     case OP_THEN_ARG:
2267     SLJIT_ASSERT(common->mark_ptr != 0);
2268     if (!setmark_found)
2269       {
2270       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2271       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2272       stackpos -= SSIZE_OF(sw);
2273       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2274       stackpos -= SSIZE_OF(sw);
2275       setmark_found = TRUE;
2276       }
2277     cc += 1 + 2 + cc[1];
2278     break;
2279 
2280     case OP_RECURSE:
2281     if (common->has_set_som && !setsom_found)
2282       {
2283       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2284       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2285       stackpos -= SSIZE_OF(sw);
2286       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2287       stackpos -= SSIZE_OF(sw);
2288       setsom_found = TRUE;
2289       }
2290     if (common->mark_ptr != 0 && !setmark_found)
2291       {
2292       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2293       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2294       stackpos -= SSIZE_OF(sw);
2295       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2296       stackpos -= SSIZE_OF(sw);
2297       setmark_found = TRUE;
2298       }
2299     if (common->capture_last_ptr != 0 && !capture_last_found)
2300       {
2301       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2302       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2303       stackpos -= SSIZE_OF(sw);
2304       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2305       stackpos -= SSIZE_OF(sw);
2306       capture_last_found = TRUE;
2307       }
2308     cc += 1 + LINK_SIZE;
2309     break;
2310 
2311     case OP_CBRA:
2312     case OP_CBRAPOS:
2313     case OP_SCBRA:
2314     case OP_SCBRAPOS:
2315     if (common->capture_last_ptr != 0 && !capture_last_found)
2316       {
2317       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2318       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2319       stackpos -= SSIZE_OF(sw);
2320       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2321       stackpos -= SSIZE_OF(sw);
2322       capture_last_found = TRUE;
2323       }
2324     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2325     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
2326     stackpos -= SSIZE_OF(sw);
2327     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
2328     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
2329     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2330     stackpos -= SSIZE_OF(sw);
2331     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
2332     stackpos -= SSIZE_OF(sw);
2333 
2334     cc += 1 + LINK_SIZE + IMM2_SIZE;
2335     break;
2336 
2337     default:
2338     cc = next_opcode(common, cc);
2339     SLJIT_ASSERT(cc != NULL);
2340     break;
2341     }
2342 
2343 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
2344 SLJIT_ASSERT(stackpos == STACK(stacktop));
2345 }
2346 
2347 #define RECURSE_TMP_REG_COUNT 3
2348 
2349 typedef struct delayed_mem_copy_status {
2350   struct sljit_compiler *compiler;
2351   int store_bases[RECURSE_TMP_REG_COUNT];
2352   int store_offsets[RECURSE_TMP_REG_COUNT];
2353   int tmp_regs[RECURSE_TMP_REG_COUNT];
2354   int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
2355   int next_tmp_reg;
2356 } delayed_mem_copy_status;
2357 
/* Prepares status for a new copy sequence: binds it to the compiler of
common, marks every slot as having no pending store and selects slot 0 as the
first one to use. The tmp_regs[] and saved_tmp_regs[] members are not written
here, they are only checked by assertions, so the caller has to fill them
before the call. */
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
int slot;

status->compiler = common->compiler;
status->next_tmp_reg = 0;

for (slot = 0; slot < RECURSE_TMP_REG_COUNT; slot++)
  {
  SLJIT_ASSERT(status->tmp_regs[slot] >= 0);
  /* A separate carrier register is accepted only when the saved register
  has a negative register index. */
  SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[slot]) < 0 || status->tmp_regs[slot] == status->saved_tmp_regs[slot]);

  status->store_bases[slot] = -1;
  }
}
2372 
/* Queues the copy of one machine word from [load_base + load_offset] to
[store_base + store_offset]. The load into the register of the current slot is
emitted immediately, the store is only recorded in status. If the slot still
holds a pending store from an earlier call, that store is emitted first. */
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_sw store_offset)
{
struct sljit_compiler *compiler = status->compiler;
const int slot = status->next_tmp_reg;
const int carrier = status->tmp_regs[slot];
const int pending_base = status->store_bases[slot];

SLJIT_ASSERT(load_base > 0 && store_base > 0);

if (pending_base != -1)
  {
  /* The slot is reused: write out the value it still carries. */
  OP1(SLJIT_MOV, SLJIT_MEM1(pending_base), status->store_offsets[slot], carrier, 0);
  }
else if (sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[slot]) < 0)
  {
  /* First use of the slot: preserve virtual registers. */
  OP1(SLJIT_MOV, status->saved_tmp_regs[slot], 0, carrier, 0);
  }

OP1(SLJIT_MOV, carrier, 0, SLJIT_MEM1(load_base), load_offset);
status->store_offsets[slot] = store_offset;
status->store_bases[slot] = store_base;

/* Advance to the next slot, wrapping around. */
status->next_tmp_reg = (slot + 1) % RECURSE_TMP_REG_COUNT;
}
2397 
/* Emits every store which is still pending in status. The slots are visited
starting from the one delayed_mem_copy_move() would use next. After the store
of a slot, the carrier register is reloaded from its saved register when the
latter has a negative register index. The status itself is not reset. */
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int remaining;
int carrier, backup;

for (remaining = RECURSE_TMP_REG_COUNT; remaining > 0; remaining--, slot = (slot + 1) % RECURSE_TMP_REG_COUNT)
  {
  /* Nothing was queued in this slot. */
  if (status->store_bases[slot] == -1)
    continue;

  carrier = status->tmp_regs[slot];
  backup = status->saved_tmp_regs[slot];

  OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], carrier, 0);

  /* Restore virtual registers. */
  if (sljit_get_register_index(SLJIT_GP_REGISTER, backup) < 0)
    OP1(SLJIT_MOV, carrier, 0, backup, 0);
  }
}
2421 
2422 #undef RECURSE_TMP_REG_COUNT
2423 
recurse_check_bit(compiler_common * common,sljit_sw bit_index)2424 static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index)
2425 {
2426 uint8_t *byte;
2427 uint8_t mask;
2428 
2429 SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0);
2430 
2431 bit_index >>= SLJIT_WORD_SHIFT;
2432 
2433 SLJIT_ASSERT((bit_index >> 3) < common->recurse_bitset_size);
2434 
2435 mask = 1 << (bit_index & 0x7);
2436 byte = common->recurse_bitset + (bit_index >> 3);
2437 
2438 if (*byte & mask)
2439   return FALSE;
2440 
2441 *byte |= mask;
2442 return TRUE;
2443 }
2444 
/* Bit flags collected by get_recurse_data_length() and returned through its
result_flags argument. */
enum get_recurse_flags {
  /* A PRUNE, SKIP, COMMIT or THEN verb (with or without argument) was found. */
  recurse_flag_quit_found = (1 << 0),
  /* An ACCEPT or ASSERT_ACCEPT was found. */
  recurse_flag_accept_found = (1 << 1),
  /* SET_SOM was found, or a RECURSE while the pattern has SET_SOM. */
  recurse_flag_setsom_found = (1 << 2),
  /* A mark setting verb was found, or a RECURSE while mark_ptr is in use. */
  recurse_flag_setmark_found = (1 << 3),
  /* The control head is needed (a THEN, or a mark setting verb while
  control_head_ptr is in use). */
  recurse_flag_control_head_found = (1 << 4)
};
2452 
get_recurse_data_length(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,uint32_t * result_flags)2453 static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, uint32_t *result_flags)
2454 {
2455 int length = 1;
2456 int size, offset;
2457 PCRE2_SPTR alternative;
2458 uint32_t recurse_flags = 0;
2459 
2460 memset(common->recurse_bitset, 0, common->recurse_bitset_size);
2461 
2462 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2463 SLJIT_ASSERT(common->control_head_ptr != 0);
2464 recurse_flags |= recurse_flag_control_head_found;
2465 #endif
2466 
2467 /* Calculate the sum of the private machine words. */
2468 while (cc < ccend)
2469   {
2470   size = 0;
2471   switch(*cc)
2472     {
2473     case OP_SET_SOM:
2474     SLJIT_ASSERT(common->has_set_som);
2475     recurse_flags |= recurse_flag_setsom_found;
2476     cc += 1;
2477     break;
2478 
2479     case OP_RECURSE:
2480     if (common->has_set_som)
2481       recurse_flags |= recurse_flag_setsom_found;
2482     if (common->mark_ptr != 0)
2483       recurse_flags |= recurse_flag_setmark_found;
2484     if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2485       length++;
2486     cc += 1 + LINK_SIZE;
2487     break;
2488 
2489     case OP_KET:
2490     offset = PRIVATE_DATA(cc);
2491     if (offset != 0)
2492       {
2493       if (recurse_check_bit(common, offset))
2494         length++;
2495       SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2496       cc += PRIVATE_DATA(cc + 1);
2497       }
2498     cc += 1 + LINK_SIZE;
2499     break;
2500 
2501     case OP_ASSERT:
2502     case OP_ASSERT_NOT:
2503     case OP_ASSERTBACK:
2504     case OP_ASSERTBACK_NOT:
2505     case OP_ASSERT_NA:
2506     case OP_ASSERTBACK_NA:
2507     case OP_ONCE:
2508     case OP_SCRIPT_RUN:
2509     case OP_BRAPOS:
2510     case OP_SBRA:
2511     case OP_SBRAPOS:
2512     case OP_SCOND:
2513     SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
2514     if (recurse_check_bit(common, PRIVATE_DATA(cc)))
2515       length++;
2516     cc += 1 + LINK_SIZE;
2517     break;
2518 
2519     case OP_CBRA:
2520     case OP_SCBRA:
2521     offset = GET2(cc, 1 + LINK_SIZE);
2522     if (recurse_check_bit(common, OVECTOR(offset << 1)))
2523       {
2524       SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
2525       length += 2;
2526       }
2527     if (common->optimized_cbracket[offset] == 0 && recurse_check_bit(common, OVECTOR_PRIV(offset)))
2528       length++;
2529     if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2530       length++;
2531     cc += 1 + LINK_SIZE + IMM2_SIZE;
2532     break;
2533 
2534     case OP_CBRAPOS:
2535     case OP_SCBRAPOS:
2536     offset = GET2(cc, 1 + LINK_SIZE);
2537     if (recurse_check_bit(common, OVECTOR(offset << 1)))
2538       {
2539       SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
2540       length += 2;
2541       }
2542     if (recurse_check_bit(common, OVECTOR_PRIV(offset)))
2543       length++;
2544     if (recurse_check_bit(common, PRIVATE_DATA(cc)))
2545       length++;
2546     if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2547       length++;
2548     cc += 1 + LINK_SIZE + IMM2_SIZE;
2549     break;
2550 
2551     case OP_COND:
2552     /* Might be a hidden SCOND. */
2553     alternative = cc + GET(cc, 1);
2554     if ((*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc)))
2555       length++;
2556     cc += 1 + LINK_SIZE;
2557     break;
2558 
2559     CASE_ITERATOR_PRIVATE_DATA_1
2560     offset = PRIVATE_DATA(cc);
2561     if (offset != 0 && recurse_check_bit(common, offset))
2562       length++;
2563     cc += 2;
2564 #ifdef SUPPORT_UNICODE
2565     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2566 #endif
2567     break;
2568 
2569     CASE_ITERATOR_PRIVATE_DATA_2A
2570     offset = PRIVATE_DATA(cc);
2571     if (offset != 0 && recurse_check_bit(common, offset))
2572       {
2573       SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
2574       length += 2;
2575       }
2576     cc += 2;
2577 #ifdef SUPPORT_UNICODE
2578     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2579 #endif
2580     break;
2581 
2582     CASE_ITERATOR_PRIVATE_DATA_2B
2583     offset = PRIVATE_DATA(cc);
2584     if (offset != 0 && recurse_check_bit(common, offset))
2585       {
2586       SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
2587       length += 2;
2588       }
2589     cc += 2 + IMM2_SIZE;
2590 #ifdef SUPPORT_UNICODE
2591     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2592 #endif
2593     break;
2594 
2595     CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2596     offset = PRIVATE_DATA(cc);
2597     if (offset != 0 && recurse_check_bit(common, offset))
2598       length++;
2599     cc += 1;
2600     break;
2601 
2602     CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2603     offset = PRIVATE_DATA(cc);
2604     if (offset != 0 && recurse_check_bit(common, offset))
2605       {
2606       SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
2607       length += 2;
2608       }
2609     cc += 1;
2610     break;
2611 
2612     CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2613     offset = PRIVATE_DATA(cc);
2614     if (offset != 0 && recurse_check_bit(common, offset))
2615       {
2616       SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
2617       length += 2;
2618       }
2619     cc += 1 + IMM2_SIZE;
2620     break;
2621 
2622     case OP_CLASS:
2623     case OP_NCLASS:
2624 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
2625     case OP_XCLASS:
2626     size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2627 #else
2628     size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2629 #endif
2630 
2631     offset = PRIVATE_DATA(cc);
2632     if (offset != 0 && recurse_check_bit(common, offset))
2633       length += get_class_iterator_size(cc + size);
2634     cc += size;
2635     break;
2636 
2637     case OP_MARK:
2638     case OP_COMMIT_ARG:
2639     case OP_PRUNE_ARG:
2640     case OP_THEN_ARG:
2641     SLJIT_ASSERT(common->mark_ptr != 0);
2642     recurse_flags |= recurse_flag_setmark_found;
2643     if (common->control_head_ptr != 0)
2644       recurse_flags |= recurse_flag_control_head_found;
2645     if (*cc != OP_MARK)
2646       recurse_flags |= recurse_flag_quit_found;
2647 
2648     cc += 1 + 2 + cc[1];
2649     break;
2650 
2651     case OP_PRUNE:
2652     case OP_SKIP:
2653     case OP_COMMIT:
2654     recurse_flags |= recurse_flag_quit_found;
2655     cc++;
2656     break;
2657 
2658     case OP_SKIP_ARG:
2659     recurse_flags |= recurse_flag_quit_found;
2660     cc += 1 + 2 + cc[1];
2661     break;
2662 
2663     case OP_THEN:
2664     SLJIT_ASSERT(common->control_head_ptr != 0);
2665     recurse_flags |= recurse_flag_quit_found | recurse_flag_control_head_found;
2666     cc++;
2667     break;
2668 
2669     case OP_ACCEPT:
2670     case OP_ASSERT_ACCEPT:
2671     recurse_flags |= recurse_flag_accept_found;
2672     cc++;
2673     break;
2674 
2675     default:
2676     cc = next_opcode(common, cc);
2677     SLJIT_ASSERT(cc != NULL);
2678     break;
2679     }
2680   }
2681 SLJIT_ASSERT(cc == ccend);
2682 
2683 if (recurse_flags & recurse_flag_control_head_found)
2684   length++;
2685 if (recurse_flags & recurse_flag_quit_found)
2686   {
2687   if (recurse_flags & recurse_flag_setsom_found)
2688     length++;
2689   if (recurse_flags & recurse_flag_setmark_found)
2690     length++;
2691   }
2692 
2693 *result_flags = recurse_flags;
2694 return length;
2695 }
2696 
/* Operating modes of copy_recurse_data(). The data is divided into private,
shared and kept shared words; the mode selects which groups are transferred
and in which direction. */
enum copy_recurse_data_types {
  /* All three groups: from the SLJIT_SP based locals to the stack. */
  recurse_copy_from_global = 0,
  /* Private words only: from the stack to the locals. */
  recurse_copy_private_to_global = 1,
  /* Shared and kept shared words: from the stack to the locals. */
  recurse_copy_shared_to_global = 2,
  /* Kept shared words only: from the stack to the locals. */
  recurse_copy_kept_shared_to_global = 3,
  /* Private and shared words are exchanged between the locals and the
  memory addressed through TMP2. */
  recurse_swap_global = 4
};
2704 
copy_recurse_data(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,int type,int stackptr,int stacktop,uint32_t recurse_flags)2705 static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
2706   int type, int stackptr, int stacktop, uint32_t recurse_flags)
2707 {
2708 delayed_mem_copy_status status;
2709 PCRE2_SPTR alternative;
2710 sljit_sw private_srcw[2];
2711 sljit_sw shared_srcw[3];
2712 sljit_sw kept_shared_srcw[2];
2713 int private_count, shared_count, kept_shared_count;
2714 int from_sp, base_reg, offset, i;
2715 
2716 memset(common->recurse_bitset, 0, common->recurse_bitset_size);
2717 
2718 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2719 SLJIT_ASSERT(common->control_head_ptr != 0);
2720 recurse_check_bit(common, common->control_head_ptr);
2721 #endif
2722 
2723 switch (type)
2724   {
2725   case recurse_copy_from_global:
2726   from_sp = TRUE;
2727   base_reg = STACK_TOP;
2728   break;
2729 
2730   case recurse_copy_private_to_global:
2731   case recurse_copy_shared_to_global:
2732   case recurse_copy_kept_shared_to_global:
2733   from_sp = FALSE;
2734   base_reg = STACK_TOP;
2735   break;
2736 
2737   default:
2738   SLJIT_ASSERT(type == recurse_swap_global);
2739   from_sp = FALSE;
2740   base_reg = TMP2;
2741   break;
2742   }
2743 
2744 stackptr = STACK(stackptr);
2745 stacktop = STACK(stacktop);
2746 
2747 status.tmp_regs[0] = TMP1;
2748 status.saved_tmp_regs[0] = TMP1;
2749 
2750 if (base_reg != TMP2)
2751   {
2752   status.tmp_regs[1] = TMP2;
2753   status.saved_tmp_regs[1] = TMP2;
2754   }
2755 else
2756   {
2757   status.saved_tmp_regs[1] = RETURN_ADDR;
2758   if (HAS_VIRTUAL_REGISTERS)
2759     status.tmp_regs[1] = STR_PTR;
2760   else
2761     status.tmp_regs[1] = RETURN_ADDR;
2762   }
2763 
2764 status.saved_tmp_regs[2] = TMP3;
2765 if (HAS_VIRTUAL_REGISTERS)
2766   status.tmp_regs[2] = STR_END;
2767 else
2768   status.tmp_regs[2] = TMP3;
2769 
2770 delayed_mem_copy_init(&status, common);
2771 
2772 if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2773   {
2774   SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2775 
2776   if (!from_sp)
2777     delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);
2778 
2779   if (from_sp || type == recurse_swap_global)
2780     delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
2781   }
2782 
2783 stackptr += sizeof(sljit_sw);
2784 
2785 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2786 if (type != recurse_copy_shared_to_global)
2787   {
2788   if (!from_sp)
2789     delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);
2790 
2791   if (from_sp || type == recurse_swap_global)
2792     delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
2793   }
2794 
2795 stackptr += sizeof(sljit_sw);
2796 #endif
2797 
2798 while (cc < ccend)
2799   {
2800   private_count = 0;
2801   shared_count = 0;
2802   kept_shared_count = 0;
2803 
2804   switch(*cc)
2805     {
2806     case OP_SET_SOM:
2807     SLJIT_ASSERT(common->has_set_som);
2808     if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, OVECTOR(0)))
2809       {
2810       kept_shared_srcw[0] = OVECTOR(0);
2811       kept_shared_count = 1;
2812       }
2813     cc += 1;
2814     break;
2815 
2816     case OP_RECURSE:
2817     if (recurse_flags & recurse_flag_quit_found)
2818       {
2819       if (common->has_set_som && recurse_check_bit(common, OVECTOR(0)))
2820         {
2821         kept_shared_srcw[0] = OVECTOR(0);
2822         kept_shared_count = 1;
2823         }
2824       if (common->mark_ptr != 0 && recurse_check_bit(common, common->mark_ptr))
2825         {
2826         kept_shared_srcw[kept_shared_count] = common->mark_ptr;
2827         kept_shared_count++;
2828         }
2829       }
2830     if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2831       {
2832       shared_srcw[0] = common->capture_last_ptr;
2833       shared_count = 1;
2834       }
2835     cc += 1 + LINK_SIZE;
2836     break;
2837 
2838     case OP_KET:
2839     private_srcw[0] = PRIVATE_DATA(cc);
2840     if (private_srcw[0] != 0)
2841       {
2842       if (recurse_check_bit(common, private_srcw[0]))
2843         private_count = 1;
2844       SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2845       cc += PRIVATE_DATA(cc + 1);
2846       }
2847     cc += 1 + LINK_SIZE;
2848     break;
2849 
2850     case OP_ASSERT:
2851     case OP_ASSERT_NOT:
2852     case OP_ASSERTBACK:
2853     case OP_ASSERTBACK_NOT:
2854     case OP_ASSERT_NA:
2855     case OP_ASSERTBACK_NA:
2856     case OP_ONCE:
2857     case OP_SCRIPT_RUN:
2858     case OP_BRAPOS:
2859     case OP_SBRA:
2860     case OP_SBRAPOS:
2861     case OP_SCOND:
2862     private_srcw[0] = PRIVATE_DATA(cc);
2863     if (recurse_check_bit(common, private_srcw[0]))
2864       private_count = 1;
2865     cc += 1 + LINK_SIZE;
2866     break;
2867 
2868     case OP_CBRA:
2869     case OP_SCBRA:
2870     offset = GET2(cc, 1 + LINK_SIZE);
2871     shared_srcw[0] = OVECTOR(offset << 1);
2872     if (recurse_check_bit(common, shared_srcw[0]))
2873       {
2874       shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
2875       SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
2876       shared_count = 2;
2877       }
2878 
2879     if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2880       {
2881       shared_srcw[shared_count] = common->capture_last_ptr;
2882       shared_count++;
2883       }
2884 
2885     if (common->optimized_cbracket[offset] == 0)
2886       {
2887       private_srcw[0] = OVECTOR_PRIV(offset);
2888       if (recurse_check_bit(common, private_srcw[0]))
2889         private_count = 1;
2890       }
2891 
2892     cc += 1 + LINK_SIZE + IMM2_SIZE;
2893     break;
2894 
2895     case OP_CBRAPOS:
2896     case OP_SCBRAPOS:
2897     offset = GET2(cc, 1 + LINK_SIZE);
2898     shared_srcw[0] = OVECTOR(offset << 1);
2899     if (recurse_check_bit(common, shared_srcw[0]))
2900       {
2901       shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
2902       SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
2903       shared_count = 2;
2904       }
2905 
2906     if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2907       {
2908       shared_srcw[shared_count] = common->capture_last_ptr;
2909       shared_count++;
2910       }
2911 
2912     private_srcw[0] = PRIVATE_DATA(cc);
2913     if (recurse_check_bit(common, private_srcw[0]))
2914       private_count = 1;
2915 
2916     offset = OVECTOR_PRIV(offset);
2917     if (recurse_check_bit(common, offset))
2918       {
2919       private_srcw[private_count] = offset;
2920       private_count++;
2921       }
2922     cc += 1 + LINK_SIZE + IMM2_SIZE;
2923     break;
2924 
2925     case OP_COND:
2926     /* Might be a hidden SCOND. */
2927     alternative = cc + GET(cc, 1);
2928     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
2929       {
2930       private_srcw[0] = PRIVATE_DATA(cc);
2931       if (recurse_check_bit(common, private_srcw[0]))
2932         private_count = 1;
2933       }
2934     cc += 1 + LINK_SIZE;
2935     break;
2936 
2937     CASE_ITERATOR_PRIVATE_DATA_1
2938     private_srcw[0] = PRIVATE_DATA(cc);
2939     if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2940       private_count = 1;
2941     cc += 2;
2942 #ifdef SUPPORT_UNICODE
2943     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2944 #endif
2945     break;
2946 
2947     CASE_ITERATOR_PRIVATE_DATA_2A
2948     private_srcw[0] = PRIVATE_DATA(cc);
2949     if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2950       {
2951       private_count = 2;
2952       private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2953       SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2954       }
2955     cc += 2;
2956 #ifdef SUPPORT_UNICODE
2957     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2958 #endif
2959     break;
2960 
2961     CASE_ITERATOR_PRIVATE_DATA_2B
2962     private_srcw[0] = PRIVATE_DATA(cc);
2963     if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2964       {
2965       private_count = 2;
2966       private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2967       SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2968       }
2969     cc += 2 + IMM2_SIZE;
2970 #ifdef SUPPORT_UNICODE
2971     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2972 #endif
2973     break;
2974 
2975     CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2976     private_srcw[0] = PRIVATE_DATA(cc);
2977     if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2978       private_count = 1;
2979     cc += 1;
2980     break;
2981 
2982     CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2983     private_srcw[0] = PRIVATE_DATA(cc);
2984     if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2985       {
2986       private_count = 2;
2987       private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2988       SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2989       }
2990     cc += 1;
2991     break;
2992 
2993     CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2994     private_srcw[0] = PRIVATE_DATA(cc);
2995     if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2996       {
2997       private_count = 2;
2998       private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2999       SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
3000       }
3001     cc += 1 + IMM2_SIZE;
3002     break;
3003 
3004     case OP_CLASS:
3005     case OP_NCLASS:
3006 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
3007     case OP_XCLASS:
3008     i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
3009 #else
3010     i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
3011 #endif
3012     if (PRIVATE_DATA(cc) != 0)
3013       {
3014       private_count = 1;
3015       private_srcw[0] = PRIVATE_DATA(cc);
3016       switch(get_class_iterator_size(cc + i))
3017         {
3018         case 1:
3019         break;
3020 
3021         case 2:
3022         if (recurse_check_bit(common, private_srcw[0]))
3023           {
3024           private_count = 2;
3025           private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
3026           SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
3027           }
3028         break;
3029 
3030         default:
3031         SLJIT_UNREACHABLE();
3032         break;
3033         }
3034       }
3035     cc += i;
3036     break;
3037 
3038     case OP_MARK:
3039     case OP_COMMIT_ARG:
3040     case OP_PRUNE_ARG:
3041     case OP_THEN_ARG:
3042     SLJIT_ASSERT(common->mark_ptr != 0);
3043     if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, common->mark_ptr))
3044       {
3045       kept_shared_srcw[0] = common->mark_ptr;
3046       kept_shared_count = 1;
3047       }
3048     if (common->control_head_ptr != 0 && recurse_check_bit(common, common->control_head_ptr))
3049       {
3050       private_srcw[0] = common->control_head_ptr;
3051       private_count = 1;
3052       }
3053     cc += 1 + 2 + cc[1];
3054     break;
3055 
3056     case OP_THEN:
3057     SLJIT_ASSERT(common->control_head_ptr != 0);
3058     if (recurse_check_bit(common, common->control_head_ptr))
3059       {
3060       private_srcw[0] = common->control_head_ptr;
3061       private_count = 1;
3062       }
3063     cc++;
3064     break;
3065 
3066     default:
3067     cc = next_opcode(common, cc);
3068     SLJIT_ASSERT(cc != NULL);
3069     continue;
3070     }
3071 
3072   if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
3073     {
3074     SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
3075 
3076     for (i = 0; i < private_count; i++)
3077       {
3078       SLJIT_ASSERT(private_srcw[i] != 0);
3079 
3080       if (!from_sp)
3081         delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);
3082 
3083       if (from_sp || type == recurse_swap_global)
3084         delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);
3085 
3086       stackptr += sizeof(sljit_sw);
3087       }
3088     }
3089   else
3090     stackptr += sizeof(sljit_sw) * private_count;
3091 
3092   if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
3093     {
3094     SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);
3095 
3096     for (i = 0; i < shared_count; i++)
3097       {
3098       SLJIT_ASSERT(shared_srcw[i] != 0);
3099 
3100       if (!from_sp)
3101         delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);
3102 
3103       if (from_sp || type == recurse_swap_global)
3104         delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);
3105 
3106       stackptr += sizeof(sljit_sw);
3107       }
3108     }
3109   else
3110     stackptr += sizeof(sljit_sw) * shared_count;
3111 
3112   if (type != recurse_copy_private_to_global && type != recurse_swap_global)
3113     {
3114     SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);
3115 
3116     for (i = 0; i < kept_shared_count; i++)
3117       {
3118       SLJIT_ASSERT(kept_shared_srcw[i] != 0);
3119 
3120       if (!from_sp)
3121         delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);
3122 
3123       if (from_sp || type == recurse_swap_global)
3124         delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);
3125 
3126       stackptr += sizeof(sljit_sw);
3127       }
3128     }
3129   else
3130     stackptr += sizeof(sljit_sw) * kept_shared_count;
3131   }
3132 
3133 SLJIT_ASSERT(cc == ccend && stackptr == stacktop);
3134 
3135 delayed_mem_copy_finish(&status);
3136 }
3137 
set_then_offsets(compiler_common * common,PCRE2_SPTR cc,sljit_u8 * current_offset)3138 static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
3139 {
3140 PCRE2_SPTR end = bracketend(cc);
3141 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
3142 
3143 /* Assert captures then. */
3144 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA)
3145   current_offset = NULL;
3146 /* Conditional block does not. */
3147 if (*cc == OP_COND || *cc == OP_SCOND)
3148   has_alternatives = FALSE;
3149 
3150 cc = next_opcode(common, cc);
3151 
3152 if (has_alternatives)
3153   {
3154   if (*cc == OP_REVERSE)
3155     cc += 1 + IMM2_SIZE;
3156   else if (*cc == OP_VREVERSE)
3157     cc += 1 + 2 * IMM2_SIZE;
3158 
3159   current_offset = common->then_offsets + (cc - common->start);
3160   }
3161 
3162 while (cc < end)
3163   {
3164   if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
3165     cc = set_then_offsets(common, cc, current_offset);
3166   else
3167     {
3168     if (*cc == OP_ALT && has_alternatives)
3169       {
3170       cc += 1 + LINK_SIZE;
3171 
3172       if (*cc == OP_REVERSE)
3173         cc += 1 + IMM2_SIZE;
3174       else if (*cc == OP_VREVERSE)
3175         cc += 1 + 2 * IMM2_SIZE;
3176 
3177       current_offset = common->then_offsets + (cc - common->start);
3178       continue;
3179       }
3180 
3181     if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
3182       *current_offset = 1;
3183     cc = next_opcode(common, cc);
3184     }
3185   }
3186 
3187 return end;
3188 }
3189 
3190 #undef CASE_ITERATOR_PRIVATE_DATA_1
3191 #undef CASE_ITERATOR_PRIVATE_DATA_2A
3192 #undef CASE_ITERATOR_PRIVATE_DATA_2B
3193 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
3194 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
3195 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3196 
is_powerof2(unsigned int value)3197 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
3198 {
3199 return (value & (value - 1)) == 0;
3200 }
3201 
set_jumps(jump_list * list,struct sljit_label * label)3202 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
3203 {
3204 while (list != NULL)
3205   {
3206   /* sljit_set_label is clever enough to do nothing
3207   if either the jump or the label is NULL. */
3208   SET_LABEL(list->jump, label);
3209   list = list->next;
3210   }
3211 }
3212 
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)3213 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
3214 {
3215 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
3216 if (list_item)
3217   {
3218   list_item->next = *list;
3219   list_item->jump = jump;
3220   *list = list_item;
3221   }
3222 }
3223 
/* Records a stub on common->stubs: start is the jump that enters the stub
and a label emitted here marks where the stub continues afterwards (see
flush_stubs). Nothing is emitted or recorded if the node allocation fails. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (stub == NULL)
  return;

stub->start = start;
stub->quit = LABEL();
stub->next = common->stubs;
common->stubs = stub;
}
3237 
/* Emits the body of every pending stub and empties common->stubs. Each stub
body is: the landing point of the stub's start jump, a fast call that is
collected on common->stackalloc, and a jump back to the stub's quit label. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;
struct sljit_jump *stackalloc_call;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  stackalloc_call = JUMP(SLJIT_FAST_CALL);
  add_jump(compiler, &common->stackalloc, stackalloc_call);
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
3252 
count_match(compiler_common * common)3253 static SLJIT_INLINE void count_match(compiler_common *common)
3254 {
3255 DEFINE_COMPILER;
3256 
3257 OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
3258 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
3259 }
3260 
allocate_stack(compiler_common * common,int size)3261 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
3262 {
3263 /* May destroy all locals and registers except TMP2. */
3264 DEFINE_COMPILER;
3265 
3266 SLJIT_ASSERT(size > 0);
3267 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));
3268 #ifdef DESTROY_REGISTERS
3269 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
3270 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3271 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3272 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
3273 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
3274 #endif
3275 add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
3276 }
3277 
free_stack(compiler_common * common,int size)3278 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
3279 {
3280 DEFINE_COMPILER;
3281 
3282 SLJIT_ASSERT(size > 0);
3283 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));
3284 }
3285 
/* Allocates size bytes of data that is chained onto
common->read_only_data_head. One extra sljit_uw is allocated in front of the
returned area and holds the previous list head.

Returns a pointer to the usable area (just after the link word), or NULL if
the compiler is already in an error state, if size is too large, or if the
allocation fails. In the latter two cases the compiler memory error is set. */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
DEFINE_COMPILER;
sljit_uw *result;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

/* Adding the link word must not wrap around, otherwise a too small block
would be allocated. Treat such a request as an out of memory condition. */
if (SLJIT_UNLIKELY(size > ~(sljit_uw)0 - sizeof(sljit_uw)))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
if (SLJIT_UNLIKELY(result == NULL))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

/* The first word links to the previously allocated block. */
*(void**)result = common->read_only_data_head;
common->read_only_data_head = (void *)result;
return result + 1;
}
3305 
reset_ovector(compiler_common * common,int length)3306 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
3307 {
3308 DEFINE_COMPILER;
3309 struct sljit_label *loop;
3310 sljit_s32 i;
3311 
3312 /* At this point we can freely use all temporary registers. */
3313 SLJIT_ASSERT(length > 1);
3314 /* TMP1 returns with begin - 1. */
3315 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
3316 if (length < 8)
3317   {
3318   for (i = 1; i < length; i++)
3319     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
3320   }
3321 else
3322   {
3323   if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3324     {
3325     GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
3326     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3327     loop = LABEL();
3328     sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
3329     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3330     JUMPTO(SLJIT_NOT_ZERO, loop);
3331     }
3332   else
3333     {
3334     GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
3335     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3336     loop = LABEL();
3337     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
3338     OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
3339     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3340     JUMPTO(SLJIT_NOT_ZERO, loop);
3341     }
3342   }
3343 }
3344 
reset_early_fail(compiler_common * common)3345 static SLJIT_INLINE void reset_early_fail(compiler_common *common)
3346 {
3347 DEFINE_COMPILER;
3348 sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
3349 sljit_u32 uncleared_size;
3350 sljit_s32 src = SLJIT_IMM;
3351 sljit_s32 i;
3352 struct sljit_label *loop;
3353 
3354 SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);
3355 
3356 if (size == sizeof(sljit_sw))
3357   {
3358   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
3359   return;
3360   }
3361 
3362 if (sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
3363   {
3364   OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
3365   src = TMP3;
3366   }
3367 
3368 if (size <= 6 * sizeof(sljit_sw))
3369   {
3370   for (i = common->early_fail_start_ptr; i < common->early_fail_end_ptr; i += sizeof(sljit_sw))
3371     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, src, 0);
3372   return;
3373   }
3374 
3375 GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);
3376 
3377 uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);
3378 
3379 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);
3380 
3381 loop = LABEL();
3382 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3383 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3384 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * SSIZE_OF(sw), src, 0);
3385 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * SSIZE_OF(sw), src, 0);
3386 CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);
3387 
3388 if (uncleared_size >= sizeof(sljit_sw))
3389   OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3390 
3391 if (uncleared_size >= 2 * sizeof(sljit_sw))
3392   OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0);
3393 }
3394 
do_reset_match(compiler_common * common,int length)3395 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
3396 {
3397 DEFINE_COMPILER;
3398 struct sljit_label *loop;
3399 int i;
3400 
3401 SLJIT_ASSERT(length > 1);
3402 /* OVECTOR(1) contains the "string begin - 1" constant. */
3403 if (length > 2)
3404   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3405 if (length < 8)
3406   {
3407   for (i = 2; i < length; i++)
3408     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
3409   }
3410 else
3411   {
3412   if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3413     {
3414     GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
3415     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3416     loop = LABEL();
3417     sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
3418     OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3419     JUMPTO(SLJIT_NOT_ZERO, loop);
3420     }
3421   else
3422     {
3423     GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
3424     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3425     loop = LABEL();
3426     OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
3427     OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
3428     OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3429     JUMPTO(SLJIT_NOT_ZERO, loop);
3430     }
3431   }
3432 
3433 if (!HAS_VIRTUAL_REGISTERS)
3434   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));
3435 else
3436   OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
3437 
3438 if (common->mark_ptr != 0)
3439   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
3440 if (common->control_head_ptr != 0)
3441   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
3442 if (HAS_VIRTUAL_REGISTERS)
3443   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
3444 
3445 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3446 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
3447 }
3448 
do_search_mark(sljit_sw * current,PCRE2_SPTR skip_arg)3449 static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
3450 {
3451 while (current != NULL)
3452   {
3453   switch (current[1])
3454     {
3455     case type_then_trap:
3456     break;
3457 
3458     case type_mark:
3459     if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
3460       return current[3];
3461     break;
3462 
3463     default:
3464     SLJIT_UNREACHABLE();
3465     break;
3466     }
3467   SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
3468   current = (sljit_sw*)current[0];
3469   }
3470 return 0;
3471 }
3472 
copy_ovector(compiler_common * common,int topbracket)3473 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
3474 {
3475 DEFINE_COMPILER;
3476 struct sljit_label *loop;
3477 BOOL has_pre;
3478 
3479 /* At this point we can freely use all registers. */
3480 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3481 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
3482 
3483 if (HAS_VIRTUAL_REGISTERS)
3484   {
3485   OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
3486   OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3487   if (common->mark_ptr != 0)
3488     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
3489   OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
3490   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
3491   if (common->mark_ptr != 0)
3492     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
3493   OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
3494     SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
3495   }
3496 else
3497   {
3498   OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3499   OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
3500   if (common->mark_ptr != 0)
3501     OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
3502   OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
3503   OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
3504   if (common->mark_ptr != 0)
3505     OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
3506   OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
3507   }
3508 
3509 has_pre = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
3510 
3511 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
3512 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
3513 
3514 loop = LABEL();
3515 
3516 if (has_pre)
3517   sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
3518 else
3519   {
3520   OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
3521   OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
3522   }
3523 
3524 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
3525 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
3526 /* Copy the integer value to the output buffer */
3527 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3528 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
3529 #endif
3530 
3531 SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
3532 OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
3533 
3534 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3535 JUMPTO(SLJIT_NOT_ZERO, loop);
3536 
3537 /* Calculate the return value, which is the maximum ovector value. */
3538 if (topbracket > 1)
3539   {
3540   if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw))) == SLJIT_SUCCESS)
3541     {
3542     GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
3543     OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
3544 
3545     /* OVECTOR(0) is never equal to SLJIT_S2. */
3546     loop = LABEL();
3547     sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw)));
3548     OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3549     CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
3550     OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
3551     }
3552   else
3553     {
3554     GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
3555     OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
3556 
3557     /* OVECTOR(0) is never equal to SLJIT_S2. */
3558     loop = LABEL();
3559     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
3560     OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
3561     OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3562     CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
3563     OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
3564     }
3565   }
3566 else
3567   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
3568 }
3569 
return_with_partial_match(compiler_common * common,struct sljit_label * quit)3570 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
3571 {
3572 DEFINE_COMPILER;
3573 sljit_s32 mov_opcode;
3574 sljit_s32 arguments_reg = !HAS_VIRTUAL_REGISTERS ? ARGUMENTS : SLJIT_R1;
3575 
3576 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
3577 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
3578   && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));
3579 
3580 if (arguments_reg != ARGUMENTS)
3581   OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0);
3582 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
3583   common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
3584 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);
3585 
3586 /* Store match begin and end. */
3587 OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin));
3588 OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
3589 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data));
3590 
3591 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
3592 
3593 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
3594 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3595 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
3596 #endif
3597 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);
3598 
3599 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
3600 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3601 OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
3602 #endif
3603 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);
3604 
3605 JUMPTO(SLJIT_JUMP, quit);
3606 }
3607 
check_start_used_ptr(compiler_common * common)3608 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
3609 {
3610 /* May destroy TMP1. */
3611 DEFINE_COMPILER;
3612 struct sljit_jump *jump;
3613 
3614 if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3615   {
3616   /* The value of -1 must be kept for start_used_ptr! */
3617   OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
3618   /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
3619   is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
3620   jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
3621   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3622   JUMPHERE(jump);
3623   }
3624 else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
3625   {
3626   jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3627   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3628   JUMPHERE(jump);
3629   }
3630 }
3631 
char_has_othercase(compiler_common * common,PCRE2_SPTR cc)3632 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
3633 {
3634 /* Detects if the character has an othercase. */
3635 unsigned int c;
3636 
3637 #ifdef SUPPORT_UNICODE
3638 if (common->utf || common->ucp)
3639   {
3640   if (common->utf)
3641     {
3642     GETCHAR(c, cc);
3643     }
3644   else
3645     c = *cc;
3646 
3647   if (c > 127)
3648     return c != UCD_OTHERCASE(c);
3649 
3650   return common->fcc[c] != c;
3651   }
3652 else
3653 #endif
3654   c = *cc;
3655 return MAX_255(c) ? common->fcc[c] != c : FALSE;
3656 }
3657 
char_othercase(compiler_common * common,unsigned int c)3658 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
3659 {
3660 /* Returns with the othercase. */
3661 #ifdef SUPPORT_UNICODE
3662 if ((common->utf || common->ucp) && c > 127)
3663   return UCD_OTHERCASE(c);
3664 #endif
3665 return TABLE_GET(c, common->fcc, c);
3666 }
3667 
/* Detects if the character at cc and its other case differ in exactly one
bit. Returns 0 if they differ in more than one bit. Otherwise the result is
(n << 8) | mask, where mask is the differing bit moved into the low eight
bits and n is the small index computed below (presumably the position of the
affected unit within the encoded character -- confirm against the callers).
The character must have an other case (asserted). */
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
unsigned int ch, other, diff;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
int extra;
#endif

#ifdef SUPPORT_UNICODE
if (common->utf || common->ucp)
  {
  if (common->utf)
    {
    GETCHAR(ch, cc);
    }
  else
    ch = *cc;

  if (ch > 127)
    other = UCD_OTHERCASE(ch);
  else
    other = common->fcc[ch];
  }
else
#endif
  {
  ch = *cc;
  other = TABLE_GET(ch, common->fcc, ch);
  }

SLJIT_ASSERT(ch != other);

diff = ch ^ other;
/* Optimized for English alphabet. */
if (ch <= 127 && diff == 0x20)
  return (0 << 8) | 0x20;

/* Since ch != other, they must have at least 1 bit difference. */
if (!is_powerof2(diff))
  return 0;

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
if (common->utf && ch > 127)
  {
  /* Each step moves down by six bits and decrements the index. */
  extra = GET_EXTRALEN(*cc);
  while ((diff & 0x3f) == 0)
    {
    extra--;
    diff >>= 6;
    }
  return (extra << 8) | diff;
  }
#endif /* SUPPORT_UNICODE */
return (0 << 8) | diff;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#ifdef SUPPORT_UNICODE
if (common->utf && ch > 65535)
  {
  /* A difference within the low ten bits yields index 2 or 3. */
  if (diff < (1u << 10))
    return (diff < 256) ? ((2 << 8) | diff) : ((3 << 8) | (diff >> 8));

  diff >>= 10;
  }
#endif /* SUPPORT_UNICODE */
return (diff < 256) ? ((0u << 8) | diff) : ((1u << 8) | (diff >> 8));

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
3743 
/* Checks whether a partial matching is occurred. Does not modify registers.
Nothing is emitted in complete matching mode. Otherwise an optional guard
jump is emitted first (depending on force, common->allow_empty_partial and
the mode); the guarded action is clearing the hit_start local in soft
partial mode, or jumping to the partial match handler in hard partial mode.
force must not be set in complete matching mode. */
static void check_partial(compiler_common *common, BOOL force)
{
DEFINE_COMPILER;
struct sljit_jump *skip = NULL;
BOOL soft_mode;

SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

if (common->mode == PCRE2_JIT_COMPLETE)
  return;

soft_mode = (common->mode == PCRE2_JIT_PARTIAL_SOFT);

if (!force && !common->allow_empty_partial)
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
else if (soft_mode)
  skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);

if (soft_mode)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

if (skip != NULL)
  JUMPHERE(skip);
}
3773 
/* Emits an end-of-subject check. Does not affect registers. Usually used
in a tight spot. In complete matching mode a jump taken when
STR_PTR >= STR_END is added to end_reached. In the partial modes the code
falls through while STR_PTR < STR_END; at the end of the subject it either
jumps to end_reached (when start_used_ptr >= STR_PTR, and always in soft
mode after clearing the hit_start local) or to the partial match handler
(hard mode). */
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

/* Both partial modes start with the same start_used_ptr test. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(not_at_end);
}
3803 
/* Emits an end-of-subject check whose failure path goes to backtracks.
In complete matching mode this is a single jump taken when
STR_PTR >= STR_END. In the partial modes the code falls through while
STR_PTR < STR_END; at the end of the subject an optional guard jump to
backtracks is emitted (depending on common->allow_empty_partial and the
mode), followed by either clearing the hit_start local and jumping to
backtracks (soft mode) or jumping to the partial match handler (hard mode). */
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
DEFINE_COMPILER;
struct sljit_jump *not_at_end;
BOOL soft_mode;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
soft_mode = (common->mode == PCRE2_JIT_PARTIAL_SOFT);
not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

if (!common->allow_empty_partial)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
else if (soft_mode)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));

if (soft_mode)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(not_at_end);
}
3836 
/* Emits the partial match action for the partial matching modes; nothing
is emitted in any other mode. Soft mode: the hit_start local is cleared
unless start_used_ptr >= STR_PTR. Hard mode: a jump to the partial match
handler is taken when start_used_ptr < STR_PTR. */
static void process_partial_match(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *skip;

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  JUMPHERE(skip);
  return;
  }

if (common->mode != PCRE2_JIT_PARTIAL_HARD)
  return;

/* Use the handler label when it exists, otherwise collect the jump. */
if (common->partialmatchlabel != NULL)
  {
  CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
  return;
  }

add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
}
3857 
/* Emits a jump to label that is taken while STR_PTR < STR_END. The code
that follows the jump (reached at the end of the subject) is the partial
match action emitted by process_partial_match(). */
static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
{
DEFINE_COMPILER;

/* Subject not exhausted yet: continue at label. */
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);

/* End of subject reached. */
process_partial_match(common);
}
3865 
/* Reads the character into TMP1, keeps STR_PTR.
Does not check STR_END. TMP2, dst, RETURN_ADDR Destroyed.

max: when max is below the first value that needs decoding or validation
  (128 for UTF-8, 0xd800 for UTF-16 and for 32 bit invalid UTF mode), only
  the plain code unit load is emitted.
dst, dstw: operand used to save STR_PTR around the call of the UTF reader.
backtracks: when not NULL in invalid UTF mode, a jump is added to it for
  characters detected as invalid. */
static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
{
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *single_unit;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  jump_list **utf_reader = common->invalid_utf ? &common->utfreadchar_invalid : &common->utfreadchar;

  if (max < 128) return;

  /* Values below 0x80 need no decoding. */
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  /* Save STR_PTR in dst, call the reader, then restore STR_PTR. */
  OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, utf_reader, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
  if (backtracks && common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  JUMPHERE(single_unit);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (!common->invalid_utf)
    {
    /* TMP2 contains the high surrogate. */
    single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }
  else
    {
    /* Everything in the 0xd800 - 0xdfff range goes through the reader. */
    single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }

  JUMPHERE(single_unit);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  if (max < 0xd800) return;

  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (backtracks != NULL)
    {
    /* Values >= 0x110000 and the 0xd800 - 0xdfff range backtrack. */
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
    }
  else
    {
    /* Without a backtrack list TMP1 is replaced by INVALID_UTF_CHAR. */
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
    SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3949 
static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
{
/* Reads one character back without moving STR_PTR. TMP2 must
contain the start of the subject buffer. Affects TMP1, TMP2, and RETURN_ADDR.

  max         When max is below 128 (8-bit UTF) or 0xd800 (16-bit UTF)
              only the preceding code unit is loaded.
  backtracks  Receives a jump for characters reported as invalid when
              invalid UTF is enabled. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);

/* The code unit just before STR_PTR is always loaded. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  /* Code units below 0x80 need no decoding. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  if (common->invalid_utf)
    {
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  if (common->invalid_utf)
    {
    /* Code units below 0xd800 are returned as they are. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    {
    /* Only code units in the 0xdc00-0xdfff range are combined with
    the code unit before them. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
    /* TMP2 contains the low surrogate. */
    /* Result: ((high - 0xd800) << 10) + (low - 0xdc00) + 0x10000. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }
    JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  /* Values >= 0x110000 and values in the 0xd800-0xdfff range
  jump to the backtrack list. */
  /* NOTE(review): unlike the 8-bit and 16-bit branches, backtracks is
  used here without a NULL check and max is not consulted. Confirm that
  callers never pass NULL in 32-bit builds. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
4017 
/* Option bits tested by read_char() through its 'options' argument. */

/* STR_PTR must end up after the whole character, even when the character
value itself does not need to be decoded precisely. */
#define READ_CHAR_UPDATE_STR_PTR 0x1
/* In the invalid UTF path, the utfreadnewline_invalid helper is called
instead of utfreadchar_invalid. */
#define READ_CHAR_UTF8_NEWLINE 0x2
/* Both of the bits above. */
#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
/* The invalid UTF path is not generated even if common->invalid_utf is set. */
#define READ_CHAR_VALID_UTF 0x4
4022 
static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
  jump_list **backtracks, sljit_u32 options)
{
/* Reads the precise value of a character into TMP1, if the character is
between min and max (c >= min && c <= max). Otherwise it returns with a value
outside the range. Does not check STR_END.

STR_PTR is always advanced past the first code unit. TMP2 is used as a
scratch register, and RETURN_ADDR is used by several branches as well.

  min, max    Range of character values the caller is interested in. It
              selects the cheapest decoder that is still precise inside
              the range. min must not be greater than max.
  backtracks  If not NULL, receives a jump for invalid characters when
              the invalid UTF path is generated.
  options     Combination of the READ_CHAR_* bits. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(min);
SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(options);
SLJIT_ASSERT(min <= max);

/* Load the first code unit and step over it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The first byte is enough when only values below 128 matter and
  STR_PTR does not need to be moved after the whole character. */
  if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;

  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
    {
    /* Invalid UTF path: every byte >= 0x80 is passed to a helper. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);

    if (options & READ_CHAR_UTF8_NEWLINE)
      add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
    else
      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));

    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    JUMPHERE(jump);
    return;
    }

  /* Valid UTF path: bytes below 0xc0 are returned without decoding. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  if (min >= 0x10000)
    {
    /* Only sequences whose first byte is in the 0xf0-0xf7 range are
    decoded (three more bytes). For other first bytes TMP1 is left
    holding the second byte. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
    /* RETURN_ADDR receives the utf8_table4 entry of the first byte,
    which is added to STR_PTR at the end of this branch. */
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
    /* Without READ_CHAR_UPDATE_STR_PTR the pointer is moved only
    when the sequence is decoded. */
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (min >= 0x800 && max <= 0xffff)
    {
    /* Only sequences whose first byte is in the 0xe0-0xef range are
    decoded (two more bytes). For other first bytes TMP1 is left
    holding the second byte. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (max >= 0x800)
    {
    /* General case: the shared decoder helper is called. */
    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
    }
  else if (max < 128)
    {
    /* Reached only with READ_CHAR_UPDATE_STR_PTR (see the early return
    above): the character is not decoded, STR_PTR is advanced by the
    utf8_table4 entry of the first byte. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  else
    {
    /* max is in the 128-0x7ff range: the first two bytes are combined
    as if the sequence was two bytes long. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    else
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  /* The first code unit is enough when only values below 0xd800 matter
  and STR_PTR does not need to be moved after the whole character. */
  if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;

  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
    {
    /* Invalid UTF path: code units in the 0xd800-0xdfff range are
    passed to a helper. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);

    if (options & READ_CHAR_UTF8_NEWLINE)
      add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
    else
      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));

    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    JUMPHERE(jump);
    return;
    }

  if (max >= 0x10000)
    {
    /* Values above 0xffff matter, so a surrogate pair is fully decoded:
    ((high - 0xd800) << 10) + (low - 0xdc00) + 0x10000. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
    /* TMP2 contains the high surrogate. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump);
    return;
    }

  /* Skip low surrogate if necessary. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  /* Both variants below do the same when the first code unit is in the
  0xd800-0xdbff range: STR_PTR is moved by one more code unit if
  READ_CHAR_UPDATE_STR_PTR is set, and TMP1 is set to 0x10000 if
  max >= 0xd800. The first variant uses conditional moves, the second
  one uses a jump. */
  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
    {
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);
    if (max >= 0xd800)
      SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000, TMP1);
    }
  else
    {
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    if (max >= 0xd800)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
    JUMPHERE(jump);
    }
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  if (backtracks != NULL)
    {
    /* Values >= 0x110000 and values in the 0xd800-0xdfff range
    jump to the backtrack list. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
    }
  else
    {
    /* Without a backtrack list the same two conditions replace
    TMP1 with INVALID_UTF_CHAR instead of jumping. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
    SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
4216 
static void skip_valid_char(compiler_common *common)
{
/* Moves STR_PTR forward by one character. No validity or STR_END check
is generated. In 8-bit and 16-bit UTF mode TMP1 is used for computing the
number of additional code units; otherwise STR_PTR is simply advanced by
one code unit. */
DEFINE_COMPILER;
#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
struct sljit_jump *single_unit;

if (!common->utf)
  {
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return;
  }

/* Fetch the first code unit and step over it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if PCRE2_CODE_UNIT_WIDTH == 8
/* Bytes below 0xc0 have nothing more to skip. For the others the
utf8_table4 entry of the first byte is added to STR_PTR. */
single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif PCRE2_CODE_UNIT_WIDTH == 16
/* Code units below 0xd800 have nothing more to skip. Otherwise TMP1
becomes 1 when the top six bits equal 0xd800 (0 if not), and that flag
shifted left by one is added to STR_PTR. */
single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xd800);
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
JUMPHERE(single_unit);
#else /* !SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 32 */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
}
4247 
4248 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4249 
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Tells whether the character codes below 128 are enough to determine
a match: returns TRUE only when bytes 16..31 of the bitset all hold the
same filler value, which is 0xff if nclass is set and 0 otherwise. */
const sljit_u8 filler = nclass ? 0xff : 0;
int idx;

for (idx = 16; idx < 32; idx++)
  {
  if (bitset[idx] != filler)
    return FALSE;
  }
return TRUE;
}
4266 
static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the precise character type of a character into TMP1, if the character
is less than 128. Otherwise it returns with zero. Does not check STR_END.

The first byte is loaded into TMP2 and STR_PTR is moved after it. When
negated is FALSE nothing else is generated. When negated is TRUE and the
first byte is >= 0x80, the rest of the character is processed as well:
with invalid UTF enabled the character is decoded by utfreadchar_invalid
and a jump is added to backtracks if it is invalid, otherwise the
utf8_table4 entry of the first byte is added to STR_PTR. */
DEFINE_COMPILER;
struct sljit_jump *jump;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* All values > 127 are zero in ctypes. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (negated)
  {
  /* Nothing more to do for bytes below 0x80. */
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (common->invalid_utf)
    {
    /* The decoder helper takes the first byte in TMP1. After the
    validity check the type is set back to zero. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    }
  else
    {
    /* Skip the remaining bytes of the character. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  JUMPHERE(jump);
  }
}
4302 
4303 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4304 
static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END.

The first code unit is loaded into TMP2 and STR_PTR is moved after it. The
type is taken from common->ctypes for characters up to 255 and is zero for
all other characters. TMP2 is destroyed, and RETURN_ADDR may be used as well.

  backtracks  Receives the jumps generated for invalid characters when
              invalid UTF is enabled.
  negated     When TRUE, STR_PTR is moved after the whole character in
              UTF mode even if its type is zero. When FALSE, the 8-bit
              UTF code reads at most one byte after the first one, and
              the 16-bit UTF code does not skip low surrogates. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(negated);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The result of this read may be unused, but saves an "else" part. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (!negated)
    {
    /* Only two byte sequences can encode characters below 256, so
    the first two bytes are combined and everything above 255 gets
    a zero type. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
    /* The first byte must be in the 0xc2-0xdf range. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));

    /* If TMP1 is in 0x80-0xbf range, TMP2 becomes the character value. */
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
    /* The second byte must be in the 0x80-0xbf range. The check has to
    use TMP1 (second byte - 0x80): TMP2 already holds the combined value,
    which is >= 0x80 for every valid sequence. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else if (common->invalid_utf)
    {
    /* The decoder helper takes the first byte in TMP1, but TMP1 was
    overwritten by the speculative ctypes read above, so it is reloaded
    from TMP2 before the call. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && negated)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */

#if PCRE2_CODE_UNIT_WIDTH != 8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && negated)
  {
  /* Skip low surrogate if necessary. */
  if (!common->invalid_utf)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);

    /* STR_PTR is moved by one more code unit when the first code unit
    is in the 0xd800-0xdbff range, using either a conditional move
    or a jump. */
    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
      {
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
      SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);
      }
    else
      {
      jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      JUMPHERE(jump);
      }
    return;
    }

  /* Invalid UTF is enabled: code units outside of the 0xd800-0xdfff
  range are accepted as they are. Otherwise the first code unit must be
  in the 0xd800-0xdbff range, the subject must not end here, and the
  next code unit must be in the 0xdc00-0xdfff range. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
4423 
static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
{
/* Goes one character back. Affects STR_PTR and TMP1. If must_be_valid is TRUE,
TMP2 is not used. Otherwise TMP2 must contain the start of the subject buffer,
and it is destroyed. Does not modify STR_PTR for invalid character sequences.

  backtracks     If not NULL, receives a jump for invalid characters on
                 the paths which check validity.
  must_be_valid  When TRUE, the validity checking paths are not
                 generated even if invalid UTF is enabled. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_label *label;

if (common->utf)
  {
  if (!must_be_valid && common->invalid_utf)
    {
    /* Step one byte back. Bytes >= 0x80 are passed to the helper. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    /* NOTE(review): a zero TMP1 after the helper appears to signal
    an invalid sequence - confirm against the helper. */
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(jump);
    return;
    }

  /* Step back one byte at a time while the byte just passed has
  the 10xxxxxx form. */
  label = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  /* Step one code unit back. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (!must_be_valid && common->invalid_utf)
    {
    /* Code units in the 0xd800-0xdfff range are passed to the helper. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    /* NOTE(review): a zero TMP1 after the helper appears to signal
    an invalid sequence - confirm against the helper. */
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(jump);
    return;
    }

  /* Skip low surrogate if necessary. */
  /* TMP1 becomes 1 when the top six bits of the code unit equal 0xdc00
  (0 if not). The flag is converted to a byte offset and subtracted
  from STR_PTR. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && !must_be_valid)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  if (backtracks != NULL)
    {
    /* Values >= 0x110000 jump to the backtrack list before
    STR_PTR is changed. */
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    return;
    }

  /* Without a backtrack list STR_PTR is moved back by one code unit
  only when the value is below 0x110000: TMP1 becomes 1 or 0, and is
  converted to a byte offset. */
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x110000);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
  OP2(SLJIT_SHL,  TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(must_be_valid);

/* Non-UTF case: one code unit is one character. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4510 
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Tests the character held in TMP1 against the newline convention selected
by nltype, and adds the resulting jump(s) to backtracks. When jumpifmatch is
TRUE the jumps are taken if the character is a newline, otherwise they are
taken if it is not a newline. TMP2 may be destroyed. */
DEFINE_COMPILER;
struct sljit_jump *cr_found;

if (nltype == NLTYPE_ANY)
  {
  /* The shared anynewline routine is called, and the zero flag is
  tested afterwards. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  if (jumpifmatch)
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  else
    add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  /* A single, fixed newline character. */
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  if (jumpifmatch)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF: the newline is either CR or NL. */
if (!jumpifmatch)
  {
  /* CR bypasses the NL test, everything else except NL jumps. */
  cr_found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  JUMPHERE(cr_found);
  return;
  }

add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
}
4543 
4544 #ifdef SUPPORT_UNICODE
4545 
4546 #if PCRE2_CODE_UNIT_WIDTH == 8
static void do_utfreadchar(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1.

The routine is entered with SLJIT_FAST_ENTER and the return address is
kept in RETURN_ADDR. On entry STR_PTR is expected to point to the second
byte (it is read with a zero offset); on return STR_PTR has been moved
after the last byte of the sequence. TMP2 is destroyed. No validity or
STR_END check is done. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
/* TMP1 = (first byte << 6) | (second byte & 0x3f). */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. */
/* 0x800 is the 0x20 bit of the first byte after the shift. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
/* 0x3000 is the shifted 0xc0 prefix of the first byte. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Append the low six bits of the third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* 0x10000 is the 0x10 bit of the first byte after two shifts. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence. */
/* 0xe0000 is the shifted 0xe0 prefix of the first byte. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four byte sequence. */
JUMPHERE(jump);
/* The shifted 0xf0 prefix is removed before the last shift, then
the low six bits of the fourth byte are appended. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4591 
static void do_utfreadtype8(compiler_common *common)
{
/* Fast decoding a UTF-8 character type. TMP2 contains the first byte
of the character (>= 0xc0). Return value in TMP1.

The routine is entered with SLJIT_FAST_ENTER and the return address is
kept in RETURN_ADDR. On entry STR_PTR is expected to point to the second
byte (it is read with a zero offset). The type is read from
common->ctypes for two byte sequences which encode a value below 256,
and it is zero for every other character. TMP2 is destroyed. No validity
or STR_END check is done. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *compare;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* The 0x20 bit of the first byte is set for sequences
longer than two bytes. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point. */
/* If they are greater than 3, the character is above 255. */
compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two byte sequence above 255: no type. */
JUMPHERE(compare);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* We only have types for characters less than 256. */
/* STR_PTR is advanced by the utf8_table4 entry of the first byte. */
JUMPHERE(jump);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4627 
/* Emits the out-of-line helper that decodes one multi-byte UTF-8 character
(8-bit code units) in the "_invalid" flavour: every check that fails ends at
the shared exit at the bottom, which loads INVALID_UTF_CHAR into TMP1. The
emitted code is bracketed by SLJIT_FAST_ENTER / SLJIT_FAST_RETURN through
RETURN_ADDR. */
do_utfreadchar_invalid(compiler_common * common)4628 static void do_utfreadchar_invalid(compiler_common *common)
4629 {
4630 /* Slow decoding a UTF-8 character. TMP1 contains the first byte
4631 of the character (>= 0xc0). Return char value in TMP1. STR_PTR is
4632 undefined for invalid characters. */
4633 DEFINE_COMPILER;
4634 sljit_s32 i;
4635 sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
4636 struct sljit_jump *jump;
4637 struct sljit_jump *buffer_end_close;
4638 struct sljit_label *three_byte_entry;
4639 struct sljit_label *exit_invalid_label;
/* exit_invalid[] collects the failure branches. On the has_cmov paths the
slot is set to NULL because a SELECT replaces the branch; the final loop
still passes those NULL entries to sljit_set_label (presumably a no-op for
a NULL jump - confirm against the sljit API). */
4640 struct sljit_jump *exit_invalid[11];
4641 
4642 sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
4643 
/* Rebase the lead byte so that 0xc2 becomes 0; the validity test below is
then a single comparison against 0xf5 - 0xc2. */
4644 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);
4645 
4646 /* Usually more than 3 characters remained in the subject buffer. */
4647 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4648 
4649 /* Not a valid start of a multi-byte sequence, no more bytes read. */
4650 exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);
4651 
/* Taken when the speculative +3 overshoots STR_END; that case is handled
near the end of this routine with a bounds check before each further read. */
4652 buffer_end_close = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
4653 
4654 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
4655 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4656 /* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
4657 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4658 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4659 exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4660 
/* TMP1 is now (lead - 0xc2) << 6 plus the raw second byte. With a valid
second byte, bit 0x800 is clear only for lead bytes below 0xe0, i.e. for
two-byte sequences. */
4661 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
4662 jump = JUMP(SLJIT_NOT_ZERO);
4663 
/* Two-byte character: give back 2 of the 3 code units added above. */
4664 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4665 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4666 
4667 JUMPHERE(jump);
4668 
4669 /* Three-byte sequence. */
4670 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4671 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4672 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4673 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
/* With conditional-move support the continuation-byte check does not branch:
SELECT is handed 0x20000 as the replacement value for TMP1 (presumably chosen
so that the later range tests reject it - confirm against the SELECT macro). */
4674 if (has_cmov)
4675   {
4676   OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4677   SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000, TMP1);
4678   exit_invalid[2] = NULL;
4679   }
4680 else
4681   exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4682 
/* Bit 0x10000 of the biased value separates three-byte sequences (clear)
from four-byte sequences (set). */
4683 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
4684 jump = JUMP(SLJIT_NOT_ZERO);
4685 
/* Also entered from the near-end-of-buffer path further down. */
4686 three_byte_entry = LABEL();
4687 
/* A three-byte value carries a 0x20000 bias here. Subtracting 0x2d800 removes
the bias and rebases at 0xd800, so the range 0xd800-0xdfff becomes 0-0x7ff
and is rejected by the test below. */
4688 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
4689 if (has_cmov)
4690   {
4691   OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4692   SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800, TMP1);
4693   exit_invalid[3] = NULL;
4694   }
4695 else
4696   exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4697 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
/* Three-byte character: give back 1 of the 3 speculative code units. */
4698 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4699 
/* With the real value restored, anything below 0x800 is rejected. */
4700 if (has_cmov)
4701   {
4702   OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4703   SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
4704   exit_invalid[4] = NULL;
4705   }
4706 else
4707   exit_invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4708 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4709 
4710 JUMPHERE(jump);
4711 
4712 /* Four-byte sequence. */
4713 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4714 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4715 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4716 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4717 if (has_cmov)
4718   {
4719   OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4720   SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0, TMP1);
4721   exit_invalid[5] = NULL;
4722   }
4723 else
4724   exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4725 
/* A four-byte value carries a 0xc00000 bias here. Subtracting 0xc10000
removes the bias and rebases at 0x10000, so only results below 0x100000
(characters 0x10000-0x10ffff) pass the test below. */
4726 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
4727 if (has_cmov)
4728   {
4729   OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
4730   SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);
4731   exit_invalid[6] = NULL;
4732   }
4733 else
4734   exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
4735 
4736 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
4737 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4738 
/* Near the end of the subject: STR_PTR is reduced to one code unit above its
entry value, and each further byte is read only after a check against
STR_END. */
4739 JUMPHERE(buffer_end_close);
4740 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4741 exit_invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
4742 
4743 /* Two-byte sequence. */
4744 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4745 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4746 /* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
4747 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4748 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4749 exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4750 
4751 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
4752 jump = JUMP(SLJIT_NOT_ZERO);
4753 
4754 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4755 
4756 /* Three-byte sequence. */
4757 JUMPHERE(jump);
/* The third byte is read at offset 0, so it must lie before STR_END. */
4758 exit_invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4759 
4760 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4761 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4762 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4763 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4764 if (has_cmov)
4765   {
4766   OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4767   SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
4768   exit_invalid[10] = NULL;
4769   }
4770 else
4771   exit_invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4772 
4773 /* One will be subtracted from STR_PTR later. */
4774 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4775 
4776 /* Four byte sequences are not possible. */
/* Values below 0x30000 rejoin the common three-byte tail; anything else
falls through into the invalid exit. */
4777 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_entry);
4778 
/* Shared failure exit: every recorded exit_invalid[] branch lands here. */
4779 exit_invalid_label = LABEL();
4780 for (i = 0; i < 11; i++)
4781   sljit_set_label(exit_invalid[i], exit_invalid_label);
4782 
4783 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4784 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4785 }
4786 
/* Emits the out-of-line helper that reads a possible multi-byte newline from
UTF-8 input in the "_invalid" flavour. The emitted code returns 0x85, or
0x2000 | (third byte - 0x80) for an 0xe2 0x80 lead, in TMP1; every other
path skips the following continuation bytes and returns INVALID_UTF_CHAR. */
do_utfreadnewline_invalid(compiler_common * common)4787 static void do_utfreadnewline_invalid(compiler_common *common)
4788 {
4789 /* Slow decoding a UTF-8 character, specialized for newlines.
4790 TMP1 contains the first byte of the character (>= 0xc0). Return
4791 char value in TMP1. */
4792 DEFINE_COMPILER;
4793 struct sljit_label *loop;
4794 struct sljit_label *skip_start;
4795 struct sljit_label *three_byte_exit;
4796 struct sljit_jump *jump[5];
4797 
4798 sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
4799 
/* Short variant: emitted whenever the newline type is not NLTYPE_ANY. It
only skips continuation bytes and always returns INVALID_UTF_CHAR. */
4800 if (common->nltype != NLTYPE_ANY)
4801   {
4802   SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);
4803 
4804   /* All newlines are ascii, just skip intermediate octets. */
4805   jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4806   loop = LABEL();
/* The first call carries SLJIT_MEM_SUPP and only its result is tested, so it
looks like a capability probe for a post-incrementing load (confirm against
the sljit API). On failure a separate load and add are emitted instead. */
/* NOTE(review): STR_END is checked once before the loop but not inside it,
unlike the NLTYPE_ANY loop below - confirm that a non-continuation code unit
is guaranteed to be readable. */
4807   if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
4808     sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4809   else
4810     {
4811     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4812     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4813     }
4814 
/* (byte & 0xc0) == 0x80 selects the range 0x80-0xbf. */
4815   OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
4816   CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
/* Step back over the code unit that ended the loop. */
4817   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4818 
4819   JUMPHERE(jump[0]);
4820 
4821   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4822   OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4823   return;
4824   }
4825 
/* NLTYPE_ANY variant. jump[0]: nothing follows the lead byte. Otherwise the
second byte is loaded into TMP2 and consumed. */
4826 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4827 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4828 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4829 
/* Only lead bytes 0xc2 and 0xe2 get dedicated handling further down. */
4830 jump[1] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
4831 jump[2] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);
4832 
/* skip_start expects the most recently read code unit in TMP2 and STR_PTR
already advanced past it. */
4833 skip_start = LABEL();
4834 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
4835 jump[3] = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);
4836 
4837 /* Skip intermediate octets. */
4838 loop = LABEL();
4839 jump[4] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4840 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4841 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4842 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
4843 CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
4844 
/* The last code unit read was not in 0x80-0xbf: step back over it. */
4845 JUMPHERE(jump[3]);
4846 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4847 
/* Common "not a newline" exit; the end-of-subject branches join here without
the step back above. */
4848 three_byte_exit = LABEL();
4849 JUMPHERE(jump[0]);
4850 JUMPHERE(jump[4]);
4851 
4852 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4853 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4854 
4855 /* Two byte long newline: 0x85. */
4856 JUMPHERE(jump[1]);
4857 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start);
4858 
4859 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
4860 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4861 
4862 /* Three byte long newlines: 0x2028 and 0x2029. */
4863 JUMPHERE(jump[2]);
/* The second byte must be exactly 0x80, and a third byte must be available. */
4864 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start);
4865 CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit);
4866 
4867 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4868 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4869 
/* TMP1 receives the third byte minus 0x80; TMP2 keeps the raw byte because
skip_start re-tests it when the byte is out of range. */
4870 OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
4871 CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start);
4872 
/* The result is 0x2000 | (third byte - 0x80), i.e. a value in 0x2000-0x203f;
no comparison against 0x2028 / 0x2029 is emitted here. */
4873 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
4874 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4875 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4876 }
4877 
/* Emits the out-of-line helper that steps STR_PTR back to the lead byte of a
multi-byte UTF-8 sequence, in the "_invalid" flavour. The emitted code leaves
1 in TMP1 on the success exits and 0 on the invalid exits. TMP2 is used as
the lower bound for STR_PTR (presumably the start of the subject - confirm
against the callers). With S denoting STR_PTR on entry, the success exits
leave STR_PTR at S-1, S-2 or S-3 and every invalid exit leaves it at S+1. */
do_utfmoveback_invalid(compiler_common * common)4878 static void do_utfmoveback_invalid(compiler_common *common)
4879 {
4880 /* Goes one character back. */
4881 DEFINE_COMPILER;
4882 sljit_s32 i;
4883 struct sljit_jump *jump;
4884 struct sljit_jump *buffer_start_close;
4885 struct sljit_label *exit_ok_label;
4886 struct sljit_label *exit_invalid_label;
/* Slots 0-3 are bound to the second invalid exit, slot 4 is bound with
JUMPHERE just before it, and slots 5-6 are bound to the first invalid exit. */
4887 struct sljit_jump *exit_invalid[7];
4888 
4889 sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
4890 
/* Speculatively step back 3 code units; the fixed offsets 2, 1 and 0 below
then address the candidate lead bytes. */
4891 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
/* Entry values of TMP1 at or above 0xc0 are rejected without any read. */
4892 exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
4893 
4894 /* Two-byte sequence. */
/* Taken when the speculative position is below TMP2; that case is handled
further down with one bounds check per read. */
4895 buffer_start_close = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4896 
4897 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
4898 
/* After the rebase, values below 0x20 correspond to bytes 0xc0-0xdf. */
4899 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4900 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);
4901 
4902 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
4903 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4904 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4905 
4906 /* Three-byte sequence. */
4907 JUMPHERE(jump);
/* TMP1 still holds (byte - 0xc0). Assuming SLJIT_LESS compares as unsigned,
this rejects every byte outside 0x80-0xbf - confirm. */
4908 exit_invalid[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
4909 
4910 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4911 
/* After the rebase, values below 0x10 correspond to bytes 0xe0-0xef. */
4912 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
4913 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);
4914 
4915 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
4916 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4917 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4918 
4919 /* Four-byte sequence. */
4920 JUMPHERE(jump);
/* Re-rebase (byte - 0xe0) to (byte - 0x80): the byte must be in 0x80-0xbf. */
4921 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
4922 exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);
4923 
4924 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
/* The byte at offset 0 must be in 0xf0-0xf4. */
4925 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
4926 exit_invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);
4927 
/* Shared success exit; STR_PTR already points at the lead byte here. */
4928 exit_ok_label = LABEL();
4929 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
4930 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4931 
4932 /* Two-byte sequence. */
/* Close to the lower bound: STR_PTR becomes S-1 and is checked against TMP2
before each read. */
4933 JUMPHERE(buffer_start_close);
4934 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4935 
4936 exit_invalid[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4937 
4938 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4939 
4940 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4941 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, exit_ok_label);
4942 
4943 /* Three-byte sequence. */
4944 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4945 exit_invalid[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
4946 exit_invalid[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4947 
4948 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4949 
4950 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
4951 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, exit_ok_label);
4952 
4953 /* Four-byte sequences are not possible. */
4954 
/* First invalid exit: reached with STR_PTR at S-2 (by fall-through or from
slots 5 and 6), so adding 3 gives S+1. */
4955 exit_invalid_label = LABEL();
4956 sljit_set_label(exit_invalid[5], exit_invalid_label);
4957 sljit_set_label(exit_invalid[6], exit_invalid_label);
4958 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4959 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4960 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4961 
/* Slot 4 arrives with STR_PTR at S-1; it is moved to S-3 so that it can fall
into the second invalid exit, which adds 4. */
4962 JUMPHERE(exit_invalid[4]);
4963 /* -2 + 4 = 2 */
4964 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4965 
/* Second invalid exit: reached with STR_PTR at S-3. The variable
exit_invalid_label is reused; slots 5 and 6 were already bound above. */
4966 exit_invalid_label = LABEL();
4967 for (i = 0; i < 4; i++)
4968   sljit_set_label(exit_invalid[i], exit_invalid_label);
4969 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4970 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
4971 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4972 }
4973 
/* Emits the out-of-line helper that decodes the multi-byte UTF-8 character
ending just before STR_PTR and returns it in TMP1. No range or bounds checks
are emitted in this variant, so it presumably relies on the subject already
being valid UTF-8 and on the caller having seen a continuation byte at
offset -1 (confirm against the callers). TMP1 is overwritten before it is
read and TMP2 is used as scratch. */
do_utfpeakcharback(compiler_common * common)4974 static void do_utfpeakcharback(compiler_common *common)
4975 {
4976 /* Peek a character back. Does not modify STR_PTR. */
4977 DEFINE_COMPILER;
4978 struct sljit_jump *jump[2];
4979 
4980 sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
4981 
/* Byte at -2 in 0xc0-0xdf: it is the lead byte of a two-byte sequence. */
4982 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4983 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4984 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);
4985 
/* Byte at -3 in 0xe0-0xef: it is the lead byte of a three-byte sequence. */
4986 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
4987 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
4988 jump[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);
4989 
/* Otherwise four bytes: TMP1 becomes (byte[-3] - 0x80) and is combined with
(byte[-4] - 0xf0) << 6. */
4990 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
4991 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
4992 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
4993 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4994 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4995 
/* The stages fall through: each one shifts the accumulated value left by 6
and merges the low six bits of the next byte. */
4996 JUMPHERE(jump[1]);
4997 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4998 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4999 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
5000 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5001 
5002 JUMPHERE(jump[0]);
5003 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5004 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
5005 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
5006 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5007 
5008 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5009 }
5010 
/* Emits the out-of-line helper that decodes the multi-byte UTF-8 character
ending just before STR_PTR, in the "_invalid" flavour: the result is returned
in TMP1 and is INVALID_UTF_CHAR when any check fails. TMP1 is used as the
final byte of the sequence (no load from offset -1 is emitted here, so the
caller must supply it). TMP2 is used as the lower bound for reads on entry
(presumably the start of the subject - confirm against the callers) and is
reused as scratch afterwards. */
do_utfpeakcharback_invalid(compiler_common * common)5011 static void do_utfpeakcharback_invalid(compiler_common *common)
5012 {
5013 /* Peek a character back. Does not modify STR_PTR. */
5014 DEFINE_COMPILER;
5015 sljit_s32 i;
5016 sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
5017 struct sljit_jump *jump[2];
5018 struct sljit_label *two_byte_entry;
5019 struct sljit_label *three_byte_entry;
5020 struct sljit_label *exit_invalid_label;
/* Slots are NULL on the has_cmov paths, where a SELECT replaces the branch;
the final loop still hands them to sljit_set_label (presumably a no-op for
a NULL jump - confirm against the sljit API). */
5021 struct sljit_jump *exit_invalid[8];
5022 
5023 sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5024 
/* Move the bound up by 3 code units so that one comparison tells whether
offsets down to -4 may be read. jump[0] is the near-the-bound case. */
5025 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
5026 exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
5027 jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5028 
5029 /* Two-byte sequence. */
/* After the rebase, values below 0x1e correspond to bytes 0xc2-0xdf. */
5030 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5031 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
5032 jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);
5033 
/* Also entered from both near-the-bound paths further down. */
5034 two_byte_entry = LABEL();
5035 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
5036 /* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
5037 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5038 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5039 
5040 JUMPHERE(jump[1]);
/* The byte at -2 is not a two-byte lead: re-rebase it to (byte - 0x80) and
require the range 0x80-0xbf. TMP1 is rebased by 0x80 at the same point. */
5041 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
5042 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
5043 exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
5044 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
5045 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5046 
5047 /* Three-byte sequence. */
5048 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
5049 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
5050 jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);
5051 
/* Also entered from the first near-the-bound path further down. */
5052 three_byte_entry = LABEL();
5053 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
5054 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5055 
/* Rebase at 0xd800 so that the range 0xd800-0xdfff becomes 0-0x7ff and is
rejected by the test below. */
5056 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5057 if (has_cmov)
5058   {
5059   OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
5060   SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800, TMP1);
5061   exit_invalid[2] = NULL;
5062   }
5063 else
5064   exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
5065 
/* With the value restored, anything below 0x800 is rejected. */
5066 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5067 if (has_cmov)
5068   {
5069   OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
5070   SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
5071   exit_invalid[3] = NULL;
5072   }
5073 else
5074   exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
5075 
5076 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5077 
5078 JUMPHERE(jump[1]);
/* The byte at -3 is not a three-byte lead: it must be in 0x80-0xbf. */
5079 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
5080 exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
5081 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
5082 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5083 
5084 /* Four-byte sequence. */
5085 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
5086 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
5087 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
5088 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
5089 /* ADD is used instead of OR because of the SUB 0x10000 above. */
5090 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5091 
/* TMP1 is the character minus 0x10000 here, so only results below 0x100000
(characters 0x10000-0x10ffff) pass. */
5092 if (has_cmov)
5093   {
5094   OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
5095   SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);
5096   exit_invalid[5] = NULL;
5097   }
5098 else
5099   exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
5100 
5101 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
5102 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5103 
/* Near the lower bound: lower the adjusted bound by one code unit and test
again. If this second test passes, two- and three-byte sequences are tried
and a four-byte sequence is not. */
5104 JUMPHERE(jump[0]);
5105 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5106 jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5107 
5108 /* Two-byte sequence. */
5109 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5110 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
5111 CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);
5112 
5113 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
5114 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
5115 exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
5116 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
5117 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5118 
5119 /* Three-byte sequence. */
5120 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
5121 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
5122 CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);
5123 
5124 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5125 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5126 
/* Closer still to the lower bound: the byte at -2 is read only when the
adjusted bound does not exceed STR_PTR, and only a two-byte sequence is
tried. */
5127 JUMPHERE(jump[0]);
5128 exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);
5129 
5130 /* Two-byte sequence. */
5131 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5132 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
5133 CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);
5134 
/* Shared failure exit: every recorded exit_invalid[] branch lands here. */
5135 exit_invalid_label = LABEL();
5136 for (i = 0; i < 8; i++)
5137   sljit_set_label(exit_invalid[i], exit_invalid_label);
5138 
5139 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5140 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5141 }
5142 
5143 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
5144 
5145 #if PCRE2_CODE_UNIT_WIDTH == 16
5146 
/* UTF-16 build of the "_invalid" character reader. Emits the out-of-line
helper that combines the first half already read with the following code
unit. The result is returned in TMP1 and is INVALID_UTF_CHAR when any check
fails. */
do_utfreadchar_invalid(compiler_common * common)5147 static void do_utfreadchar_invalid(compiler_common *common)
5148 {
5149 /* Slow decoding a UTF-16 character. TMP1 contains the first half
5150 of the character (>= 0xd800). Return char value in TMP1. STR_PTR is
5151 undefined for invalid characters. */
5152 DEFINE_COMPILER;
5153 struct sljit_jump *exit_invalid[3];
5154 
5155 sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5156 
5157 /* TMP2 contains the high surrogate. */
/* NOTE(review): TMP2 is read below without being written in this routine, so
the caller must have prepared it; given the shift by 10 and the 0x10000 added
later it is presumably already rebased by 0xd800 - confirm at the call
sites. */
/* Slot 0: the first half is 0xdc00 or above, so it cannot open a pair.
Slot 1: no code unit is left to read. */
5158 exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
5159 exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5160 
5161 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5162 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
5163 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5164 
/* Slot 2: the second code unit must be in 0xdc00-0xdfff. STR_PTR has already
been advanced when this check fails. */
5165 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
5166 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
5167 exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);
5168 
/* Result: (TMP2 << 10) + 0x10000 + (second unit - 0xdc00). */
5169 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5170 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5171 
5172 JUMPHERE(exit_invalid[0]);
5173 JUMPHERE(exit_invalid[1]);
5174 JUMPHERE(exit_invalid[2]);
5175 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5176 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5177 }
5178 
/* UTF-16 build of the "_invalid" newline reader. The emitted code never
decodes the pair: it returns the constant 0x10000 in TMP1 on the normal path
and advances STR_PTR by an amount derived from whether the next code unit is
in 0xdc00-0xdfff. It returns INVALID_UTF_CHAR when nothing follows or when
the first half is 0xdc00 or above. */
do_utfreadnewline_invalid(compiler_common * common)5179 static void do_utfreadnewline_invalid(compiler_common *common)
5180 {
5181 /* Slow decoding a UTF-16 character, specialized for newlines.
5182 TMP1 contains the first half of the character (>= 0xd800). Return
5183 char value in TMP1. */
5184 
5185 DEFINE_COMPILER;
5186 struct sljit_jump *exit_invalid[2];
5187 
5188 sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5189 
5190 /* TMP2 is not an input here: it is overwritten by the load below. */
5191 exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5192 
5193 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5194 exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
5195 
/* TMP2 is replaced by the outcome of the LESS test (presumably 1 when the
next unit is in 0xdc00-0xdfff and 0 otherwise - confirm OP_FLAGS), scaled by
UCHAR_SHIFT and added to STR_PTR. */
5196 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
5197 OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
5198 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
5199 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
5200 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
5201 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5202 
5203 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5204 
5205 JUMPHERE(exit_invalid[0]);
5206 JUMPHERE(exit_invalid[1]);
5207 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5208 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5209 }
5210 
/* UTF-16 build of the "_invalid" move-back helper. On the success path the
emitted code moves STR_PTR back by one code unit and returns 1 in TMP1; on
every failed check it moves STR_PTR forward by one code unit and returns 0.
TMP2 is used as the lower bound for STR_PTR (presumably the start of the
subject - confirm against the callers). */
do_utfmoveback_invalid(compiler_common * common)5211 static void do_utfmoveback_invalid(compiler_common *common)
5212 {
5213 /* Goes one character back. */
5214 DEFINE_COMPILER;
5215 struct sljit_jump *exit_invalid[3];
5216 
5217 sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5218 
/* Slot 0: entry values of TMP1 below 0x400 are rejected. How TMP1 is encoded
on entry is decided by the caller and is not visible here.
Slot 1: no code unit may be read below the bound in TMP2. */
5219 exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
5220 exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5221 
/* Slot 2: the preceding code unit must be in 0xd800-0xdbff. */
5222 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5223 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5224 exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);
5225 
5226 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5227 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
5228 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5229 
5230 JUMPHERE(exit_invalid[0]);
5231 JUMPHERE(exit_invalid[1]);
5232 JUMPHERE(exit_invalid[2]);
5233 
5234 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5235 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
5236 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5237 }
5238 
/* UTF-16 build of the "_invalid" peek-back helper. TMP1 is used as the code
unit just before STR_PTR (no load from offset -1 is emitted here, so the
caller must supply it). TMP2 is used as the lower bound for reads on entry
(presumably the start of the subject - confirm against the callers) and is
reused as scratch. The result is returned in TMP1 and is INVALID_UTF_CHAR
when any check fails. */
do_utfpeakcharback_invalid(compiler_common * common)5239 static void do_utfpeakcharback_invalid(compiler_common *common)
5240 {
5241 /* Peek a character back. Does not modify STR_PTR. */
5242 DEFINE_COMPILER;
5243 struct sljit_jump *jump;
5244 struct sljit_jump *exit_invalid[3];
5245 
5246 sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5247 
/* Values of 0xe000 and above are returned unchanged. */
5248 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
/* Raise the bound by one code unit so that a single comparison tells whether
offset -2 may be read. */
5249 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
/* Slot 0: TMP1 is below 0xdc00. Slot 1: offset -2 is not readable. */
5250 exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
5251 exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5252 
/* Slot 2: the code unit at -2 must be in 0xd800-0xdbff. */
5253 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5254 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
5255 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
5256 exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
/* Result: 0x10000 + ((unit[-2] - 0xd800) << 10) + (entry TMP1 - 0xdc00). */
5257 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
5258 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5259 
5260 JUMPHERE(jump);
5261 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5262 
5263 JUMPHERE(exit_invalid[0]);
5264 JUMPHERE(exit_invalid[1]);
5265 JUMPHERE(exit_invalid[2]);
5266 
5267 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5268 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5269 }
5270 
5271 #endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
5272 
5273 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
/* do_getucd and do_getucdtype split a character with these two constants:
shifting right by UCD_BLOCK_SHIFT gives the ucd_stage1 index, and masking
with UCD_BLOCK_MASK gives the offset inside the 128-entry block. */
5274 #define UCD_BLOCK_MASK 127
5275 #define UCD_BLOCK_SHIFT 7
5276 
/* Emits the out-of-line helper that performs the two-stage UCD table lookup
(ucd_stage1, then ucd_stage2) for the character in TMP1. Unlike
do_getucdtype, no field of ucd_records is read here. */
do_getucd(compiler_common * common)5277 static void do_getucd(compiler_common *common)
5278 {
5279 /* Looks up the UCD record index for the character that arrives in TMP1.
5280 Returns the 16-bit value read from ucd_stage2 in TMP2. TMP1 is left holding
the index that was used for the ucd_stage2 access; no chartype is loaded by
this routine. */
5281 DEFINE_COMPILER;
5282 #if PCRE2_CODE_UNIT_WIDTH == 32
5283 struct sljit_jump *jump;
5284 #endif
5285 
5286 #if defined SLJIT_DEBUG && SLJIT_DEBUG
5287 /* dummy_ucd_record */
/* Debug-only check that the record selected for UNASSIGNED_UTF_CHAR has the
neutral properties asserted below. */
5288 const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
5289 SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
5290 SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
5291 #endif
5292 
5293 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);
5294 
5295 sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5296 
5297 #if PCRE2_CODE_UNIT_WIDTH == 32
/* In 32-bit non-UTF mode a value above MAX_UTF_CODE_POINT is replaced by
UNASSIGNED_UTF_CHAR before the tables are indexed. */
5298 if (!common->utf)
5299   {
5300   jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
5301   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
5302   JUMPHERE(jump);
5303   }
5304 #endif
5305 
/* Stage 1: the block number (TMP1 >> 7) is doubled to form a byte offset and
a 16-bit entry is loaded from ucd_stage1. */
5306 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5307 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5308 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Stage 2: the index is (stage-1 value << 7) + (TMP1 & 127). The trailing 1
of the SLJIT_MEM2 access presumably scales that index by 2 for the 16-bit
entries - confirm against the sljit API. */
5309 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5310 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5311 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5312 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5313 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5314 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5315 }
5316 
/* Emits the out-of-line helper that performs the two-stage UCD table lookup
for the character in TMP1 and then loads the chartype field of the selected
ucd_records entry into TMP1. */
do_getucdtype(compiler_common * common)5317 static void do_getucdtype(compiler_common *common)
5318 {
5319 /* Looks up the UCD record for the character that arrives in TMP1.
5320 Returns chartype in TMP1. TMP2 is left holding the record index multiplied
by 4, a by-product of the address computation at the end. */
5321 DEFINE_COMPILER;
5322 #if PCRE2_CODE_UNIT_WIDTH == 32
5323 struct sljit_jump *jump;
5324 #endif
5325 
5326 #if defined SLJIT_DEBUG && SLJIT_DEBUG
5327 /* dummy_ucd_record */
/* Debug-only check that the record selected for UNASSIGNED_UTF_CHAR has the
neutral properties asserted below. */
5328 const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
5329 SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
5330 SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
5331 #endif
5332 
/* The record size of 12 asserted here is what the multiply-by-12 at the end
of this routine depends on. */
5333 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);
5334 
5335 sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5336 
5337 #if PCRE2_CODE_UNIT_WIDTH == 32
/* In 32-bit non-UTF mode a value above MAX_UTF_CODE_POINT is replaced by
UNASSIGNED_UTF_CHAR before the tables are indexed. */
5338 if (!common->utf)
5339   {
5340   jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
5341   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
5342   JUMPHERE(jump);
5343   }
5344 #endif
5345 
/* Same two-stage lookup as do_getucd: TMP2 ends up holding the 16-bit value
read from ucd_stage2. */
5346 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5347 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5348 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5349 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5350 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5351 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5352 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5353 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5354 
5355 /* TMP2 is multiplied by 12. Same as (TMP2 << 2) + ((TMP2 << 2) << 1). */
/* TMP1 starts as the address of the chartype field of the first record. One
(TMP2 << 2) is added explicitly; the other, shifted by one more bit, is
presumably applied by the trailing 1 of the SLJIT_MEM2 access - confirm
against the sljit API. */
5356 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5357 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
5358 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5359 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);
5360 
5361 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5362 }
5363 
5364 #endif /* SUPPORT_UNICODE */
5365 
mainloop_entry(compiler_common * common)5366 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
5367 {
5368 DEFINE_COMPILER;
5369 struct sljit_label *mainloop;
5370 struct sljit_label *newlinelabel = NULL;
5371 struct sljit_jump *start;
5372 struct sljit_jump *end = NULL;
5373 struct sljit_jump *end2 = NULL;
5374 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5375 struct sljit_label *loop;
5376 struct sljit_jump *jump;
5377 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5378 jump_list *newline = NULL;
5379 sljit_u32 overall_options = common->re->overall_options;
5380 BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
5381 BOOL newlinecheck = FALSE;
5382 BOOL readuchar = FALSE;
5383 
5384 if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
5385     && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
5386   newlinecheck = TRUE;
5387 
5388 SLJIT_ASSERT(common->abort_label == NULL);
5389 
5390 if ((overall_options & PCRE2_FIRSTLINE) != 0)
5391   {
5392   /* Search for the end of the first line. */
5393   SLJIT_ASSERT(common->match_end_ptr != 0);
5394   OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5395 
5396   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5397     {
5398     mainloop = LABEL();
5399     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5400     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5401     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5402     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5403     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
5404     CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
5405     JUMPHERE(end);
5406     OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5407     }
5408   else
5409     {
5410     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5411     mainloop = LABEL();
5412     /* Continual stores does not cause data dependency. */
5413     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5414     read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
5415     check_newlinechar(common, common->nltype, &newline, TRUE);
5416     CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
5417     JUMPHERE(end);
5418     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5419     set_jumps(newline, LABEL());
5420     }
5421 
5422   OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5423   }
5424 else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
5425   {
5426   /* Check whether offset limit is set and valid. */
5427   SLJIT_ASSERT(common->match_end_ptr != 0);
5428 
5429   if (HAS_VIRTUAL_REGISTERS)
5430     {
5431     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5432     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5433     }
5434   else
5435     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5436 
5437   OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5438   end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
5439   if (HAS_VIRTUAL_REGISTERS)
5440     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5441   else
5442     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
5443 
5444 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5445   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5446 #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5447   if (HAS_VIRTUAL_REGISTERS)
5448     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5449 
5450   OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
5451   end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5452   OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5453   JUMPHERE(end2);
5454   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
5455   add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
5456   JUMPHERE(end);
5457   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
5458   }
5459 
5460 start = JUMP(SLJIT_JUMP);
5461 
5462 if (newlinecheck)
5463   {
5464   newlinelabel = LABEL();
5465   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5466   end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5467   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5468   OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
5469   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
5470 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5471   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5472 #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5473   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5474   end2 = JUMP(SLJIT_JUMP);
5475   }
5476 
5477 mainloop = LABEL();
5478 
5479 /* Increasing the STR_PTR here requires one less jump in the most common case. */
5480 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5481 if (common->utf && !common->invalid_utf) readuchar = TRUE;
5482 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5483 if (newlinecheck) readuchar = TRUE;
5484 
5485 if (readuchar)
5486   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5487 
5488 if (newlinecheck)
5489   CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
5490 
5491 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5492 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5493 #if PCRE2_CODE_UNIT_WIDTH == 8
5494 if (common->invalid_utf)
5495   {
5496   /* Skip continuation code units. */
5497   loop = LABEL();
5498   jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5499   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5500   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5501   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
5502   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
5503   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5504   JUMPHERE(jump);
5505   }
5506 else if (common->utf)
5507   {
5508   jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5509   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5510   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5511   JUMPHERE(jump);
5512   }
5513 #elif PCRE2_CODE_UNIT_WIDTH == 16
5514 if (common->invalid_utf)
5515   {
5516   /* Skip continuation code units. */
5517   loop = LABEL();
5518   jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5519   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5520   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5521   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
5522   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
5523   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5524   JUMPHERE(jump);
5525   }
5526 else if (common->utf)
5527   {
5528   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5529 
5530   if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
5531     {
5532     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5533     OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
5534     SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);
5535     }
5536   else
5537     {
5538     OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
5539     OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
5540     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5541     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5542     }
5543   }
5544 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
5545 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5546 JUMPHERE(start);
5547 
5548 if (newlinecheck)
5549   {
5550   JUMPHERE(end);
5551   JUMPHERE(end2);
5552   }
5553 
5554 return mainloop;
5555 }
5556 
5557 
add_prefix_char(PCRE2_UCHAR chr,fast_forward_char_data * chars,BOOL last)5558 static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
5559 {
5560 sljit_u32 i, count = chars->count;
5561 
5562 if (count == 255)
5563   return;
5564 
5565 if (count == 0)
5566   {
5567   chars->count = 1;
5568   chars->chars[0] = chr;
5569 
5570   if (last)
5571     chars->last_count = 1;
5572   return;
5573   }
5574 
5575 for (i = 0; i < count; i++)
5576   if (chars->chars[i] == chr)
5577     return;
5578 
5579 if (count >= MAX_DIFF_CHARS)
5580   {
5581   chars->count = 255;
5582   return;
5583   }
5584 
5585 chars->chars[count] = chr;
5586 chars->count = count + 1;
5587 
5588 if (last)
5589   chars->last_count++;
5590 }
5591 
/* Walks the compiled pattern starting at cc and records, for each successive
code unit position of the pattern prefix, which code units may appear there.

   common     compiler state (utf / ucp flags, character tables)
   cc         current position in the compiled pattern
   chars      record for the first position to fill; advanced as positions
                are consumed
   max_chars  maximum number of positions that may still be described
   rec_count  shared work budget, decremented once per loop iteration

Returns the number of positions described by this invocation. Zero is returned
when the work budget runs out. For optional items and alternatives the function
calls itself on the other path with the same chars pointer; the value returned
by that call then becomes the new max_chars limit, so the result never extends
past the shortest path that was scanned. */
static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
{
/* Recursive function, which scans prefix literals. */
BOOL last, any, class, caseless;
int len, repeat, len_save, consumed = 0;
sljit_u32 chr; /* Any unicode character. */
sljit_u8 *bytes, *bytes_end, byte;
PCRE2_SPTR alternative, cc_save, oc;
/* Buffer for the encoded other case of a character; its size depends on
the code unit width. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
PCRE2_UCHAR othercase[4];
#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
PCRE2_UCHAR othercase[2];
#else
PCRE2_UCHAR othercase[1];
#endif

/* Number of times the next item has to be recorded. It is reset to 1 after
each item has been processed. */
repeat = 1;
while (TRUE)
  {
  /* Stop when the shared work budget is exhausted. */
  if (*rec_count == 0)
    return 0;
  (*rec_count)--;

  /* Defaults for one item: a literal after which scanning stops (last),
  unless the opcode handler below says otherwise. */
  last = TRUE;
  any = FALSE;
  class = FALSE;
  caseless = FALSE;

  switch (*cc)
    {
    case OP_CHARI:
    caseless = TRUE;
    /* Fall through */
    case OP_CHAR:
    /* A single literal: scanning goes on after it. */
    last = FALSE;
    cc++;
    break;

    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_NOT_UCP_WORD_BOUNDARY:
    case OP_UCP_WORD_BOUNDARY:
    /* Zero width assertions. */
    cc++;
    continue;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    /* Assertion groups do not contribute positions; scanning resumes at the
    address returned by bracketend() (presumably just past the group). */
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    caseless = TRUE;
    /* Fall through */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    /* Only the first occurrence is recorded; last stays TRUE so the scan
    ends after it. */
    cc++;
    break;

    case OP_EXACTI:
    caseless = TRUE;
    /* Fall through */
    case OP_EXACT:
    /* Fixed repeat count: the literal is recorded repeat times and the
    scan continues afterwards. */
    repeat = GET2(cc, 1);
    last = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    caseless = TRUE;
    /* Fall through */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    /* Optional character: first scan what follows it (the path where the
    character is absent) into the same positions, then record the character
    itself below. */
    len = 1;
    cc++;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif
    max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
    if (max_chars == 0)
      return consumed;
    last = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    /* Step over this alternative using its link value. */
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    /* Every alternative introduced by OP_ALT is scanned recursively into the
    same positions; the first alternative is then scanned by this loop. */
    alternative = cc + GET(cc, 1);
    while (*alternative == OP_ALT)
      {
      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      alternative += GET(alternative, 1);
      }

    /* Capturing brackets carry an extra IMM2_SIZE field after the link. */
    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    /* In UTF-8 mode the class is only used when is_char7_bitset() accepts
    its bitmap. */
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
      return consumed;
#endif
    class = TRUE;
    break;

    case OP_NCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    class = TRUE;
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    /* Treated as "any code unit"; the item is skipped using its stored
    length. */
    any = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    /* Extra increment: these opcodes are followed by a character, which is
    skipped by the second cc++ below. */
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UNICODE
    case OP_NOTPROP:
    case OP_PROP:
#if PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* Only sets the repeat count; the repeated type opcode that follows is
    handled by the next loop iteration. */
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    /* Any other opcode ends the prefix. */
    return consumed;
    }

  if (any)
    {
    /* The item places no usable restriction on the position: mark repeat
    consecutive positions with the special count 255. */
    do
      {
      chars->count = 255;

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    repeat = 1;
    continue;
    }

  if (class)
    {
    /* The 32 byte bitmap follows the opcode; cc is moved to the item after
    the bitmap, which may be a repeat opcode. */
    bytes = (sljit_u8*) (cc + 1);
    cc += 1 + 32 / sizeof(PCRE2_UCHAR);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      /* The class may match zero times: scan the path that skips it. */
      max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      break;

      default:
      case OP_CRPLUS:
      case OP_CRMINPLUS:
      case OP_CRPOSPLUS:
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* The first value of the range is used as the repeat count; nothing
      can be recorded when it is zero. */
      repeat = GET2(cc, 1);
      if (repeat <= 0)
        return consumed;
      break;
      }

    do
      {
      /* A set top bit in the last bitmap byte (code unit 255) makes the
      position unrestricted. */
      if (bytes[31] & 0x80)
        chars->count = 255;
      else if (chars->count != 255)
        {
        /* Add every code unit whose bit is set. chr is the value that
        corresponds to bit 0 of the byte about to be read. */
        bytes_end = bytes + 32;
        chr = 0;
        do
          {
          byte = *bytes++;
          SLJIT_ASSERT((chr & 0x7) == 0);
          if (byte == 0)
            chr += 8;
          else
            {
            do
              {
              if ((byte & 0x1) != 0)
                add_prefix_char(chr, chars, TRUE);
              byte >>= 1;
              chr++;
              }
            while (byte != 0);
            /* The bit loop stops at the highest set bit, so round chr up to
            the first value of the next byte. */
            chr = (chr + 7) & ~7;
            }
          }
        while (chars->count != 255 && bytes < bytes_end);
        /* Rewind to the start of the bitmap for the next repetition. */
        bytes = bytes_end - 32;
        }

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      /* The number of further repetitions is unknown: stop here. */
      return consumed;

      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      cc++;
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* Scanning can only continue when the two range values are equal,
      i.e. the repeat count is exact. */
      if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
        return consumed;
      cc += 1 + 2 * IMM2_SIZE;
      break;
      }

    repeat = 1;
    continue;
    }

  /* Literal character: len is its length in code units. */
  len = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif

  if (caseless && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UNICODE
    if (common->utf)
      {
      /* The other case is only usable when its encoding has the same
      number of code units as the character itself. */
      GETCHAR(chr, cc);
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
        return consumed;
      }
    else
#endif
      {
      chr = *cc;
#ifdef SUPPORT_UNICODE
      if (common->ucp && chr > 127)
        othercase[0] = UCD_OTHERCASE(chr);
      else
#endif
        othercase[0] = TABLE_GET(chr, common->fcc, chr);
      }
    }
  else
    {
    caseless = FALSE;
    othercase[0] = 0; /* Stops compiler warning - PH */
    }

  /* Record the code units of the character repeat times. cc and len are
  restored from the saved values before every repetition. */
  len_save = len;
  cc_save = cc;
  while (TRUE)
    {
    oc = othercase;
    do
      {
      len--;
      consumed++;

      /* The "last" argument is TRUE only for the final code unit of the
      character (len has already been decremented). */
      chr = *cc;
      add_prefix_char(*cc, chars, len == 0);

      if (caseless)
        add_prefix_char(*oc, chars, len == 0);

      if (--max_chars == 0)
        return consumed;
      chars++;
      cc++;
      oc++;
      }
    while (len > 0);

    if (--repeat == 0)
      break;

    len = len_save;
    cc = cc_save;
    }

  repeat = 1;
  /* Items such as OP_PLUS leave last set: nothing after them is scanned. */
  if (last)
    return consumed;
  }
}
6002 
6003 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
/* Emits code that jumps to label when the code unit held in reg cannot be the
first code unit of a UTF character.

   compiler  the sljit compiler used for emitting the instructions
   reg       register containing the code unit; it is overwritten by the mask
   label     jump target used when the code unit is not a character start

8-bit: the two top bits are kept and compared with 0x80 (10xxxxxx, a UTF-8
continuation byte). 16-bit: the six top bits are kept and compared with 0xdc00
(0xdc00-0xdfff, a UTF-16 low surrogate). */
static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0x80, label);
#elif PCRE2_CODE_UNIT_WIDTH == 16
OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label);
#else
#error "Unknown code width"
#endif
}
6016 #endif
6017 
6018 #include "pcre2_jit_simd_inc.h"
6019 
6020 #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6021 
check_fast_forward_char_pair_simd(compiler_common * common,fast_forward_char_data * chars,int max)6022 static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)
6023 {
6024   sljit_s32 i, j, max_i = 0, max_j = 0;
6025   sljit_u32 max_pri = 0;
6026   sljit_s32 max_offset = max_fast_forward_char_pair_offset();
6027   PCRE2_UCHAR a1, a2, a_pri, b1, b2, b_pri;
6028 
6029   for (i = max - 1; i >= 1; i--)
6030     {
6031     if (chars[i].last_count > 2)
6032       {
6033       a1 = chars[i].chars[0];
6034       a2 = chars[i].chars[1];
6035       a_pri = chars[i].last_count;
6036 
6037       j = i - max_offset;
6038       if (j < 0)
6039         j = 0;
6040 
6041       while (j < i)
6042         {
6043         b_pri = chars[j].last_count;
6044         if (b_pri > 2 && (sljit_u32)a_pri + (sljit_u32)b_pri >= max_pri)
6045           {
6046           b1 = chars[j].chars[0];
6047           b2 = chars[j].chars[1];
6048 
6049           if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)
6050             {
6051             max_pri = a_pri + b_pri;
6052             max_i = i;
6053             max_j = j;
6054             }
6055           }
6056         j++;
6057         }
6058       }
6059     }
6060 
6061 if (max_pri == 0)
6062   return FALSE;
6063 
6064 fast_forward_char_pair_simd(common, max_i, chars[max_i].chars[0], chars[max_i].chars[1], max_j, chars[max_j].chars[0], chars[max_j].chars[1]);
6065 return TRUE;
6066 }
6067 
6068 #endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6069 
/* Emits code that moves STR_PTR forward until the code unit found offset
units after STR_PTR is char1 or char2.

   common  compiler state
   char1   first accepted code unit
   char2   second accepted code unit; callers pass the same value as char1
             when only one code unit is accepted
   offset  distance (in code units) between STR_PTR and the tested code unit;
             must be 0 unless the mode is PCRE2_JIT_COMPLETE

When common->match_end_ptr is set, STR_END is temporarily replaced (the
original is kept in TMP3 and restored before the emitted code finishes). In
PCRE2_JIT_COMPLETE mode running out of input jumps to common->failed_match. */
static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *match;
struct sljit_jump *partial_quit;
PCRE2_UCHAR mask;
BOOL has_match_end = (common->match_end_ptr != 0);

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);

/* Load the stored match end limit before STR_PTR is changed. */
if (has_match_end)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

/* From here on STR_PTR points to the code unit that is tested. */
if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (has_match_end)
  {
  /* Save the real end of the subject. */
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  /* The comparison sets the GREATER flag when STR_END is above the adjusted
  limit in TMP1. NOTE(review): SELECT presumably stores TMP1 in STR_END in
  that case, i.e. STR_END becomes the lower of the two values - confirm
  against the sljit select semantics. */
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD

if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
  {
  /* The search itself is delegated to the SIMD implementation; only the
  STR_PTR shift and STR_END have to be restored afterwards. */
  fast_forward_char_simd(common, char1, char2, offset);

  if (offset > 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

  if (has_match_end)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

start = LABEL();

/* End of input: fail in complete mode, otherwise leave the loop through
the jump target set near the end of this function. */
partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit);

/* Read the code unit and step over it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (char1 == char2)
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, start);
else
  {
  mask = char1 ^ char2;
  if (is_powerof2(mask))
    {
    /* The two code units differ in a single bit: after forcing that bit
    to one, a single comparison covers both of them. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask, start);
    }
  else
    {
    match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, start);
    JUMPHERE(match);
    }
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  /* The candidate start position (offset + 1 code units back) must be the
  first code unit of a character; otherwise the search continues. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
  jumpto_if_not_utf_char_start(compiler, TMP1, start);
  }
#endif

/* Undo both the initial offset and the step over the matched code unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));

/* Outside complete mode the end-of-input jump lands here, after the
subtraction above. */
if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(partial_quit);

if (has_match_end)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
6155 
/* Analyses the first code units of the pattern with scan_prefix() and emits a
fast forward search based on the result.

Three strategies are tried in this order:
  - a SIMD search for a pair of positions (when available and suitable),
  - a table driven skip loop over a run of at least four consecutive
    positions that all have a limited set of code units, optionally
    combined with a check of one further position,
  - a plain search for one position with at most two possible code units.

Returns FALSE when nothing usable was found and no code was emitted. TRUE is
returned otherwise; this includes the case when the skip table cannot be
allocated. */
static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *match;
fast_forward_char_data chars[MAX_N_CHARS];
sljit_s32 offset;
PCRE2_UCHAR mask;
PCRE2_UCHAR *char_set, *char_set_end;
int i, max, from;
int range_right = -1, range_len;
sljit_u8 *update_table = NULL;
BOOL in_range;
sljit_u32 rec_count;

for (i = 0; i < MAX_N_CHARS; i++)
  {
  chars[i].count = 0;
  chars[i].last_count = 0;
  }

/* Work budget shared by all recursive scan_prefix() calls. */
rec_count = 10000;
max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);

if (max < 1)
  return FALSE;

/* Convert last_count to priority. */
/* Resulting values: 7/5 for a single code unit, 6/4 for two code units
that differ in one bit, 3/2 for two other code units (the higher value
of each pair is used when last_count equals count), 1 for larger sets and
0 for positions marked with count 255. */
for (i = 0; i < max; i++)
  {
  SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count);

  if (chars[i].count == 1)
    {
    chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
    /* Simplifies algorithms later. */
    chars[i].chars[1] = chars[i].chars[0];
    }
  else if (chars[i].count == 2)
    {
    SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);

    if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
      chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
    else
      chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
    }
  else
    chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))
  return TRUE;
#endif

/* Find the longest run of consecutive positions whose count is below 255.
The run must be longer than the initial range_len (3). The loop runs up
to i == max so that a run reaching the last position is closed as well.
range_right is the index of the last position of the best run. */
in_range = FALSE;
/* Prevent compiler "uninitialized" warning */
from = 0;
range_len = 4 /* minimum length */ - 1;
for (i = 0; i <= max; i++)
  {
  if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
    {
    range_len = i - from;
    range_right = i - 1;
    }

  if (i < max && chars[i].count < 255)
    {
    SLJIT_ASSERT(chars[i].count > 0);
    if (!in_range)
      {
      in_range = TRUE;
      from = i;
      }
    }
  else
    in_range = FALSE;
  }

if (range_right >= 0)
  {
  /* Build the 256 entry skip table. It is indexed by the low byte of the
  code unit read at position range_right and contains the distance (in
  bytes) STR_PTR can be advanced by. The default is the whole run length;
  a code unit that may occur i positions before range_right lowers its
  entry to at most i, so the entry is zero for code units that may occur
  at range_right itself. */
  update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
  if (update_table == NULL)
    return TRUE;
  memset(update_table, IN_UCHARS(range_len), 256);

  for (i = 0; i < range_len; i++)
    {
    SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);

    char_set = chars[range_right - i].chars;
    char_set_end = char_set + chars[range_right - i].count;
    do
      {
      if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
        update_table[(*char_set) & 0xff] = IN_UCHARS(i);
      char_set++;
      }
    while (char_set < char_set_end);
    }
  }

/* Select the position with the highest priority, ignoring range_right.
The first candidate needs a priority of at least 2; a later position
replaces it only when its priority is strictly higher. */
offset = -1;
/* Scan forward. */
for (i = 0; i < max; i++)
  {
  if (range_right == i)
    continue;

  if (offset == -1)
    {
    if (chars[i].last_count >= 2)
      offset = i;
    }
  else if (chars[offset].last_count < chars[i].last_count)
    offset = i;
  }

SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));

if (range_right < 0)
  {
  /* No run was found: fall back to a one position search if possible. */
  if (offset < 0)
    return FALSE;
  /* Works regardless the value is 1 or 2. */
  fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
  return TRUE;
  }

SLJIT_ASSERT(range_right != offset);

/* STR_END is lowered by max code units while the loop below runs, so the
reads at positive offsets from STR_PTR stay inside the subject. The match
fails immediately when the subtraction sets the LESS flag. */
if (common->match_end_ptr != 0)
  {
  /* The original STR_END is kept in TMP3. NOTE(review): SELECT presumably
  stores the match end limit (TMP1) in STR_END when the lowered STR_END is
  above it - confirm against the sljit select semantics. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
  }
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
  }

SLJIT_ASSERT(range_right >= 0);

/* Without virtual registers the table address is kept in RETURN_ADDR;
otherwise it is used as an absolute displacement in the table load. */
if (!HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);

start = LABEL();
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

/* Read one byte of the code unit at range_right. The address is adjusted
on big endian targets with wider code units so that the low byte is read,
matching the "& 0xff" used when the table was built. */
#if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
#else
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
#endif

/* Replace the byte with its skip distance. */
if (!HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
else
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);

/* Advance and repeat until the distance is zero. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);

if (offset >= 0)
  {
  /* Additionally check the selected position. STR_PTR is incremented
  before the comparison, so a mismatch restarts the loop one code unit
  further on. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (chars[offset].count == 1)
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
  else
    {
    mask = chars[offset].chars[0] ^ chars[offset].chars[1];
    if (is_powerof2(mask))
      {
      /* One comparison is enough when the code units differ in one bit. */
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
      }
    else
      {
      match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
      JUMPHERE(match);
      }
    }
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset != 0)
  {
  /* The candidate position must be the first code unit of a character.
  When no position was checked above (offset < 0), STR_PTR has not been
  incremented yet, so the code unit is read at STR_PTR and the increment
  is done here; otherwise the code unit is one unit behind STR_PTR. */
  if (offset < 0)
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

  jumpto_if_not_utf_char_start(compiler, TMP1, start);

  if (offset < 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }
#endif

/* Undo the increment made for the position check. */
if (offset >= 0)
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Restore the original STR_END. */
if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
else
  OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
return TRUE;
}
6377 
fast_forward_first_char(compiler_common * common)6378 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
6379 {
6380 PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
6381 PCRE2_UCHAR oc;
6382 
6383 oc = first_char;
6384 if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
6385   {
6386   oc = TABLE_GET(first_char, common->fcc, first_char);
6387 #if defined SUPPORT_UNICODE
6388   if (first_char > 127 && (common->utf || common->ucp))
6389     oc = UCD_OTHERCASE(first_char);
6390 #endif
6391   }
6392 
6393 fast_forward_first_char2(common, first_char, oc, 0);
6394 }
6395 
fast_forward_newline(compiler_common * common)6396 static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
6397 {
6398 DEFINE_COMPILER;
6399 struct sljit_label *loop;
6400 struct sljit_jump *lastchar = NULL;
6401 struct sljit_jump *firstchar;
6402 struct sljit_jump *quit = NULL;
6403 struct sljit_jump *foundcr = NULL;
6404 struct sljit_jump *notfoundnl;
6405 jump_list *newline = NULL;
6406 
6407 if (common->match_end_ptr != 0)
6408   {
6409   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6410   OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6411   }
6412 
6413 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6414   {
6415 #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6416   if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)
6417     {
6418     if (HAS_VIRTUAL_REGISTERS)
6419       {
6420       OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6421       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6422       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6423       }
6424     else
6425       {
6426       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6427       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6428       }
6429     firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6430 
6431     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6432     OP2U(SLJIT_SUB | SLJIT_SET_Z, STR_PTR, 0, TMP1, 0);
6433     OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
6434 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6435     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6436 #endif
6437     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6438 
6439     fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);
6440     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6441     }
6442   else
6443 #endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6444     {
6445     lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6446     if (HAS_VIRTUAL_REGISTERS)
6447       {
6448       OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6449       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6450       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6451       }
6452     else
6453       {
6454       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6455       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6456       }
6457     firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6458 
6459     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
6460     OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, STR_PTR, 0, TMP1, 0);
6461     OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
6462 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6463     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
6464 #endif
6465     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6466 
6467     loop = LABEL();
6468     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6469     quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6470     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6471     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6472     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
6473     CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
6474 
6475     JUMPHERE(quit);
6476     JUMPHERE(lastchar);
6477     }
6478 
6479   JUMPHERE(firstchar);
6480 
6481   if (common->match_end_ptr != 0)
6482     OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6483   return;
6484   }
6485 
6486 if (HAS_VIRTUAL_REGISTERS)
6487   {
6488   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6489   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6490   }
6491 else
6492   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6493 
6494 /* Example: match /^/ to \r\n from offset 1. */
6495 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6496 
6497 if (common->nltype == NLTYPE_ANY)
6498   move_back(common, NULL, FALSE);
6499 else
6500   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6501 
6502 loop = LABEL();
6503 common->ff_newline_shortcut = loop;
6504 
6505 #ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
6506 if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))
6507   {
6508   if (common->nltype == NLTYPE_ANYCRLF)
6509     {
6510     fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);
6511     if (common->mode != PCRE2_JIT_COMPLETE)
6512       lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6513 
6514     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6515     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6516     quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6517     }
6518    else
6519     {
6520     fast_forward_char_simd(common, common->newline, common->newline, 0);
6521 
6522     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6523     if (common->mode != PCRE2_JIT_COMPLETE)
6524       {
6525       OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
6526       SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
6527       }
6528     }
6529   }
6530 else
6531 #endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */
6532   {
6533   read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
6534   lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6535   if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6536     foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6537   check_newlinechar(common, common->nltype, &newline, FALSE);
6538   set_jumps(newline, loop);
6539   }
6540 
6541 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6542   {
6543   if (quit == NULL)
6544     {
6545     quit = JUMP(SLJIT_JUMP);
6546     JUMPHERE(foundcr);
6547     }
6548 
6549   notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6550   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6551   OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL);
6552   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
6553 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6554   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6555 #endif
6556   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6557   JUMPHERE(notfoundnl);
6558   JUMPHERE(quit);
6559   }
6560 
6561 if (lastchar)
6562   JUMPHERE(lastchar);
6563 JUMPHERE(firstchar);
6564 
6565 if (common->match_end_ptr != 0)
6566   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6567 }
6568 
6569 static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6570 
/* Emits the "start bitmap" fast-forward loop. The generated code reads the
code unit at STR_PTR, advances STR_PTR, and repeats until a code unit is found
whose bit is set in common->re->start_bitmap; STR_PTR is then moved back so
that it points at that code unit. When STR_PTR reaches STR_END the generated
code either jumps to common->failed_match (PCRE2_JIT_COMPLETE mode) or leaves
the loop with STR_PTR unchanged (all other modes).

TMP1 and TMP2 are used as scratch; TMP3 is also used when there are no virtual
registers, and RETURN_ADDR holds the saved STR_END when match_end_ptr is set. */
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
{
DEFINE_COMPILER;
const sljit_u8 *start_bits = common->re->start_bitmap;
struct sljit_label *start;
struct sljit_jump *partial_quit;
#if PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *found = NULL;
#endif
jump_list *matches = NULL;

if (common->match_end_ptr != 0)
  {
  /* Temporarily limit STR_END: the original value is kept in RETURN_ADDR and
  the candidate limit is the value stored at match_end_ptr plus one code unit.
  NOTE(review): SELECT presumably picks TMP1 when STR_END > TMP1, i.e. the
  smaller of the two ends up in STR_END - confirm against the macro. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
  }

/* Top of the scanning loop. */
start = LABEL();

/* End of subject: fail in complete mode, otherwise jump past the STR_PTR
adjustment at the end of this function. */
partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit);

/* Load the current code unit into TMP1 and step over it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Bit 0x80 of start_bits[31] is the bit of character 255; it is passed as the
nclass argument. If optimize_class() cannot handle the bitmap, fall back to a
generic bitmap lookup. */
if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* Code units >= 255 are outside the table: they are accepted when the bit
  of character 255 is set, otherwise scanning continues. */
  if ((start_bits[31] & 0x80) != 0)
    found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
  else
    CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
#elif defined SUPPORT_UNICODE
  /* NOTE(review): is_char7_bitset() presumably reports that only characters
  below 128 are present in the bitmap, so larger code units can be skipped
  without a table lookup - confirm. */
  if (common->utf && is_char7_bitset(start_bits, FALSE))
    CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
#endif
  /* TMP2 = bit index inside the byte, TMP1 = byte loaded from
  start_bits[char >> 3]. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
  /* Build the mask (1 << bit index) and test it against the loaded byte. The
  mask goes to TMP3 when real registers are available, otherwise TMP2 is
  reused. */
  if (!HAS_VIRTUAL_REGISTERS)
    {
    OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP3, 0);
    }
  else
    {
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
    }
  /* Bit not set: try the next code unit. */
  JUMPTO(SLJIT_ZERO, start);
  }
else
  /* The jumps collected by optimize_class() continue the scan. */
  set_jumps(matches, start);

#if PCRE2_CODE_UNIT_WIDTH != 8
if (found != NULL)
  JUMPHERE(found);
#endif

/* A candidate was found: undo the increment so STR_PTR points at it. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(partial_quit);

/* Restore the STR_END value saved at the top. */
if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
}
6642 
/* Emits code that searches forward in the subject for the required code unit
req_char (or its other-case form when caseless is set) and stores the position
where it was found in the stack slot common->req_char_ptr.

Arguments:
  common         compiler state; req_char_ptr must be non-zero
  req_char       the code unit to look for
  caseless       TRUE if the other case of req_char is acceptable as well
  has_firstchar  TRUE to start the search one code unit after STR_PTR

Returns: the list of jumps taken by the generated code when the code unit is
  not found before STR_END. The caller is responsible for binding them.

TMP1 and TMP2 are used as scratch registers by the generated code. */
static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *toolong;
struct sljit_jump *already_found;
struct sljit_jump *found;
struct sljit_jump *found_oc = NULL;
jump_list *not_found = NULL;
sljit_u32 oc, bit;

SLJIT_ASSERT(common->req_char_ptr != 0);
/* Skip the whole search when more than REQ_CU_MAX * 100 code units remain
after STR_PTR (toolong), or when the previously stored position is still ahead
of STR_PTR (already_found). */
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);

/* TMP1 is the search cursor. */
if (has_firstchar)
  OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
else
  OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);

/* oc is the other case of req_char, or req_char itself when there is none or
the search is caseful. */
oc = req_char;
if (caseless)
  {
  oc = TABLE_GET(req_char, common->fcc, req_char);
#if defined SUPPORT_UNICODE
  if (req_char > 127 && (common->utf || common->ucp))
    oc = UCD_OTHERCASE(req_char);
#endif
  }

#ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
  {
  not_found = fast_requested_char_simd(common, req_char, oc);
  }
else
#endif
  {
  /* Scalar search loop: one code unit per iteration. */
  loop = LABEL();
  add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);

  if (req_char == oc)
    found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
  else
    {
    bit = req_char ^ oc;
    if (is_powerof2(bit))
      {
      /* The two cases differ in a single bit: force that bit on and use one
      comparison for both. */
       OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
      found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
      }
    else
      {
      /* Otherwise each case needs its own comparison. */
      found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
      found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
      }
    }
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_JUMP, loop);

  JUMPHERE(found);
  if (found_oc)
    JUMPHERE(found_oc);
  }

/* Remember where the code unit was found, so later calls can take the
already_found shortcut. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);

JUMPHERE(already_found);
JUMPHERE(toolong);
return not_found;
}
6718 
/* Emits a fast-call helper that pops entries from the top of the JIT stack
(addressed through STACK_TOP) and writes the saved words back into the local
area. The word at STACK_TOP[-1] selects the kind of entry:

  > 0   offset of a local pair: two saved words are restored, 3 words popped
  == 0  end marker: the helper returns
  < 0   negated offset of a single local: one word is restored, 2 words popped

The return address is kept in RETURN_ADDR. TMP1 holds the base address obtained
with GET_LOCAL_BASE, TMP2 the entry word / destination address, and TMP3 is
used as scratch when there are no virtual registers. */
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *mainloop;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
GET_LOCAL_BASE(TMP1, 0, 0);

/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -SSIZE_OF(sw));
/* Compare the entry word with zero; both the signed <= and the zero flag are
requested because they are tested separately below. */
OP2U(SLJIT_SUB | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);
jump = JUMP(SLJIT_SIG_LESS_EQUAL);

/* Positive entry: restore two consecutive words at local base + offset. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
  }
else
  {
  /* Same restore through registers; TMP1 is borrowed as a temporary, so the
  local base is reloaded into it afterwards. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
  GET_LOCAL_BASE(TMP1, 0, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);

/* Entry word is <= 0. */
JUMPHERE(jump);
/* NOTE(review): this presumably re-declares, for the code after the jump
target, the flag state produced by the comparison above so that the zero flag
may be tested again - confirm against the sljit documentation. */
sljit_set_current_flags(compiler, SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z);
jump = JUMP(SLJIT_NOT_ZERO /* SIG_LESS */);
/* End of reverting values. */
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Negative entry: negate it to get the offset, then restore a single word. */
JUMPHERE(jump);
OP2(SLJIT_SUB, TMP2, 0, SLJIT_IMM, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
  }
else
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);
}
6774 
#ifdef SUPPORT_UNICODE
/* Bit masks built from ucp_* category numbers, one bit per category:
  UCPCAT(bit)               the single bit of one category
  UCPCAT2 / UCPCAT3         the union of two / three categories
  UCPCAT_RANGE(start, end)  all bits from start to end, both inclusive
  UCPCAT_L, UCPCAT_N        the ranges ucp_Ll..ucp_Lu and ucp_Nd..ucp_No
  UCPCAT_ALL                every bit from 0 up to and including ucp_Zs */
#define UCPCAT(bit) (1 << (bit))
#define UCPCAT2(bit1, bit2) (UCPCAT(bit1) | UCPCAT(bit2))
#define UCPCAT3(bit1, bit2, bit3) (UCPCAT(bit1) | UCPCAT(bit2) | UCPCAT(bit3))
#define UCPCAT_RANGE(start, end) (((1 << ((end) + 1)) - 1) - ((1 << (start)) - 1))
#define UCPCAT_L UCPCAT_RANGE(ucp_Ll, ucp_Lu)
#define UCPCAT_N UCPCAT_RANGE(ucp_Nd, ucp_No)
#define UCPCAT_ALL ((1 << (ucp_Zs + 1)) - 1)
#endif
6784 
/* Emits the fast-call helper used for word boundary tests. The generated code
classifies the character before STR_PTR (result in TMP3) and the character at
STR_PTR (result in TMP2) as word / non-word (1 / 0), then XORs the two values
into TMP2 with the zero flag updated, so a zero result means "no boundary".
A side that cannot be read (start or end of the subject) keeps the value 0.

Arguments:
  common  compiler state
  ucp     TRUE to classify characters by Unicode category instead of the
          ctypes table (only meaningful when SUPPORT_UNICODE is defined)

The return address is stored in LOCALS0 and reloaded into TMP1 before each
return. When common->invalid_utf is set, two extra exits handle invalid
sequences on either side: TMP2 is set to -1 if both sides are invalid,
otherwise to the classification of the previous character (TMP3). */
static void check_wordboundary(compiler_common *common, BOOL ucp)
{
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
#ifdef SUPPORT_UNICODE
struct sljit_label *valid_utf;
jump_list *invalid_utf1 = NULL;
#endif /* SUPPORT_UNICODE */
jump_list *invalid_utf2 = NULL;
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
struct sljit_jump *jump;
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */

SLJIT_UNUSED_ARG(ucp);
/* The shifts by 4 below rely on ctype_word being bit 4. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Get type of the previous char, and put it to TMP3. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
/* At the start of the subject there is no previous character: TMP3 stays 0. */
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    /* Step back only to run check_start_used_ptr(); the character in TMP1
    and STR_PTR are preserved in RETURN_ADDR and TMP2 around it. */
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
    }
  }
else
#endif /* SUPPORT_UNICODE */
  {
  if (common->mode == PCRE2_JIT_COMPLETE)
    peek_char_back(common, READ_CHAR_MAX, NULL);
  else
    {
    /* Move back, record the position, then read forward again.
    NOTE(review): READ_CHAR_UPDATE_STR_PTR presumably leaves STR_PTR after
    the character, i.e. back at its original position - confirm. */
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
    }
  }

/* Testing char type. */
#ifdef SUPPORT_UNICODE
if (ucp)
  {
  /* NOTE(review): the getucdtype helper presumably returns the category
  number of the character in TMP1 - confirm. TMP2 = 1 << category is then
  tested against the mask of Mn, Pc, and the letter and number ranges. */
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N);
  OP_FLAGS(SLJIT_MOV, TMP3, 0, SLJIT_NOT_ZERO);
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* Characters above 255 have no ctypes entry and are skipped past the table
  lookup, leaving TMP3 at 0. */
#if PCRE2_CODE_UNIT_WIDTH != 8
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  /* Here TMP3 has already been zeroed. */
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  /* TMP3 = bit 4 (ctype_word) of the ctypes entry of the character. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
JUMPHERE(skipread);

/* Now the character at STR_PTR; its type goes to TMP2. TMP2 is preset to 0
for the jumps collected in skipread_list, which are bound after the
classification below. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);

/* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UNICODE

/* Re-entry point used by the invalid_utf1 path at the end of this function. */
valid_utf = LABEL();

if (ucp)
  {
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
  }
else
#endif /* SUPPORT_UNICODE */
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
set_jumps(skipread_list, LABEL());

/* Combine both sides: TMP2 = TMP2 ^ TMP3, with the zero flag updated. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  /* The previous character was invalid. If the next one is valid, continue
  with the normal classification at valid_utf; otherwise return -1 in TMP2. */
  set_jumps(invalid_utf1, LABEL());

  peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

  /* Only the next character was invalid: return the type of the previous
  character in TMP2. */
  set_jumps(invalid_utf2, LABEL());
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
  }
#endif /* SUPPORT_UNICODE */
}
6933 
/* Tries to test a 256-bit character class bitmap with a few range
comparisons instead of a table lookup. The bitmap is scanned for positions
where the bit value changes; if there are at most four such boundaries, compare
instructions on TMP1 are emitted and the jumps taken for a non-matching
character are added to *backtracks.

Arguments:
  common      compiler state
  bits        the class bitmap (32 bytes, bit n of byte m is char m * 8 + n)
  nclass      expected state of characters above 255; when the last bit of
              the bitmap disagrees with it, an extra boundary at 256 is added
  invert      TRUE to swap the meaning of matching and non-matching
  backtracks  receives the emitted jumps

Returns: TRUE if code was emitted, FALSE if the bitmap needs more than four
  boundaries (nothing has been emitted in that case). */
static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
DEFINE_COMPILER;
int ranges[MAX_CLASS_RANGE_SIZE];
sljit_u8 bit, cbit, all;
int i, byte, length = 0;

bit = bits[0] & 0x1;
/* All bits will be zero or one (since bit is zero or one). */
all = -bit;

/* Collect in ranges[] every character at which the bit value flips. Whole
bytes that equal the current fill pattern are skipped eight bits at a time. */
for (i = 0; i < 256; )
  {
  byte = i >> 3;
  if ((i & 0x7) == 0 && bits[byte] == all)
    i += 8;
  else
    {
    cbit = (bits[byte] >> (i & 0x7)) & 0x1;
    if (cbit != bit)
      {
      if (length >= MAX_CLASS_RANGE_SIZE)
        return FALSE;
      ranges[length] = i;
      length++;
      bit = cbit;
      all = -cbit;
      }
    i++;
    }
  }

/* The state of the last bitmap bit differs from the state wanted above 255:
close the final range at 256. */
if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
  {
  if (length >= MAX_CLASS_RANGE_SIZE)
    return FALSE;
  ranges[length] = 256;
  length++;
  }

if (length < 0 || length > 4)
  return FALSE;

/* From here on, bit is the (possibly inverted) state of character 0, i.e. of
the segment below ranges[0]. */
bit = bits[0] & 0x1;
if (invert) bit ^= 0x1;

/* No character is accepted. */
if (length == 0 && bit == 0)
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

switch(length)
  {
  case 0:
  /* When bit != 0, all characters are accepted. */
  return TRUE;

  case 1:
  /* One boundary: a single comparison against it. */
  add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 2:
  /* One segment [ranges[0], ranges[1]): an unsigned compare after
  subtracting the base, or an equality test for a single character. */
  if (ranges[0] + 1 != ranges[1])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
    }
  else
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 3:
  if (bit != 0)
    {
    /* Rejected: everything from ranges[2] up, and [ranges[0], ranges[1]). */
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
    return TRUE;
    }

  /* Rejected: everything below ranges[0], and [ranges[1], ranges[2]). */
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
  return TRUE;

  case 4:
  /* Two segments of equal length whose start points differ in exactly one
  bit: OR that bit into TMP1 so that both segments collapse onto the second
  one, which is then tested like a single segment. */
  if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
      && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
      && (ranges[1] & (ranges[2] - ranges[0])) == 0
      && is_powerof2(ranges[2] - ranges[0]))
    {
    SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    return TRUE;
    }

  if (bit != 0)
    {
    /* Both segments are rejected. i records how much has already been
    subtracted from TMP1 by the first test, so that the second test can be
    expressed relative to it. */
    i = 0;
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      i = ranges[0];
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));

    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
    return TRUE;
    }

  /* Both segments are accepted: reject everything outside
  [ranges[0], ranges[3]) first, then the gap [ranges[1], ranges[2]). TMP1 is
  already relative to ranges[0] for the second test. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
  return TRUE;

  default:
  SLJIT_UNREACHABLE();
  return FALSE;
  }
}
7085 
/* Tries to test a character class bitmap by comparing TMP1 against a short
list of individual characters, using conditional moves instead of branches.

Arguments:
  common      compiler state
  bits        the class bitmap (32 bytes, bit n of byte m is char m * 8 + n)
  nclass      TRUE to list the characters whose bits are clear instead of set
  invert      TRUE to swap the meaning of matching and non-matching
  backtracks  receives the single jump emitted at the end

Returns: TRUE if code was emitted; FALSE if conditional moves are not
  available, the list would exceed MAX_CLASS_CHARS_SIZE, or the list is empty
  (nothing has been emitted in those cases). */
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
DEFINE_COMPILER;
uint16_t chars[MAX_CLASS_CHARS_SIZE];
uint8_t mask;
sljit_s32 cond;
int count = 0;
int paired = 0;
int idx, bitpos, slot, ch;

if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  return FALSE;

/* Build the character list. A character with bit 0x20 set whose partner
(the same value without 0x20) is already listed is merged into that entry,
which is marked by OR-ing 0x120 into it. */
for (idx = 0; idx < 32; idx++)
  {
  mask = nclass ? (uint8_t)~bits[idx] : bits[idx];

  for (bitpos = 0; mask != 0; bitpos++, mask >>= 1)
    {
    if ((mask & 0x1) == 0)
      continue;

    ch = idx * 8 + bitpos;
    slot = count;

    if ((ch & 0x20) != 0)
      for (slot = 0; slot < count; slot++)
        if (chars[slot] == ch - 0x20)
          {
          chars[slot] |= 0x120;
          break;
          }

    /* Merged into an existing entry. */
    if (slot != count)
      continue;

    if (count >= MAX_CLASS_CHARS_SIZE)
      return FALSE;

    chars[count++] = (uint16_t) ch;
    }
  }

if (count == 0) return FALSE;  /* Should never occur, but stops analyzers complaining. */

/* TMP2 accumulates the result: it stays zero unless TMP1 equals a listed
character. NOTE(review): SELECT presumably copies TMP1 into TMP2 when the
zero flag is set - confirm against the macro. Character 0 is handled
separately through OP_FLAGS. */
idx = 0;
if (chars[0] == 0)
  {
  idx = 1;
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);

/* First pass: the unmerged characters; merged entries are only counted. */
for (; idx < count; idx++)
  {
  if ((chars[idx] & 0x100) != 0)
    {
    paired++;
    continue;
    }

  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, chars[idx]);
  SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);
  }

/* Second pass: force bit 0x20 on in TMP1 so that one comparison covers both
members of each merged pair. */
if (paired != 0)
  {
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);

  for (idx = 0; idx < count; idx++)
    {
    if ((chars[idx] & 0x100) == 0)
      continue;

    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, chars[idx] & 0xff);
    SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);
    }
  }

/* One final jump on TMP2; its sense depends on nclass and invert. */
cond = (invert ? !nclass : nclass) ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
add_jump(compiler, backtracks, CMP(cond, TMP2, 0, SLJIT_IMM, 0));
return TRUE;
}
7186 
/* Tries the two class optimizations in turn: the range based one first and,
if that declines, the character list based one. All arguments are passed
through unchanged. Returns TRUE if either of them emitted code. */
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
BOOL handled = optimize_class_ranges(common, bits, nclass, invert, backtracks);

if (!handled)
  handled = optimize_class_chars(common, bits, nclass, invert, backtracks);
return handled;
}
7194 
/* Emits the fast-call helper that tests TMP1 against the "any newline" set:
0x0a to 0x0d, 0x85 and, for 16/32-bit code units or 8-bit UTF mode, 0x2028 and
0x2029. The result is accumulated in TMP2 and the last operation also updates
the zero flag. The return address is kept in RETURN_ADDR. */
static void check_anynewline(compiler_common *common)
{
/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
/* TMP1 itself is also modified by the generated code. */
DEFINE_COMPILER;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Rebase TMP1 to 0x0a: the range 0x0a..0x0d becomes 0..3. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  /* Record the 0x85 result, then set the lowest bit so that 0x2028 and
  0x2029 are covered by a single comparison. */
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the last comparison into TMP2 and set the zero flag from it. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7221 
/* Emits the fast-call helper that tests TMP1 against the horizontal space
characters: 0x09, 0x20, 0xa0 and, for 16/32-bit code units or 8-bit UTF mode,
0x1680, 0x180e, 0x2000 to 0x200a, 0x202f, 0x205f and 0x3000. The result is
accumulated in TMP2 and the last operation also updates the zero flag. The
return address is kept in RETURN_ADDR. */
static void check_hspace(compiler_common *common)
{
/* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
DEFINE_COMPILER;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Rebase TMP1 to 0x2000 (TMP1 is modified here); the remaining constants
  are written relative to that base. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the last comparison into TMP2 and set the zero flag from it. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7260 
/* Emits the fast-call helper that tests TMP1 against the vertical space
characters: 0x0a to 0x0d, 0x85 and, for 16/32-bit code units or 8-bit UTF mode,
0x2028 and 0x2029. The result is accumulated in TMP2 and the last operation
also updates the zero flag. The return address is kept in RETURN_ADDR. */
static void check_vspace(compiler_common *common)
{
/* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
/* TMP1 itself is also modified by the generated code. */
DEFINE_COMPILER;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Rebase TMP1 to 0x0a: the range 0x0a..0x0d becomes 0..3. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  /* Record the 0x85 result, then set the lowest bit so that 0x2028 and
  0x2029 are covered by a single comparison. */
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the last comparison into TMP2 and set the zero flag from it. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7288 
/* Emits the fast-call helper for caseful comparison of two code unit
sequences. On entry to the generated code TMP1 points to the first sequence
and TMP2 holds the length in bytes; STR_PTR is first moved back by TMP2 and
then points to the second sequence. The loop compares one code unit from each
side per iteration, decreasing TMP2 by one code unit, and stops at the first
difference or when TMP2 reaches zero.

The return address is stored in LOCALS0 and reloaded into TMP1 before
returning. One of three loop forms is emitted, depending on whether the target
supports post-update or pre-update memory addressing. */
static void do_casefulcmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;
int char1_reg;
int char2_reg;

/* With virtual registers, STR_END and STACK_TOP are borrowed to hold the two
code units; their values are parked in TMP3 and RETURN_ADDR below and restored
at the end. Otherwise TMP3 and RETURN_ADDR are used directly. */
if (HAS_VIRTUAL_REGISTERS)
  {
  char1_reg = STR_END;
  char2_reg = STACK_TOP;
  }
else
  {
  char1_reg = TMP3;
  char2_reg = RETURN_ADDR;
  }

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

if (char1_reg == STR_END)
  {
  OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
  }

/* NOTE(review): the calls with SLJIT_MEM_SUPP presumably only query whether
the addressing form is supported, without emitting code - confirm against the
sljit documentation. */
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  {
  /* Post-update form: load, then advance the pointer. */
  label = LABEL();
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_NOT_ZERO, label);

  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  }
else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  {
  /* Pre-update form: both pointers start one code unit early because each
  load advances the pointer first; STR_PTR is corrected after the loop. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  label = LABEL();
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_NOT_ZERO, label);

  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }
else
  {
  /* Generic form: plain loads followed by explicit pointer increments. */
  label = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_NOT_ZERO, label);

  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  }

/* Give the borrowed registers their original values back. */
if (char1_reg == STR_END)
  {
  OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
  }

OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7368 
/* Emits the shared routine that compares two code unit sequences caselessly.

The routine is entered with SLJIT_FAST_ENTER and left with SLJIT_FAST_RETURN;
the return address is parked in LOCALS0 in between. As the emitted code shows,
TMP1 addresses the first sequence and TMP2 holds the remaining length in bytes;
STR_PTR is first moved back by TMP2 and then walks forward over the second
sequence. Each code unit is folded through the common->lcc table (when the code
unit width is not 8, units above 255 skip the table and stay unchanged). The
loop ends at the first difference, which is detected before TMP2 is decremented,
or when TMP2 has been counted down to zero.

STR_END is borrowed for the unit of the first sequence and is preserved in
LOCALS1. With HAS_VIRTUAL_REGISTERS, STACK_TOP and STACK_LIMIT are borrowed for
the second unit and the table address; they are preserved in TMP3 and
RETURN_ADDR respectively. */
static void do_caselesscmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *loop_start;
struct sljit_jump *mismatch;
#if PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *skip_fold;
#endif
const int char1_reg = STR_END;
const int char2_reg = HAS_VIRTUAL_REGISTERS ? STACK_TOP : RETURN_ADDR;
const int lcc_table = HAS_VIRTUAL_REGISTERS ? STACK_LIMIT : TMP3;
int update_mode;

/* Support queries (SLJIT_MEM_SUPP): choose between a post-update load (1),
a pre-update load (2) and a plain load followed by explicit pointer
increments (0). */
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  update_mode = 1;
else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  update_mode = 2;
else
  update_mode = 0;

/* Prologue: keep the return address, rewind STR_PTR, free the work registers. */
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);

if (char2_reg == STACK_TOP)
  {
  OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
  }

OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);

/* Loop head: fetch one code unit from each sequence. */
switch (update_mode)
  {
  case 1:
  loop_start = LABEL();
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case 2:
  /* A pre-update load advances before reading, so start one unit earlier. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  loop_start = LABEL();
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  loop_start = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

/* Fold both units through the lower casing table. */
#if PCRE2_CODE_UNIT_WIDTH != 8
skip_fold = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
JUMPHERE(skip_fold);
skip_fold = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
JUMPHERE(skip_fold);
#else
OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
#endif

/* The plain load variant still has to advance STR_PTR by hand. */
if (update_mode == 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

mismatch = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop_start);

/* Epilogue: reload the return address and give the borrowed registers back. */
JUMPHERE(mismatch);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

/* Undo the initial one unit rewind of the pre-update variant. */
if (update_mode == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (char2_reg == STACK_TOP)
  {
  OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
  }

OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7466 
/* Emits the comparison of one character of a literal sequence: a single code
unit, or, in UTF mode, every code unit of the character that starts at cc.

Arguments:
  common      compiler state
  caseless    when TRUE and the character has another case, the differing bit
              is ignored by the comparison
  cc          first code unit of the character in the compiled pattern
  context     state carried across the calls that handle one sequence:
              length is the number of bytes not compared yet (data is loaded
              from STR_PTR - length), sourcereg is the register that receives
              the next load (-1 before the first call), and, when unaligned
              loads are used, c / oc / ucharptr collect the expected code units
              and their case masks until a whole group can be compared
  backtracks  list that receives the jump taken on a mismatch

Returns:      cc advanced past the code units handled by this call */
static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
    compare_context *context, jump_list **backtracks)
{
DEFINE_COMPILER;
unsigned int othercasebit = 0;
PCRE2_SPTR othercasechar = NULL;
#ifdef SUPPORT_UNICODE
int utflength;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  othercasebit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(othercasebit);
  /* Extracting bit difference info. */
  /* The upper part of the value is the index of the code unit that carries the
  case difference; the low eight bits are the bit mask itself. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  othercasechar = cc + (othercasebit >> 8);
  othercasebit &= 0xff;
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* Note that this code only handles characters in the BMP. If there
  ever are characters outside the BMP whose othercase differs in only one
  bit from itself (there currently are none), this code will need to be
  revised for PCRE2_CODE_UNIT_WIDTH == 32. */
  othercasechar = cc + (othercasebit >> 9);
  /* Bit 0x100 selects the high byte of the code unit for the mask. */
  if ((othercasebit & 0x100) != 0)
    othercasebit = (othercasebit & 0xff) << 8;
  else
    othercasebit &= 0xff;
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  }

/* First call for this sequence: preload the first group into TMP1 and direct
the next load to TMP2. The widest load that fits the remaining length is used
when unaligned access is available. */
if (context->sourcereg == -1)
  {
#if PCRE2_CODE_UNIT_WIDTH == 8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 32
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  context->sourcereg = TMP2;
  }

/* With Unicode support the body below runs once per code unit of the
character; without it the body is plain straight-line code executed once. */
#ifdef SUPPORT_UNICODE
utflength = 1;
if (common->utf && HAS_EXTRALEN(*cc))
  utflength += GET_EXTRALEN(*cc);

do
  {
#endif

  context->length -= IN_UCHARS(1);
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)

  /* Unaligned read is supported. */
  /* Collect the expected code unit in c and its case mask (or zero) in oc. */
  if (othercasebit != 0 && othercasechar == cc)
    {
    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
    context->oc.asuchars[context->ucharptr] = othercasebit;
    }
  else
    {
    context->c.asuchars[context->ucharptr] = *cc;
    context->oc.asuchars[context->ucharptr] = 0;
    }
  context->ucharptr++;

  /* Compare once a full group has been collected or nothing remains. In 8 bit
  mode a group is also closed at two units when exactly one byte is left, so
  the sizes handled by the switch below are 4, 2 or 1 bytes. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Issue the load for the following group first, then switch to the other
    register, which holds the data loaded earlier for the current group. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if PCRE2_CODE_UNIT_WIDTH == 8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

    /* ucharptr counts code units, so the case labels are byte sizes divided by
    the code unit size. When a case mask is present it is ORed into the loaded
    data before the comparison with the expected value. */
    switch(context->ucharptr)
      {
      case 4 / sizeof(PCRE2_UCHAR):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(PCRE2_UCHAR):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#if PCRE2_CODE_UNIT_WIDTH == 8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    /* Start collecting the next group. */
    context->ucharptr = 0;
    }

#else

  /* Unaligned read is unsupported or in 32 bit mode. */
  /* One code unit per step: load the following unit (if any), then switch to
  the other register, which holds the unit loaded earlier, and compare it. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

  if (othercasebit != 0 && othercasechar == cc)
    {
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));

#endif

  cc++;
#ifdef SUPPORT_UNICODE
  utflength--;
  }
while (utflength > 0);
#endif

return cc;
}
7619 
7620 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
7621 
/* Re-bases the character value held in TMP1. The generated code keeps TMP1
equal to (character - charoffset); this macro emits the single ADD or SUB that
turns it into (character - value) and then records value as the new charoffset.
Nothing is emitted when value already equals charoffset.

The expansion is wrapped in do { } while (0) so that it forms exactly one
statement. It can therefore be the body of an unbraced if / else / loop without
the trailing assignment escaping the condition. An invocation must be followed
by a semicolon.

value is evaluated more than once, so it must be free of side effects. A
variable named charoffset must be in scope where the macro is used. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
7631 
/* Prototype only; the definition is not in this part of the file. It is needed
here because compile_xclass_matchingpath() calls this function with OP_ALLANY
when a class turns out to accept every character or none. */
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
7633 
#ifdef SUPPORT_UNICODE
/* Bits of the unicode_status word that compile_xclass_matchingpath() builds
while scanning an extended class and consults while emitting its code. */

/* Some item is tested against the character value itself, so the character is
copied to RETURN_ADDR before the property record lookup overwrites TMP1 and is
moved back into TMP1 afterwards. */
#define XCLASS_SAVE_CHAR 0x001
/* The character has already been copied to RETURN_ADDR (set on the bitmap
path), so the copy before the property lookup is not repeated. */
#define XCLASS_CHAR_SAVED 0x002
/* The chartype field of the record is needed: set for general category style
items and for PT_SPACE, PT_PXSPACE, PT_PXGRAPH, PT_PXPRINT and PT_PXPUNCT. */
#define XCLASS_HAS_TYPE 0x004
/* The script field is compared: set for PT_SC and for a non-negated PT_SCX. */
#define XCLASS_HAS_SCRIPT 0x008
/* The class has a PT_SCX item, tested against the script extension sets. */
#define XCLASS_HAS_SCRIPT_EXTENSION 0x010
/* The class has a PT_BOOL item, tested against the boolean property sets. */
#define XCLASS_HAS_BOOL 0x020
/* The class has a PT_BIDICL item, tested against the bidi class. */
#define XCLASS_HAS_BIDICL 0x040
/* Any of these bits makes the generated code look up the property record of
the character. */
#define XCLASS_NEEDS_UCD (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_SCRIPT_EXTENSION | XCLASS_HAS_BOOL | XCLASS_HAS_BIDICL)
/* The class has a negated (XCL_NOTPROP) PT_SCX item; the script field is then
loaded into TMP2 for the script extension tests. */
#define XCLASS_SCRIPT_EXTENSION_NOTPROP 0x080
/* TMP2 was overwritten for the negated script extension tests while its old
value is still needed for the chartype load. These bits record where the old
value was parked (RETURN_ADDR or LOCALS0) so that it can be restored. */
#define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR 0x100
#define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0 0x200
#endif /* SUPPORT_UNICODE */
7647 
compile_xclass_matchingpath(compiler_common * common,PCRE2_SPTR cc,jump_list ** backtracks)7648 static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
7649 {
7650 DEFINE_COMPILER;
7651 jump_list *found = NULL;
7652 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
7653 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
7654 struct sljit_jump *jump = NULL;
7655 PCRE2_SPTR ccbegin;
7656 int compares, invertcmp, numberofcmps;
7657 #if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
7658 BOOL utf = common->utf;
7659 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
7660 
7661 #ifdef SUPPORT_UNICODE
7662 sljit_u32 unicode_status = 0;
7663 sljit_u32 category_list = 0;
7664 sljit_u32 items;
7665 int typereg = TMP1;
7666 const sljit_u32 *other_cases;
7667 #endif /* SUPPORT_UNICODE */
7668 
7669 /* Scanning the necessary info. */
7670 cc++;
7671 ccbegin = cc;
7672 compares = 0;
7673 
7674 if (cc[-1] & XCL_MAP)
7675   {
7676   min = 0;
7677   cc += 32 / sizeof(PCRE2_UCHAR);
7678   }
7679 
7680 while (*cc != XCL_END)
7681   {
7682   compares++;
7683 
7684   if (*cc == XCL_SINGLE)
7685     {
7686     cc ++;
7687     GETCHARINCTEST(c, cc);
7688     if (c > max) max = c;
7689     if (c < min) min = c;
7690 #ifdef SUPPORT_UNICODE
7691     unicode_status |= XCLASS_SAVE_CHAR;
7692 #endif /* SUPPORT_UNICODE */
7693     }
7694   else if (*cc == XCL_RANGE)
7695     {
7696     cc ++;
7697     GETCHARINCTEST(c, cc);
7698     if (c < min) min = c;
7699     GETCHARINCTEST(c, cc);
7700     if (c > max) max = c;
7701 #ifdef SUPPORT_UNICODE
7702     unicode_status |= XCLASS_SAVE_CHAR;
7703 #endif /* SUPPORT_UNICODE */
7704     }
7705 #ifdef SUPPORT_UNICODE
7706   else
7707     {
7708     SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7709     cc++;
7710 
7711     if (*cc == PT_CLIST && cc[-1] == XCL_PROP)
7712       {
7713       other_cases = PRIV(ucd_caseless_sets) + cc[1];
7714       while (*other_cases != NOTACHAR)
7715         {
7716         if (*other_cases > max) max = *other_cases;
7717         if (*other_cases < min) min = *other_cases;
7718         other_cases++;
7719         }
7720       }
7721     else
7722       {
7723       max = READ_CHAR_MAX;
7724       min = 0;
7725       }
7726 
7727     items = 0;
7728 
7729     switch(*cc)
7730       {
7731       case PT_ANY:
7732       /* Any either accepts everything or ignored. */
7733       if (cc[-1] == XCL_PROP)
7734         items = UCPCAT_ALL;
7735       else
7736         compares--;
7737       break;
7738 
7739       case PT_LAMP:
7740       items = UCPCAT3(ucp_Lu, ucp_Ll, ucp_Lt);
7741       break;
7742 
7743       case PT_GC:
7744       items = UCPCAT_RANGE(PRIV(ucp_typerange)[(int)cc[1] * 2], PRIV(ucp_typerange)[(int)cc[1] * 2 + 1]);
7745       break;
7746 
7747       case PT_PC:
7748       items = UCPCAT(cc[1]);
7749       break;
7750 
7751       case PT_WORD:
7752       items = UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N;
7753       break;
7754 
7755       case PT_ALNUM:
7756       items = UCPCAT_L | UCPCAT_N;
7757       break;
7758 
7759       case PT_SCX:
7760       unicode_status |= XCLASS_HAS_SCRIPT_EXTENSION;
7761       if (cc[-1] == XCL_NOTPROP)
7762         {
7763         unicode_status |= XCLASS_SCRIPT_EXTENSION_NOTPROP;
7764         break;
7765         }
7766       compares++;
7767       /* Fall through */
7768 
7769       case PT_SC:
7770       unicode_status |= XCLASS_HAS_SCRIPT;
7771       break;
7772 
7773       case PT_SPACE:
7774       case PT_PXSPACE:
7775       case PT_PXGRAPH:
7776       case PT_PXPRINT:
7777       case PT_PXPUNCT:
7778       unicode_status |= XCLASS_SAVE_CHAR | XCLASS_HAS_TYPE;
7779       break;
7780 
7781       case PT_CLIST:
7782       case PT_UCNC:
7783       case PT_PXXDIGIT:
7784       unicode_status |= XCLASS_SAVE_CHAR;
7785       break;
7786 
7787       case PT_BOOL:
7788       unicode_status |= XCLASS_HAS_BOOL;
7789       break;
7790 
7791       case PT_BIDICL:
7792       unicode_status |= XCLASS_HAS_BIDICL;
7793       break;
7794 
7795       default:
7796       SLJIT_UNREACHABLE();
7797       break;
7798       }
7799 
7800     if (items > 0)
7801       {
7802       if (cc[-1] == XCL_NOTPROP)
7803         items ^= UCPCAT_ALL;
7804       category_list |= items;
7805       unicode_status |= XCLASS_HAS_TYPE;
7806       compares--;
7807       }
7808 
7809     cc += 2;
7810     }
7811 #endif /* SUPPORT_UNICODE */
7812   }
7813 
7814 #ifdef SUPPORT_UNICODE
7815 if (category_list == UCPCAT_ALL)
7816   {
7817   /* All characters are accepted, same as dotall. */
7818   compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
7819   if (list == backtracks)
7820     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7821   return;
7822   }
7823 
7824 if (compares == 0 && category_list == 0)
7825   {
7826   /* No characters are accepted, same as (*F) or dotall. */
7827   compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
7828   if (list != backtracks)
7829     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7830   return;
7831   }
7832 #else /* !SUPPORT_UNICODE */
7833 SLJIT_ASSERT(compares > 0);
7834 #endif /* SUPPORT_UNICODE */
7835 
7836 /* We are not necessary in utf mode even in 8 bit mode. */
7837 cc = ccbegin;
7838 if ((cc[-1] & XCL_NOT) != 0)
7839   read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
7840 else
7841   {
7842 #ifdef SUPPORT_UNICODE
7843   read_char(common, min, max, (unicode_status & XCLASS_NEEDS_UCD) ? backtracks : NULL, 0);
7844 #else /* !SUPPORT_UNICODE */
7845   read_char(common, min, max, NULL, 0);
7846 #endif /* SUPPORT_UNICODE */
7847   }
7848 
7849 if ((cc[-1] & XCL_HASPROP) == 0)
7850   {
7851   if ((cc[-1] & XCL_MAP) != 0)
7852     {
7853     jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7854     if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
7855       {
7856       OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7857       OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7858       OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7859       OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7860       OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
7861       add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
7862       }
7863 
7864     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7865     JUMPHERE(jump);
7866 
7867     cc += 32 / sizeof(PCRE2_UCHAR);
7868     }
7869   else
7870     {
7871     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
7872     add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
7873     }
7874   }
7875 else if ((cc[-1] & XCL_MAP) != 0)
7876   {
7877   OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7878 #ifdef SUPPORT_UNICODE
7879   unicode_status |= XCLASS_CHAR_SAVED;
7880 #endif /* SUPPORT_UNICODE */
7881   if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
7882     {
7883 #if PCRE2_CODE_UNIT_WIDTH == 8
7884     jump = NULL;
7885     if (common->utf)
7886 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7887       jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7888 
7889     OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7890     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7891     OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7892     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7893     OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
7894     add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
7895 
7896 #if PCRE2_CODE_UNIT_WIDTH == 8
7897     if (common->utf)
7898 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7899       JUMPHERE(jump);
7900     }
7901 
7902   OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7903   cc += 32 / sizeof(PCRE2_UCHAR);
7904   }
7905 
7906 #ifdef SUPPORT_UNICODE
7907 if (unicode_status & XCLASS_NEEDS_UCD)
7908   {
7909   if ((unicode_status & (XCLASS_SAVE_CHAR | XCLASS_CHAR_SAVED)) == XCLASS_SAVE_CHAR)
7910     OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7911 
7912 #if PCRE2_CODE_UNIT_WIDTH == 32
7913   if (!common->utf)
7914     {
7915     jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
7916     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
7917     JUMPHERE(jump);
7918     }
7919 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
7920 
7921   OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7922   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
7923   OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
7924   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
7925   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7926   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7927   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
7928   OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
7929   OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
7930   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
7931   OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
7932 
7933   ccbegin = cc;
7934 
7935   if (category_list != 0)
7936     compares++;
7937 
7938   if (unicode_status & XCLASS_HAS_BIDICL)
7939     {
7940     OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
7941     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT);
7942 
7943     while (*cc != XCL_END)
7944       {
7945       if (*cc == XCL_SINGLE)
7946         {
7947         cc ++;
7948         GETCHARINCTEST(c, cc);
7949         }
7950       else if (*cc == XCL_RANGE)
7951         {
7952         cc ++;
7953         GETCHARINCTEST(c, cc);
7954         GETCHARINCTEST(c, cc);
7955         }
7956       else
7957         {
7958         SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7959         cc++;
7960         if (*cc == PT_BIDICL)
7961           {
7962           compares--;
7963           invertcmp = (compares == 0 && list != backtracks);
7964           if (cc[-1] == XCL_NOTPROP)
7965             invertcmp ^= 0x1;
7966           jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
7967           add_jump(compiler, compares > 0 ? list : backtracks, jump);
7968           }
7969         cc += 2;
7970         }
7971       }
7972 
7973     cc = ccbegin;
7974     }
7975 
7976   if (unicode_status & XCLASS_HAS_BOOL)
7977     {
7978     OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops));
7979     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BPROPS_MASK);
7980     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
7981 
7982     while (*cc != XCL_END)
7983       {
7984       if (*cc == XCL_SINGLE)
7985         {
7986         cc ++;
7987         GETCHARINCTEST(c, cc);
7988         }
7989       else if (*cc == XCL_RANGE)
7990         {
7991         cc ++;
7992         GETCHARINCTEST(c, cc);
7993         GETCHARINCTEST(c, cc);
7994         }
7995       else
7996         {
7997         SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7998         cc++;
7999         if (*cc == PT_BOOL)
8000           {
8001           compares--;
8002           invertcmp = (compares == 0 && list != backtracks);
8003           if (cc[-1] == XCL_NOTPROP)
8004             invertcmp ^= 0x1;
8005 
8006           OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f));
8007           add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
8008           }
8009         cc += 2;
8010         }
8011       }
8012 
8013     cc = ccbegin;
8014     }
8015 
8016   if (unicode_status & XCLASS_HAS_SCRIPT)
8017     {
8018     OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
8019 
8020     while (*cc != XCL_END)
8021       {
8022       if (*cc == XCL_SINGLE)
8023         {
8024         cc ++;
8025         GETCHARINCTEST(c, cc);
8026         }
8027       else if (*cc == XCL_RANGE)
8028         {
8029         cc ++;
8030         GETCHARINCTEST(c, cc);
8031         GETCHARINCTEST(c, cc);
8032         }
8033       else
8034         {
8035         SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
8036         cc++;
8037         switch (*cc)
8038           {
8039           case PT_SCX:
8040           if (cc[-1] == XCL_NOTPROP)
8041             break;
8042           /* Fall through */
8043 
8044           case PT_SC:
8045           compares--;
8046           invertcmp = (compares == 0 && list != backtracks);
8047           if (cc[-1] == XCL_NOTPROP)
8048             invertcmp ^= 0x1;
8049 
8050           add_jump(compiler, compares > 0 ? list : backtracks, CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]));
8051           }
8052         cc += 2;
8053         }
8054       }
8055 
8056     cc = ccbegin;
8057     }
8058 
8059   if (unicode_status & XCLASS_HAS_SCRIPT_EXTENSION)
8060     {
8061     OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
8062     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK);
8063     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
8064 
8065     if (unicode_status & XCLASS_SCRIPT_EXTENSION_NOTPROP)
8066       {
8067       if (unicode_status & XCLASS_HAS_TYPE)
8068         {
8069         if (unicode_status & XCLASS_SAVE_CHAR)
8070           {
8071           OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP2, 0);
8072           unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0;
8073           }
8074         else
8075           {
8076           OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
8077           unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR;
8078           }
8079         }
8080       OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
8081       }
8082 
8083     while (*cc != XCL_END)
8084       {
8085       if (*cc == XCL_SINGLE)
8086         {
8087         cc ++;
8088         GETCHARINCTEST(c, cc);
8089         }
8090       else if (*cc == XCL_RANGE)
8091         {
8092         cc ++;
8093         GETCHARINCTEST(c, cc);
8094         GETCHARINCTEST(c, cc);
8095         }
8096       else
8097         {
8098         SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
8099         cc++;
8100         if (*cc == PT_SCX)
8101           {
8102           compares--;
8103           invertcmp = (compares == 0 && list != backtracks);
8104 
8105           jump = NULL;
8106           if (cc[-1] == XCL_NOTPROP)
8107             {
8108             jump = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, (int)cc[1]);
8109             if (invertcmp)
8110               {
8111               add_jump(compiler, backtracks, jump);
8112               jump = NULL;
8113               }
8114             invertcmp ^= 0x1;
8115             }
8116 
8117           OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_script_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f));
8118           add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
8119 
8120           if (jump != NULL)
8121             JUMPHERE(jump);
8122           }
8123         cc += 2;
8124         }
8125       }
8126 
8127     if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0)
8128       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
8129     else if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR)
8130       OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
8131     cc = ccbegin;
8132     }
8133 
8134   if (unicode_status & XCLASS_SAVE_CHAR)
8135     OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
8136 
8137   if (unicode_status & XCLASS_HAS_TYPE)
8138     {
8139     if (unicode_status & XCLASS_SAVE_CHAR)
8140       typereg = RETURN_ADDR;
8141 
8142     OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
8143     OP2(SLJIT_SHL, typereg, 0, SLJIT_IMM, 1, TMP2, 0);
8144 
8145     if (category_list > 0)
8146       {
8147       compares--;
8148       invertcmp = (compares == 0 && list != backtracks);
8149       OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, category_list);
8150       add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
8151       }
8152     }
8153   }
8154 #endif /* SUPPORT_UNICODE */
8155 
8156 /* Generating code. */
8157 charoffset = 0;
8158 numberofcmps = 0;
8159 
8160 while (*cc != XCL_END)
8161   {
8162   compares--;
8163   invertcmp = (compares == 0 && list != backtracks);
8164   jump = NULL;
8165 
8166   if (*cc == XCL_SINGLE)
8167     {
8168     cc ++;
8169     GETCHARINCTEST(c, cc);
8170 
8171     if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
8172       {
8173       OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8174       OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8175       numberofcmps++;
8176       }
8177     else if (numberofcmps > 0)
8178       {
8179       OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8180       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
8181       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8182       numberofcmps = 0;
8183       }
8184     else
8185       {
8186       jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8187       numberofcmps = 0;
8188       }
8189     }
8190   else if (*cc == XCL_RANGE)
8191     {
8192     cc ++;
8193     GETCHARINCTEST(c, cc);
8194     SET_CHAR_OFFSET(c);
8195     GETCHARINCTEST(c, cc);
8196 
8197     if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
8198       {
8199       OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8200       OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8201       numberofcmps++;
8202       }
8203     else if (numberofcmps > 0)
8204       {
8205       OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8206       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
8207       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8208       numberofcmps = 0;
8209       }
8210     else
8211       {
8212       jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8213       numberofcmps = 0;
8214       }
8215     }
8216 #ifdef SUPPORT_UNICODE
8217   else
8218     {
8219     SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
8220     if (*cc == XCL_NOTPROP)
8221       invertcmp ^= 0x1;
8222     cc++;
8223     switch(*cc)
8224       {
8225       case PT_ANY:
8226       case PT_LAMP:
8227       case PT_GC:
8228       case PT_PC:
8229       case PT_SC:
8230       case PT_SCX:
8231       case PT_BOOL:
8232       case PT_BIDICL:
8233       case PT_WORD:
8234       case PT_ALNUM:
8235       compares++;
8236       /* Already handled. */
8237       break;
8238 
8239       case PT_SPACE:
8240       case PT_PXSPACE:
8241       SET_CHAR_OFFSET(9);
8242       OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
8243       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8244 
8245       OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
8246       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8247 
8248       OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
8249       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8250 
8251       OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Zl, ucp_Zs));
8252       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO);
8253       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8254       break;
8255 
8256       case PT_CLIST:
8257       other_cases = PRIV(ucd_caseless_sets) + cc[1];
8258 
8259       /* At least three characters are required.
8260          Otherwise this case would be handled by the normal code path. */
8261       SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
8262       SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
8263 
8264       /* Optimizing character pairs, if their difference is power of 2. */
8265       if (is_powerof2(other_cases[1] ^ other_cases[0]))
8266         {
8267         if (charoffset == 0)
8268           OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8269         else
8270           {
8271           OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
8272           OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8273           }
8274         OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[1]);
8275         OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8276         other_cases += 2;
8277         }
8278       else if (is_powerof2(other_cases[2] ^ other_cases[1]))
8279         {
8280         if (charoffset == 0)
8281           OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
8282         else
8283           {
8284           OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
8285           OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8286           }
8287         OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[2]);
8288         OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8289 
8290         OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
8291         OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
8292 
8293         other_cases += 3;
8294         }
8295       else
8296         {
8297         OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
8298         OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8299         }
8300 
8301       while (*other_cases != NOTACHAR)
8302         {
8303         OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
8304         OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
8305         }
8306       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8307       break;
8308 
8309       case PT_UCNC:
8310       OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
8311       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8312       OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
8313       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8314       OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
8315       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8316 
8317       SET_CHAR_OFFSET(0xa0);
8318       OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
8319       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8320       SET_CHAR_OFFSET(0);
8321       OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
8322       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
8323       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8324       break;
8325 
8326       case PT_PXGRAPH:
8327       OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT_RANGE(ucp_Zl, ucp_Zs));
8328       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
8329 
8330       OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf));
8331       jump = JUMP(SLJIT_ZERO);
8332 
8333       c = charoffset;
8334       /* In case of ucp_Cf, we overwrite the result. */
8335       SET_CHAR_OFFSET(0x2066);
8336       OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
8337       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8338 
8339       OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
8340       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8341 
8342       OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
8343       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8344 
8345       /* Restore charoffset. */
8346       SET_CHAR_OFFSET(c);
8347 
8348       JUMPHERE(jump);
8349       jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
8350       break;
8351 
8352       case PT_PXPRINT:
8353       OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT2(ucp_Zl, ucp_Zp));
8354       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
8355 
8356       OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf));
8357       jump = JUMP(SLJIT_ZERO);
8358 
8359       c = charoffset;
8360       /* In case of ucp_Cf, we overwrite the result. */
8361       SET_CHAR_OFFSET(0x2066);
8362       OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
8363       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8364 
8365       OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
8366       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8367 
8368       /* Restore charoffset. */
8369       SET_CHAR_OFFSET(c);
8370 
8371       JUMPHERE(jump);
8372       jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
8373       break;
8374 
8375       case PT_PXPUNCT:
8376       OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Sc, ucp_So));
8377       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
8378 
8379       SET_CHAR_OFFSET(0);
8380       OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x7f);
8381       OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
8382 
8383       OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Pc, ucp_Ps));
8384       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO);
8385       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8386       break;
8387 
8388       case PT_PXXDIGIT:
8389       SET_CHAR_OFFSET(CHAR_A);
8390       OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, ~0x20);
8391       OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP2, 0, SLJIT_IMM, CHAR_F - CHAR_A);
8392       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8393 
8394       SET_CHAR_OFFSET(CHAR_0);
8395       OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_9 - CHAR_0);
8396       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8397 
8398       SET_CHAR_OFFSET(0xff10);
8399       jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff10);
8400 
8401       OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff19 - 0xff10);
8402       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8403 
8404       SET_CHAR_OFFSET(0xff21);
8405       OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff26 - 0xff21);
8406       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8407 
8408       SET_CHAR_OFFSET(0xff41);
8409       OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff41);
8410       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8411 
8412       SET_CHAR_OFFSET(0xff10);
8413 
8414       JUMPHERE(jump);
8415       OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);
8416       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8417       break;
8418 
8419       default:
8420       SLJIT_UNREACHABLE();
8421       break;
8422       }
8423     cc += 2;
8424     }
8425 #endif /* SUPPORT_UNICODE */
8426 
8427   if (jump != NULL)
8428     add_jump(compiler, compares > 0 ? list : backtracks, jump);
8429   }
8430 
8431 SLJIT_ASSERT(compares == 0);
8432 if (found != NULL)
8433   set_jumps(found, LABEL());
8434 }
8435 
8436 #undef SET_TYPE_OFFSET
8437 #undef SET_CHAR_OFFSET
8438 
8439 #endif
8440 
/* Emits the matching-path code for one zero-width assertion opcode.

Handled opcodes: OP_SOD, OP_SOM, the four word boundary opcodes, OP_EODN,
OP_EOD, OP_DOLL, OP_DOLLM, OP_CIRC and OP_CIRCM. Any other value falls out
of the switch and reaches SLJIT_UNREACHABLE().

Arguments:
  common      compiler state (newline settings, match mode, shared jump lists)
  type        the assertion opcode to compile
  cc          current position in the compiled pattern
  backtracks  list receiving every jump that is taken when the assertion fails

Returns:      cc, unchanged (no case in this function advances it)
*/
static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
/* Scratch slots for forward jumps; each case uses them independently. */
struct sljit_jump *jump[4];

switch(type)
  {
  case OP_SOD:
  /* Fails unless STR_PTR is equal to jit_arguments.begin. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    }
  else
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_SOM:
  /* Fails unless STR_PTR is equal to jit_arguments.str. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
    }
  else
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  case OP_NOT_UCP_WORD_BOUNDARY:
  case OP_UCP_WORD_BOUNDARY:
  /* Fast call to the shared word boundary routine; the plain opcodes are
  queued on common->wordboundary, the UCP ones on common->ucp_wordboundary. */
  add_jump(compiler, (type == OP_NOT_WORD_BOUNDARY || type == OP_WORD_BOUNDARY) ? &common->wordboundary : &common->ucp_wordboundary, JUMP(SLJIT_FAST_CALL));
#ifdef SUPPORT_UNICODE
  if (common->invalid_utf)
    {
    /* Here the result is tested as a value in TMP2: the negated opcodes fail
    on any non-zero value, the others fail on a value <= 0 (signed compare).
    NOTE(review): presumably a negative value reports an invalid sequence -
    confirm against the routine itself. */
    add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    return cc;
    }
#endif /* SUPPORT_UNICODE */
  /* Otherwise the result is taken from the zero flag, which is declared as
  the current flag state before branching on it. */
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_EODN:
  /* Requires rather complex checks. */
  /* jump[0]: STR_PTR is already at (or past) STR_END, so no newline test is
  needed. */
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed newline made of two code units. TMP2 = STR_PTR + 2 units,
    TMP1 = first unit at STR_PTR. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode == PCRE2_JIT_COMPLETE)
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    else
      {
      /* jump[1]: exactly two units remain, continue with the full compare. */
      jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
      /* TMP2 = (more than two units remain) | (first unit differs from the
      high byte of the newline); fail when either is true. */
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, STR_END, 0);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
      /* Only the first newline unit is left in the subject: run the partial
      match check, then fail this path. */
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(jump[1]);
      }
    /* Both remaining units must be equal to the two newline bytes. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else if (common->nltype == NLTYPE_FIXED)
    {
    /* Single unit fixed newline: exactly one unit must remain and it must be
    the newline. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
    }
  else
    {
    /* Non-fixed newline types. jump[1]: the current unit is not CR. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    /* CR found: compare STR_PTR + 2 units against STR_END. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, TMP2, 0, STR_END, 0);
    /* jump[2]: the CR is the last unit of the subject (success). */
    jump[2] = JUMP(SLJIT_GREATER);
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
    /* Equal. */
    /* Exactly two units remain: succeed (jump[3]) only if the second is NL. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

    JUMPHERE(jump[1]);
    if (common->nltype == NLTYPE_ANYCRLF)
      {
      /* Not CR: the unit must be the last one and must be NL. */
      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
      }
    else
      {
      /* STR_PTR is saved in TMP3, one character is read with STR_PTR
      updated, and the path fails unless that character ends the subject and
      the anynewline routine accepts it. STR_PTR is restored afterwards
      because the assertion consumes nothing. */
      OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
      read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
      sljit_set_current_flags(compiler, SLJIT_SET_Z);
      add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
      OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
      }
    JUMPHERE(jump[2]);
    JUMPHERE(jump[3]);
    }
  JUMPHERE(jump[0]);
  /* In the partial matching modes the successful path also runs
  check_partial. */
  if (common->mode != PCRE2_JIT_COMPLETE)
    check_partial(common, TRUE);
  return cc;

  case OP_EOD:
  /* Fails while any subject unit remains. */
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
  if (common->mode != PCRE2_JIT_COMPLETE)
    check_partial(common, TRUE);
  return cc;

  case OP_DOLL:
  /* Fails when PCRE2_NOTEOL is set in jit_arguments.options. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
    }
  else
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));

  /* Without endonly the rest is the same code as OP_EODN (emitted by a
  recursive call); with endonly only the true end of the subject matches. */
  if (!common->endonly)
    compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    check_partial(common, FALSE);
    }
  return cc;

  case OP_DOLLM:
  /* jump[1]: not at the end of the subject, a newline test is required. */
  jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  /* At the end of the subject: fails only when PCRE2_NOTEOL is set. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
    }
  else
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  check_partial(common, FALSE);
  /* jump[0]: success at the end of the subject, skip the newline test. */
  jump[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[1]);

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Two unit fixed newline. TMP2 = STR_PTR + 2 units, TMP1 = first unit. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode == PCRE2_JIT_COMPLETE)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
    else
      {
      /* jump[1]: at least two units remain, do the full compare. */
      jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
      /* STR_PTR = STR_END - IN_UCHARS(1) */
      /* Only one unit is left: if it is the first newline byte, run the
      partial match check; this path fails in either case. */
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(jump[1]);
      }

    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else
    {
    /* Other newline types: look at the next character without consuming it
    and let check_newlinechar add the failing jumps. */
    peek_char(common, common->nlmax, TMP3, 0, NULL);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(jump[0]);
  return cc;

  case OP_CIRC:
  /* Fails when STR_PTR is after jit_arguments.begin, or when PCRE2_NOTBOL
  is set in jit_arguments.options. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
    }
  return cc;

  case OP_CIRCM:
  /* TMP2 might be used by peek_char_back. */
  /* TMP2 = jit_arguments.begin. jump[1]: STR_PTR is after the beginning, so
  the previous character must be tested. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
    }
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
    jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
    }
  /* At the beginning: fails only when PCRE2_NOTBOL is set, otherwise
  succeeds through jump[0]. */
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  jump[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[1]);

  /* Unless alt_circumflex is set, a position at the end of the subject does
  not match. */
  if (!common->alt_circumflex)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Two unit fixed newline: there must be two units between begin (TMP2)
    and STR_PTR, and they must be equal to the newline bytes. */
    OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else
    {
    /* Other newline types: inspect the previous character and let
    check_newlinechar add the failing jumps. */
    peek_char_back(common, common->nlmax, backtracks);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(jump[0]);
  return cc;
  }
SLJIT_UNREACHABLE();
return cc;
}
8683 
8684 #ifdef SUPPORT_UNICODE
8685 
8686 #if PCRE2_CODE_UNIT_WIDTH != 32
8687 
do_extuni_utf(jit_arguments * args,PCRE2_SPTR cc)8688 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
8689 {
8690 PCRE2_SPTR start_subject = args->begin;
8691 PCRE2_SPTR end_subject = args->end;
8692 int lgb, rgb, ricount;
8693 PCRE2_SPTR prevcc, endcc, bptr;
8694 BOOL first = TRUE;
8695 uint32_t c;
8696 
8697 prevcc = cc;
8698 endcc = NULL;
8699 do
8700   {
8701   GETCHARINC(c, cc);
8702   rgb = UCD_GRAPHBREAK(c);
8703 
8704   if (first)
8705     {
8706     lgb = rgb;
8707     endcc = cc;
8708     first = FALSE;
8709     continue;
8710     }
8711 
8712   if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8713     break;
8714 
8715   /* Not breaking between Regional Indicators is allowed only if there
8716   are an even number of preceding RIs. */
8717 
8718   if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8719     {
8720     ricount = 0;
8721     bptr = prevcc;
8722 
8723     /* bptr is pointing to the left-hand character */
8724     while (bptr > start_subject)
8725       {
8726       bptr--;
8727       BACKCHAR(bptr);
8728       GETCHAR(c, bptr);
8729 
8730       if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
8731         break;
8732 
8733       ricount++;
8734       }
8735 
8736     if ((ricount & 1) != 0) break;  /* Grapheme break required */
8737     }
8738 
8739   /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8740   allows any number of them before a following Extended_Pictographic. */
8741 
8742   if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8743        lgb != ucp_gbExtended_Pictographic)
8744     lgb = rgb;
8745 
8746   prevcc = endcc;
8747   endcc = cc;
8748   }
8749 while (cc < end_subject);
8750 
8751 return endcc;
8752 }
8753 
8754 #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
8755 
do_extuni_utf_invalid(jit_arguments * args,PCRE2_SPTR cc)8756 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
8757 {
8758 PCRE2_SPTR start_subject = args->begin;
8759 PCRE2_SPTR end_subject = args->end;
8760 int lgb, rgb, ricount;
8761 PCRE2_SPTR prevcc, endcc, bptr;
8762 BOOL first = TRUE;
8763 uint32_t c;
8764 
8765 prevcc = cc;
8766 endcc = NULL;
8767 do
8768   {
8769   GETCHARINC_INVALID(c, cc, end_subject, break);
8770   rgb = UCD_GRAPHBREAK(c);
8771 
8772   if (first)
8773     {
8774     lgb = rgb;
8775     endcc = cc;
8776     first = FALSE;
8777     continue;
8778     }
8779 
8780   if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8781     break;
8782 
8783   /* Not breaking between Regional Indicators is allowed only if there
8784   are an even number of preceding RIs. */
8785 
8786   if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8787     {
8788     ricount = 0;
8789     bptr = prevcc;
8790 
8791     /* bptr is pointing to the left-hand character */
8792     while (bptr > start_subject)
8793       {
8794       GETCHARBACK_INVALID(c, bptr, start_subject, break);
8795 
8796       if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
8797         break;
8798 
8799       ricount++;
8800       }
8801 
8802     if ((ricount & 1) != 0)
8803       break;  /* Grapheme break required */
8804     }
8805 
8806   /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8807   allows any number of them before a following Extended_Pictographic. */
8808 
8809   if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8810        lgb != ucp_gbExtended_Pictographic)
8811     lgb = rgb;
8812 
8813   prevcc = endcc;
8814   endcc = cc;
8815   }
8816 while (cc < end_subject);
8817 
8818 return endcc;
8819 }
8820 
do_extuni_no_utf(jit_arguments * args,PCRE2_SPTR cc)8821 static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
8822 {
8823 PCRE2_SPTR start_subject = args->begin;
8824 PCRE2_SPTR end_subject = args->end;
8825 int lgb, rgb, ricount;
8826 PCRE2_SPTR bptr;
8827 uint32_t c;
8828 
8829 /* Patch by PH */
8830 /* GETCHARINC(c, cc); */
8831 c = *cc++;
8832 
8833 #if PCRE2_CODE_UNIT_WIDTH == 32
8834 if (c >= 0x110000)
8835   return cc;
8836 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8837 lgb = UCD_GRAPHBREAK(c);
8838 
8839 while (cc < end_subject)
8840   {
8841   c = *cc;
8842 #if PCRE2_CODE_UNIT_WIDTH == 32
8843   if (c >= 0x110000)
8844     break;
8845 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8846   rgb = UCD_GRAPHBREAK(c);
8847 
8848   if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8849     break;
8850 
8851   /* Not breaking between Regional Indicators is allowed only if there
8852   are an even number of preceding RIs. */
8853 
8854   if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8855     {
8856     ricount = 0;
8857     bptr = cc - 1;
8858 
8859     /* bptr is pointing to the left-hand character */
8860     while (bptr > start_subject)
8861       {
8862       bptr--;
8863       c = *bptr;
8864 #if PCRE2_CODE_UNIT_WIDTH == 32
8865       if (c >= 0x110000)
8866         break;
8867 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8868 
8869       if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break;
8870 
8871       ricount++;
8872       }
8873 
8874     if ((ricount & 1) != 0)
8875       break;  /* Grapheme break required */
8876     }
8877 
8878   /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8879   allows any number of them before a following Extended_Pictographic. */
8880 
8881   if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8882        lgb != ucp_gbExtended_Pictographic)
8883     lgb = rgb;
8884 
8885   cc++;
8886   }
8887 
8888 return cc;
8889 }
8890 
8891 #endif /* SUPPORT_UNICODE */
8892 
compile_char1_matchingpath(compiler_common * common,PCRE2_UCHAR type,PCRE2_SPTR cc,jump_list ** backtracks,BOOL check_str_ptr)8893 static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
8894 {
8895 DEFINE_COMPILER;
8896 int length;
8897 unsigned int c, oc, bit;
8898 compare_context context;
8899 struct sljit_jump *jump[3];
8900 jump_list *end_list;
8901 #ifdef SUPPORT_UNICODE
8902 PCRE2_UCHAR propdata[5];
8903 #endif /* SUPPORT_UNICODE */
8904 
8905 switch(type)
8906   {
8907   case OP_NOT_DIGIT:
8908   case OP_DIGIT:
8909   /* Digits are usually 0-9, so it is worth to optimize them. */
8910   if (check_str_ptr)
8911     detect_partial_match(common, backtracks);
8912 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8913   if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
8914     read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
8915   else
8916 #endif
8917     read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
8918     /* Flip the starting bit in the negative case. */
8919   OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_digit);
8920   add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8921   return cc;
8922 
8923   case OP_NOT_WHITESPACE:
8924   case OP_WHITESPACE:
8925   if (check_str_ptr)
8926     detect_partial_match(common, backtracks);
8927 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8928   if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
8929     read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
8930   else
8931 #endif
8932     read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
8933   OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_space);
8934   add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8935   return cc;
8936 
8937   case OP_NOT_WORDCHAR:
8938   case OP_WORDCHAR:
8939   if (check_str_ptr)
8940     detect_partial_match(common, backtracks);
8941 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8942   if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
8943     read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
8944   else
8945 #endif
8946     read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
8947   OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_word);
8948   add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8949   return cc;
8950 
8951   case OP_ANY:
8952   if (check_str_ptr)
8953     detect_partial_match(common, backtracks);
8954   read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
8955   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8956     {
8957     jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
8958     end_list = NULL;
8959     if (common->mode != PCRE2_JIT_PARTIAL_HARD)
8960       add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8961     else
8962       check_str_end(common, &end_list);
8963 
8964     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8965     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
8966     set_jumps(end_list, LABEL());
8967     JUMPHERE(jump[0]);
8968     }
8969   else
8970     check_newlinechar(common, common->nltype, backtracks, TRUE);
8971   return cc;
8972 
8973   case OP_ALLANY:
8974   if (check_str_ptr)
8975     detect_partial_match(common, backtracks);
8976 #ifdef SUPPORT_UNICODE
8977   if (common->utf && common->invalid_utf)
8978     {
8979     read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
8980     return cc;
8981     }
8982 #endif /* SUPPORT_UNICODE */
8983 
8984   skip_valid_char(common);
8985   return cc;
8986 
8987   case OP_ANYBYTE:
8988   if (check_str_ptr)
8989     detect_partial_match(common, backtracks);
8990   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8991   return cc;
8992 
8993 #ifdef SUPPORT_UNICODE
8994   case OP_NOTPROP:
8995   case OP_PROP:
8996   propdata[0] = XCL_HASPROP;
8997   propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
8998   propdata[2] = cc[0];
8999   propdata[3] = cc[1];
9000   propdata[4] = XCL_END;
9001   if (check_str_ptr)
9002     detect_partial_match(common, backtracks);
9003   compile_xclass_matchingpath(common, propdata, backtracks);
9004   return cc + 2;
9005 #endif
9006 
9007   case OP_ANYNL:
9008   if (check_str_ptr)
9009     detect_partial_match(common, backtracks);
9010   read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
9011   jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
9012   /* We don't need to handle soft partial matching case. */
9013   end_list = NULL;
9014   if (common->mode != PCRE2_JIT_PARTIAL_HARD)
9015     add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
9016   else
9017     check_str_end(common, &end_list);
9018   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
9019   jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
9020   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9021   jump[2] = JUMP(SLJIT_JUMP);
9022   JUMPHERE(jump[0]);
9023   check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
9024   set_jumps(end_list, LABEL());
9025   JUMPHERE(jump[1]);
9026   JUMPHERE(jump[2]);
9027   return cc;
9028 
9029   case OP_NOT_HSPACE:
9030   case OP_HSPACE:
9031   if (check_str_ptr)
9032     detect_partial_match(common, backtracks);
9033 
9034   if (type == OP_NOT_HSPACE)
9035     read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
9036   else
9037     read_char(common, 0x9, 0x3000, NULL, 0);
9038 
9039   add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
9040   sljit_set_current_flags(compiler, SLJIT_SET_Z);
9041   add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
9042   return cc;
9043 
9044   case OP_NOT_VSPACE:
9045   case OP_VSPACE:
9046   if (check_str_ptr)
9047     detect_partial_match(common, backtracks);
9048 
9049   if (type == OP_NOT_VSPACE)
9050     read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
9051   else
9052     read_char(common, 0xa, 0x2029, NULL, 0);
9053 
9054   add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
9055   sljit_set_current_flags(compiler, SLJIT_SET_Z);
9056   add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
9057   return cc;
9058 
9059 #ifdef SUPPORT_UNICODE
9060   case OP_EXTUNI:
9061   if (check_str_ptr)
9062     detect_partial_match(common, backtracks);
9063 
9064   SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
9065   OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
9066 
9067 #if PCRE2_CODE_UNIT_WIDTH != 32
9068   sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
9069     common->utf ? (common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_utf)) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
9070   if (common->invalid_utf)
9071     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
9072 #else
9073   sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
9074     common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
9075   if (common->invalid_utf)
9076     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
9077 #endif
9078 
9079   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
9080 
9081   if (common->mode == PCRE2_JIT_PARTIAL_HARD)
9082     {
9083     jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
9084     /* Since we successfully read a char above, partial matching must occure. */
9085     check_partial(common, TRUE);
9086     JUMPHERE(jump[0]);
9087     }
9088   return cc;
9089 #endif
9090 
9091   case OP_CHAR:
9092   case OP_CHARI:
9093   length = 1;
9094 #ifdef SUPPORT_UNICODE
9095   if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
9096 #endif
9097 
9098   if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
9099     detect_partial_match(common, backtracks);
9100 
9101   if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
9102     {
9103     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
9104     if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
9105       add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
9106 
9107     context.length = IN_UCHARS(length);
9108     context.sourcereg = -1;
9109 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
9110     context.ucharptr = 0;
9111 #endif
9112     return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
9113     }
9114 
9115 #ifdef SUPPORT_UNICODE
9116   if (common->utf)
9117     {
9118     GETCHAR(c, cc);
9119     }
9120   else
9121 #endif
9122     c = *cc;
9123 
9124   SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));
9125 
9126   if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
9127     add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
9128 
9129   oc = char_othercase(common, c);
9130   read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);
9131 
9132   SLJIT_ASSERT(!is_powerof2(c ^ oc));
9133 
9134   if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
9135     {
9136     OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, oc);
9137     SELECT(SLJIT_EQUAL, TMP1, SLJIT_IMM, c, TMP1);
9138     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9139     }
9140   else
9141     {
9142     jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
9143     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
9144     JUMPHERE(jump[0]);
9145     }
9146   return cc + length;
9147 
9148   case OP_NOT:
9149   case OP_NOTI:
9150   if (check_str_ptr)
9151     detect_partial_match(common, backtracks);
9152 
9153   length = 1;
9154 #ifdef SUPPORT_UNICODE
9155   if (common->utf)
9156     {
9157 #if PCRE2_CODE_UNIT_WIDTH == 8
9158     c = *cc;
9159     if (c < 128 && !common->invalid_utf)
9160       {
9161       OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
9162       if (type == OP_NOT || !char_has_othercase(common, cc))
9163         add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9164       else
9165         {
9166         /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
9167         OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
9168         add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
9169         }
9170       /* Skip the variable-length character. */
9171       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9172       jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
9173       OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
9174       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
9175       JUMPHERE(jump[0]);
9176       return cc + 1;
9177       }
9178     else
9179 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
9180       {
9181       GETCHARLEN(c, cc, length);
9182       }
9183     }
9184   else
9185 #endif /* SUPPORT_UNICODE */
9186     c = *cc;
9187 
9188   if (type == OP_NOT || !char_has_othercase(common, cc))
9189     {
9190     read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
9191     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9192     }
9193   else
9194     {
9195     oc = char_othercase(common, c);
9196     read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
9197     bit = c ^ oc;
9198     if (is_powerof2(bit))
9199       {
9200       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
9201       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
9202       }
9203     else
9204       {
9205       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9206       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
9207       }
9208     }
9209   return cc + length;
9210 
9211   case OP_CLASS:
9212   case OP_NCLASS:
9213   if (check_str_ptr)
9214     detect_partial_match(common, backtracks);
9215 
9216 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
9217   bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
9218   if (type == OP_NCLASS)
9219     read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
9220   else
9221     read_char(common, 0, bit, NULL, 0);
9222 #else
9223   if (type == OP_NCLASS)
9224     read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
9225   else
9226     read_char(common, 0, 255, NULL, 0);
9227 #endif
9228 
9229   if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
9230     return cc + 32 / sizeof(PCRE2_UCHAR);
9231 
9232 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
9233   jump[0] = NULL;
9234   if (common->utf)
9235     {
9236     jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
9237     if (type == OP_CLASS)
9238       {
9239       add_jump(compiler, backtracks, jump[0]);
9240       jump[0] = NULL;
9241       }
9242     }
9243 #elif PCRE2_CODE_UNIT_WIDTH != 8
9244   jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
9245   if (type == OP_CLASS)
9246     {
9247     add_jump(compiler, backtracks, jump[0]);
9248     jump[0] = NULL;
9249     }
9250 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
9251 
9252   OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
9253   OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
9254   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
9255   OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
9256   OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
9257   add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
9258 
9259 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
9260   if (jump[0] != NULL)
9261     JUMPHERE(jump[0]);
9262 #endif
9263   return cc + 32 / sizeof(PCRE2_UCHAR);
9264 
9265 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
9266   case OP_XCLASS:
9267   if (check_str_ptr)
9268     detect_partial_match(common, backtracks);
9269   compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
9270   return cc + GET(cc, 0) - 1;
9271 #endif
9272   }
9273 SLJIT_UNREACHABLE();
9274 return cc;
9275 }
9276 
/* Emits the matching path for a run of literal character opcodes.

Starting at cc, consecutive OP_CHAR / OP_CHARI opcodes (never reading an
opcode at or past ccend) are measured and, where possible, merged into one
fixed-length sequence, so that a single subject-length check is emitted for
the whole run. An OP_CHARI whose character has another case and for which
char_get_othercase_bit() returns 0 is not merged; any other opcode ends the
run as well.

Arguments:
  common      compiler state
  cc          first opcode to compile
  ccend       opcodes at or beyond this address are not inspected
  backtracks  list that receives the jumps taken when matching fails

Returns the pattern pointer handed back by byte_sequence_compare() or by
compile_char1_matchingpath(). */
compile_charn_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,jump_list ** backtracks)9277 static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
9278 {
9279 /* This function consumes at least one input character. */
9280 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
9281 DEFINE_COMPILER;
9282 PCRE2_SPTR ccbegin = cc;
9283 compare_context context;
9284 int size;
9285 
     /* First pass: only measure. context.length accumulates IN_UCHARS(size)
     for every opcode that can be merged; nothing is emitted yet. */
9286 context.length = 0;
9287 do
9288   {
9289   if (cc >= ccend)
9290     break;
9291 
9292   if (*cc == OP_CHAR)
9293     {
       /* One code unit, plus the extra code units of a multi-unit UTF character. */
9294     size = 1;
9295 #ifdef SUPPORT_UNICODE
9296     if (common->utf && HAS_EXTRALEN(cc[1]))
9297       size += GET_EXTRALEN(cc[1]);
9298 #endif
9299     }
9300   else if (*cc == OP_CHARI)
9301     {
       /* A caseless character is merged only when it has no other case, or
       when char_get_othercase_bit() returns a non-zero value for it;
       otherwise size becomes 0 and the run ends here. */
9302     size = 1;
9303 #ifdef SUPPORT_UNICODE
9304     if (common->utf)
9305       {
9306       if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
9307         size = 0;
9308       else if (HAS_EXTRALEN(cc[1]))
9309         size += GET_EXTRALEN(cc[1]);
9310       }
9311     else
9312 #endif
9313     if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
9314       size = 0;
9315     }
9316   else
9317     size = 0;
9318 
       /* Step over the opcode and its character. When size is 0 the loop
       condition below terminates the scan, so the resulting cc is unused. */
9319   cc += 1 + size;
9320   context.length += IN_UCHARS(size);
9321   }
     /* The length test comes after the addition, so the total may exceed 128
     by the size of the last character accepted. */
9322 while (size > 0 && context.length <= 128);
9323 
     /* Second pass: rewind to the first opcode and emit the code. */
9324 cc = ccbegin;
9325 if (context.length > 0)
9326   {
9327   /* We have a fixed-length byte sequence. */
       /* Advance STR_PTR past the whole sequence up front and fail if that
       runs beyond STR_END: one length check covers every merged character. */
9328   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
9329   add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
9330 
9331   context.sourcereg = -1;
9332 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
9333   context.ucharptr = 0;
9334 #endif
       /* One call per merged opcode; cc + 1 skips the opcode itself.
       NOTE(review): the loop relies on byte_sequence_compare() reducing
       context.length until it reaches 0 - confirm against its definition. */
9335   do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
9336   return cc;
9337   }
9338 
9339 /* A non-fixed length character will be checked if length == 0. */
9340 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
9341 }
9342 
9343 /* Forward declarations. The bodies of these two functions are not in this
     part of the file; compile_matchingpath() is already called by
     compile_recurse_matchingpath() below. */
9344 static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
9345 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
9346 
/* Allocates a zero-filled backtrack record of `size` bytes with
sljit_alloc_memory() and pushes it onto the parent's list: the record's prev
is set to parent->top, its cc to `ccstart`, and parent->top then refers to
the new record.

The macro expects the variables `compiler`, `backtrack` and `parent` to be in
scope at the point of use. If the compiler reports an error after the
allocation, the ENCLOSING function returns `error` immediately (hidden
control flow). `size` is expanded twice, so it must be free of side effects. */
9347 #define PUSH_BACKTRACK(size, ccstart, error) \
9348   do \
9349     { \
9350     backtrack = sljit_alloc_memory(compiler, (size)); \
9351     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
9352       return error; \
9353     memset(backtrack, 0, size); \
9354     backtrack->prev = parent->top; \
9355     backtrack->cc = (ccstart); \
9356     parent->top = backtrack; \
9357     } \
9358   while (0)
9359 
/* Same as PUSH_BACKTRACK, for use inside functions returning void: when the
compiler reports an error after the allocation, the enclosing function
returns without a value. The variables `compiler`, `backtrack` and `parent`
must be in scope, and `size` is expanded twice. */
9360 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
9361   do \
9362     { \
9363     backtrack = sljit_alloc_memory(compiler, (size)); \
9364     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
9365       return; \
9366     memset(backtrack, 0, size); \
9367     backtrack->prev = parent->top; \
9368     backtrack->cc = (ccstart); \
9369     parent->top = backtrack; \
9370     } \
9371   while (0)
9372 
/* Views the local variable `backtrack` as a pointer to the more specific
record type `type`. It is a plain cast: the caller is responsible for having
pushed a record of at least sizeof(type) bytes. */
9373 #define BACKTRACK_AS(type) ((type *)backtrack)
9374 
/* Emits code that chooses which capture group a reference by duplicated
name (OP_DNREF / OP_DNREFI) refers to.

The opcode operands give the index of the first name table entry
(GET2(cc, 1)) and the number of entries sharing the name
(GET2(cc, 1 + IMM2_SIZE)). The generated code walks these entries in order
and stops at the first one whose OVECTOR start value differs from the value
stored in OVECTOR(1).
NOTE(review): a start value equal to OVECTOR(1) presumably marks a group that
is not set - confirm against the code that initialises the ovector.

On exit TMP2 has been loaded by GET_LOCAL_BASE() for the chosen entry (the
last candidate if no earlier one qualified); TMP1 is clobbered.

Arguments:
  common      compiler state
  cc          points to the OP_DNREF / OP_DNREFI opcode
  backtracks  if not NULL and common->unset_backref is false, receives a jump
              taken when the last candidate also equals OVECTOR(1) */
compile_dnref_search(compiler_common * common,PCRE2_SPTR cc,jump_list ** backtracks)9375 static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
9376 {
9377 /* TMP2 is loaded via GET_LOCAL_BASE() for the selected OVECTOR entry; callers
     read the group's start at offset 0 and its end at offset sizeof(sljit_sw). */
9378 DEFINE_COMPILER;
9379 int count = GET2(cc, 1 + IMM2_SIZE);
9380 PCRE2_SPTR slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
9381 unsigned int offset;
9382 jump_list *found = NULL;
9383 
9384 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
9385 
     /* TMP1 = reference value every candidate's start is compared with. */
9386 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
9387 
     /* All candidates except the last: leave through `found` as soon as one
     differs from TMP1. The last candidate is handled after the loop. */
9388 count--;
9389 while (count-- > 0)
9390   {
       /* Each group owns two OVECTOR slots, hence the shift. */
9391   offset = GET2(slot, 0) << 1;
9392   GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
9393   add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
9394   slot += common->name_entry_size;
9395   }
9396 
     /* Last candidate: TMP2 is loaded unconditionally. A failure jump is only
     emitted when the caller supplied a list and unset_backref is not set. */
9397 offset = GET2(slot, 0) << 1;
9398 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
9399 if (backtracks != NULL && !common->unset_backref)
9400   add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
9401 
9402 set_jumps(found, LABEL());
9403 }
9404 
/* Emits the matching path for one occurrence of a back reference.

For OP_REF / OP_REFI the group number is read from the opcode. For any other
opcode (the references by duplicated name, OP_DNREF / OP_DNREFI) the caller
must have loaded TMP2 beforehand, e.g. with compile_dnref_search(); the
group's start is then read from SLJIT_MEM1(TMP2) and its end from the
following sljit_sw.

Arguments:
  common      compiler state
  cc          points to the reference opcode
  backtracks  list that receives the jumps taken when matching fails
  withchecks  emit the "reference equals OVECTOR(1)" test (numbered
              references only, and only when common->unset_backref is false)
              and the zero-length test
  emptyfail   when the zero-length test is emitted, a zero-length reference
              jumps to backtracks instead of continuing after the emitted code

OP_REF and OP_DNREF are compared with the common->casefulcmp helper, OP_REFI
and OP_DNREFI with common->caselesscmp. (The previous code selected the
caseless helper for every opcode other than OP_REF, which made a caseful
reference by duplicated name compare caselessly.)

NOTE(review): the character-by-character Unicode caseless loop below is only
selected for OP_REFI, so its !ref branches are never reached and OP_DNREFI in
UTF mode goes through common->caselesscmp. Widening that condition needs
common->iref_ptr to be reserved for OP_DNREFI as well, which cannot be
checked from this function, so it is left unchanged here. */
compile_ref_matchingpath(compiler_common * common,PCRE2_SPTR cc,jump_list ** backtracks,BOOL withchecks,BOOL emptyfail)9405 static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
9406 {
9407 DEFINE_COMPILER;
9408 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9409 int offset = 0;
9410 struct sljit_jump *jump = NULL;
9411 struct sljit_jump *partial;
9412 struct sljit_jump *nopartial;
9413 #if defined SUPPORT_UNICODE
9414 struct sljit_label *loop;
9415 struct sljit_label *caseless_loop;
9416 jump_list *no_match = NULL;
     /* Registers borrowed by the Unicode caseless loop. Their previous contents
     are saved in the three slots at common->iref_ptr and restored on every
     exit from that loop. */
9417 int source_reg = COUNT_MATCH;
9418 int source_end_reg = ARGUMENTS;
9419 int char1_reg = STACK_LIMIT;
9420 #endif /* SUPPORT_UNICODE */
9421 
     /* TMP1 = start of the referenced substring. */
9422 if (ref)
9423   {
9424   offset = GET2(cc, 1) << 1;
9425   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9426   /* OVECTOR(1) contains the "string begin - 1" constant. */
9427   if (withchecks && !common->unset_backref)
9428     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9429   }
9430 else
9431   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9432 
9433 #if defined SUPPORT_UNICODE
9434 if (common->utf && *cc == OP_REFI)
9435   {
9436   SLJIT_ASSERT(common->iref_ptr != 0);
9437 
       /* TMP2 = end of the referenced substring. */
9438   if (ref)
9439     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9440   else
9441     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9442 
       /* Zero-length reference (start == end) fails when requested. */
9443   if (withchecks && emptyfail)
9444     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));
9445 
       /* Save the borrowed registers before overwriting them. */
9446   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0);
9447   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0);
9448   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0);
9449 
9450   OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
9451   OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);
9452 
       /* One character of the reference and one of the subject per iteration.
       `jump` leaves the loop when the reference is exhausted (success);
       `partial` leaves it when the subject is exhausted first. */
9453   loop = LABEL();
9454   jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
9455   partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
9456 
9457   /* Read original character. It must be a valid UTF character. */
       /* read_char() works through STR_PTR, so STR_PTR is parked in TMP3 while
       the reference is being read. */
9458   OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
9459   OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);
9460 
9461   read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);
9462 
9463   OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
9464   OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
9465   OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);
9466 
9467   /* Read second character. */
9468   read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);
9469 
       /* Identical characters: continue with the next pair. */
9470   CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
9471 
       /* Keep the subject character in TMP3 across the getucd helper call. */
9472   OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
9473 
9474   add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
9475 
       /* TMP2 = TMP2 * 12 (4x + 8x), then the address inside ucd_records.
       NOTE(review): 12 is presumably sizeof(ucd_record) - confirm. */
9476   OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
9477   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
9478   OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
9479 
9480   OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));
9481 
       /* Subject character + other_case equals the reference character:
       continue with the next pair. */
9482   OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
9483   OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
9484   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
9485   CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
9486 
       /* A caseset value of 0 fails; otherwise TMP2 becomes the address of
       that entry (4 bytes each) in ucd_caseless_sets. */
9487   add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9488   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
9489   OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));
9490 
       /* Scan the set: an entry equal to the reference character matches, a
       smaller entry moves on to the next one, a larger entry ends the scan
       without a match. */
9491   caseless_loop = LABEL();
9492   OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9493   OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
9494   OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, TMP1, 0, char1_reg, 0);
9495   JUMPTO(SLJIT_EQUAL, loop);
9496   JUMPTO(SLJIT_LESS, caseless_loop);
9497 
       /* Mismatch exit. In complete mode running out of subject is an
       ordinary mismatch, so `partial` lands here as well. */
9498   set_jumps(no_match, LABEL());
9499   if (common->mode == PCRE2_JIT_COMPLETE)
9500     JUMPHERE(partial);
9501 
9502   OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9503   OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9504   OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9505   add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9506 
9507   if (common->mode != PCRE2_JIT_COMPLETE)
9508     {
         /* Partial matching exit: restore the registers, run the partial
         match check, then backtrack. */
9509     JUMPHERE(partial);
9510     OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9511     OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9512     OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9513 
9514     check_partial(common, FALSE);
9515     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9516     }
9517 
       /* Success exit: the whole reference has been matched. */
9518   JUMPHERE(jump);
9519   OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9520   OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9521   OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9522   return;
9523   }
9524 else
9525 #endif /* SUPPORT_UNICODE */
9526   {
       /* TMP2 = length of the referenced substring (end - start); the zero
       flag is set for an empty reference. */
9527   if (ref)
9528     OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
9529   else
9530     OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
9531 
9532   if (withchecks)
9533     jump = JUMP(SLJIT_ZERO);
9534 
       /* Advance STR_PTR over the whole reference first; `partial` is taken
       when that runs past the end of the subject. */
9535   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
9536   partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
9537   if (common->mode == PCRE2_JIT_COMPLETE)
9538     add_jump(compiler, backtracks, partial);
9539 
       /* Caseful opcodes (OP_REF, OP_DNREF) use the caseful helper, the
       caseless ones the caseless helper. A non-zero TMP2 after the call is
       treated as a mismatch. */
9540   add_jump(compiler, (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
9541   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9542 
9543   if (common->mode != PCRE2_JIT_COMPLETE)
9544     {
9545     nopartial = JUMP(SLJIT_JUMP);
9546     JUMPHERE(partial);
9547     /* TMP2 -= STR_END - STR_PTR */
         /* STR_PTR is beyond STR_END here, so this shortens TMP2 to the part
         of the reference that still fits into the subject. */
9548     OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
9549     OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
         /* Nothing left to compare: go straight to the partial check. */
9550     partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
9551     OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
         /* Same helper selection as above. */
9552     add_jump(compiler, (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
9553     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9554     JUMPHERE(partial);
9555     check_partial(common, FALSE);
9556     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9557     JUMPHERE(nopartial);
9558     }
9559   }
9560 
     /* Zero-length reference: fail, or continue right after the emitted code. */
9561 if (jump != NULL)
9562   {
9563   if (emptyfail)
9564     add_jump(compiler, backtracks, jump);
9565   else
9566     JUMPHERE(jump);
9567   }
9568 }
9569 
/* Emits the matching path for a back reference that is followed by a repeat
opcode (OP_CRSTAR ... OP_CRMINRANGE).

The repeat opcode is found at cc[1 + IMM2_SIZE] for OP_REF / OP_REFI and one
IMM2_SIZE further for the other reference opcodes (handled through
compile_dnref_search()). Odd repeat opcodes are the minimizing variants. In
the local min / max pair, max == 0 stands for "no upper limit".

A ref_iterator_backtrack record is pushed onto parent; its matchingpath label
and own_backtracks list are filled in here.

Arguments:
  common  compiler state
  cc      points to the reference opcode
  parent  backtrack record that receives the new record

Returns the address following the repeat opcode and its operands, or NULL
when allocating the backtrack record fails. */
compile_ref_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9570 static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9571 {
9572 DEFINE_COMPILER;
9573 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9574 backtrack_common *backtrack;
9575 PCRE2_UCHAR type;
9576 int offset = 0;
9577 struct sljit_label *label;
9578 struct sljit_jump *zerolength;
9579 struct sljit_jump *jump = NULL;
9580 PCRE2_SPTR ccbegin = cc;
9581 int min = 0, max = 0;
9582 BOOL minimize;
9583 
9584 PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
9585 
     /* References by name carry one more IMM2 operand; stepping cc over it
     lets the repeat opcode and its operands be addressed uniformly below. */
9586 if (ref)
9587   offset = GET2(cc, 1) << 1;
9588 else
9589   cc += IMM2_SIZE;
9590 type = cc[1 + IMM2_SIZE];
9591 
9592 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
9593 minimize = (type & 0x1) != 0;
     /* Decode the repeat limits and move cc past the repeat opcode. */
9594 switch(type)
9595   {
9596   case OP_CRSTAR:
9597   case OP_CRMINSTAR:
9598   min = 0;
9599   max = 0;
9600   cc += 1 + IMM2_SIZE + 1;
9601   break;
9602   case OP_CRPLUS:
9603   case OP_CRMINPLUS:
9604   min = 1;
9605   max = 0;
9606   cc += 1 + IMM2_SIZE + 1;
9607   break;
9608   case OP_CRQUERY:
9609   case OP_CRMINQUERY:
9610   min = 0;
9611   max = 1;
9612   cc += 1 + IMM2_SIZE + 1;
9613   break;
9614   case OP_CRRANGE:
9615   case OP_CRMINRANGE:
9616   min = GET2(cc, 1 + IMM2_SIZE + 1);
9617   max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
9618   cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
9619   break;
9620   default:
9621   SLJIT_UNREACHABLE();
9622   break;
9623   }
9624 
     /* ---- Maximizing repeat: match as many times as allowed, saving a
     STR_PTR on the stack for each position that can be backtracked to. ---- */
9625 if (!minimize)
9626   {
9627   if (min == 0)
9628     {
         /* STACK(0) = position for zero repetitions, STACK(1) = 0. */
9629     allocate_stack(common, 2);
9630     if (ref)
9631       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9632     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9633     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9634     /* Temporary release of STR_PTR. */
9635     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9636     /* Handles both invalid and empty cases. Since the minimum repeat
9637     is zero, the invalid case is basically the same as an empty case. */
9638     if (ref)
9639       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9640     else
9641       {
           /* The value loaded into TMP2 by the search is kept in POSSESSIVE1
           and reloaded at the top of every iteration. */
9642       compile_dnref_search(common, ccbegin, NULL);
9643       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9644       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9645       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9646       }
9647     /* Restore if not zero length. */
9648     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9649     }
9650   else
9651     {
         /* At least one repetition is required: STACK(0) = 0, and the group
         must differ from OVECTOR(1) unless unset_backref is in effect. */
9652     allocate_stack(common, 1);
9653     if (ref)
9654       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9655     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9656 
9657     if (ref)
9658       {
9659       if (!common->unset_backref)
9660         add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9661       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9662       }
9663     else
9664       {
9665       compile_dnref_search(common, ccbegin, &backtrack->own_backtracks);
9666       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9667       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9668       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9669       }
9670     }
9671 
       /* POSSESSIVE0 counts the iterations; it is only needed when some
       limit is greater than one. */
9672   if (min > 1 || max > 1)
9673     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
9674 
       /* Top of the repeat loop. */
9675   label = LABEL();
9676   if (!ref)
9677     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
9678   compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, FALSE, FALSE);
9679 
9680   if (min > 1 || max > 1)
9681     {
9682     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
9683     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9684     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
         /* Below the minimum: repeat without saving a backtrack position. */
9685     if (min > 1)
9686       CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
9687     if (max > 1)
9688       {
           /* Stop at the maximum; otherwise save the position and repeat. */
9689       jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
9690       allocate_stack(common, 1);
9691       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9692       JUMPTO(SLJIT_JUMP, label);
9693       JUMPHERE(jump);
9694       }
9695     }
9696 
9697   if (max == 0)
9698     {
9699     /* Includes min > 1 case as well. */
         /* No upper limit: save the position and always try another
         repetition; the loop is left through own_backtracks. */
9700     allocate_stack(common, 1);
9701     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9702     JUMPTO(SLJIT_JUMP, label);
9703     }
9704 
9705   JUMPHERE(zerolength);
9706   BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9707 
9708   count_match(common);
9709   return cc;
9710   }
9711 
     /* ---- Minimizing repeat. Stack layout used below: STACK(0) = STR_PTR (or
     0), STACK(1) = repetition counter, STACK(2) = the TMP2 value from
     compile_dnref_search() (references by name only). ---- */
9712 allocate_stack(common, ref ? 2 : 3);
9713 if (ref)
9714   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9715 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9716 if (type != OP_CRMINSTAR)
9717   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9718 
9719 if (min == 0)
9720   {
9721   /* Handles both invalid and empty cases. Since the minimum repeat
9722   is zero, the invalid case is basically the same as an empty case. */
9723   if (ref)
9724     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9725   else
9726     {
9727     compile_dnref_search(common, ccbegin, NULL);
9728     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9729     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9730     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9731     }
9732   /* Length is non-zero, we can match real repeats. */
       /* Record the current position and skip the first match attempt: zero
       repetitions are tried first. */
9733   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9734   jump = JUMP(SLJIT_JUMP);
9735   }
9736 else
9737   {
9738   if (ref)
9739     {
9740     if (!common->unset_backref)
9741       add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9742     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9743     }
9744   else
9745     {
9746     compile_dnref_search(common, ccbegin, &backtrack->own_backtracks);
9747     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9748     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9749     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9750     }
9751   }
9752 
     /* The matchingpath label is where one more repetition is attempted; it
     is also the target of the "below minimum" loop further down. */
9753 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9754 if (max > 0)
9755   add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
9756 
9757 if (!ref)
9758   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9759 compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, TRUE, TRUE);
9760 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9761 
9762 if (min > 1)
9763   {
       /* Count the repetition and keep going until the minimum is reached. */
9764   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9765   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9766   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9767   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
9768   }
9769 else if (max > 0)
9770   OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
9771 
9772 if (jump != NULL)
9773   JUMPHERE(jump);
9774 JUMPHERE(zerolength);
9775 
9776 count_match(common);
9777 return cc;
9778 }
9779 
/* Emits the matching path for a recursion / subroutine call opcode.

GET(cc, 1) is the offset, relative to common->start, of the group being
called. When get_framesize() reports no_stack for that group, its body is
compiled inline at this point. Otherwise a fast call to a shared entry is
emitted; the entries are kept in the common->entries list, one per distinct
start offset, and a new entry is appended when none exists yet.

A recurse_backtrack record is pushed onto parent.

Arguments:
  common  compiler state
  cc      points to the recursion opcode
  parent  backtrack record that receives the new record

Returns cc + 1 + LINK_SIZE, or NULL when a memory allocation fails. */
compile_recurse_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9780 static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9781 {
9782 DEFINE_COMPILER;
9783 backtrack_common *backtrack;
9784 recurse_entry *entry = common->entries;
9785 recurse_entry *prev = NULL;
9786 sljit_sw start = GET(cc, 1);
9787 PCRE2_SPTR start_cc;
9788 BOOL needs_control_head;
9789 
9790 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
9791 
9792 /* Inlining simple patterns. */
9793 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
9794   {
       /* Compile the inside of the group: from the opcode after the opening
       bracket up to (1 + LINK_SIZE) code units before bracketend(). */
9795   start_cc = common->start + start;
9796   compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
9797   BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
9798   return cc + 1 + LINK_SIZE;
9799   }
9800 
     /* Look for an existing entry with this start offset; prev ends up as
     the last list element when the search fails. */
9801 while (entry != NULL)
9802   {
9803   if (entry->start == start)
9804     break;
9805   prev = entry;
9806   entry = entry->next;
9807   }
9808 
9809 if (entry == NULL)
9810   {
       /* First call of this group: create an entry and append it. */
9811   entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
9812   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9813     return NULL;
9814   entry->next = NULL;
9815   entry->entry_label = NULL;
9816   entry->backtrack_label = NULL;
9817   entry->entry_calls = NULL;
9818   entry->backtrack_calls = NULL;
9819   entry->start = start;
9820 
9821   if (prev != NULL)
9822     prev->next = entry;
9823   else
9824     common->entries = entry;
9825   }
9826 
9827 BACKTRACK_AS(recurse_backtrack)->entry = entry;
9828 
     /* Call the entry directly if its label already exists, otherwise queue
     the call in entry_calls so it can be resolved later. */
9829 if (entry->entry_label == NULL)
9830   add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));
9831 else
9832   JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
9833 /* Leave if the match is failed. */
     /* A zero in TMP1 after the call is treated as failure. */
9834 add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
9835 BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
9836 return cc + 1 + LINK_SIZE;
9837 }
9838 
/* Helper invoked from the generated code (see compile_callout_matchingpath())
to run the user's callout function.

It completes the partially filled callout block, converts the JIT capture
pointers into subject offsets stored directly behind the block, and calls
arguments->callout.

Arguments:
  arguments      the JIT arguments; supplies begin, end, callout and
                 callout_data
  callout_block  block prepared by the generated code. On entry capture_top
                 holds the number of ovector pairs to convert and
                 offset_vector holds the current subject pointer; both fields
                 are overwritten here. The memory directly after the block is
                 used as the PCRE2_SIZE offset vector.
  jit_ovector    the JIT's array of capture start / end pointers

Returns 0 when no callout function is set, otherwise the value returned by
the callout function. */
do_callout_jit(struct jit_arguments * arguments,pcre2_callout_block * callout_block,PCRE2_SPTR * jit_ovector)9839 static sljit_s32 SLJIT_FUNC SLJIT_FUNC_ATTRIBUTE do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
9840 {
9841 PCRE2_SPTR begin;
9842 PCRE2_SIZE *ovector;
9843 sljit_u32 oveccount, capture_top;
9844 
9845 if (arguments->callout == NULL)
9846   return 0;
9847 
9848 SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);
9849 
9850 begin = arguments->begin;
     /* The converted offsets are written right behind the callout block. */
9851 ovector = (PCRE2_SIZE*)(callout_block + 1);
     /* Must be read now: capture_top is reused for the result below. */
9852 oveccount = callout_block->capture_top;
9853 
9854 SLJIT_ASSERT(oveccount >= 1);
9855 
9856 callout_block->version = 2;
9857 callout_block->callout_flags = 0;
9858 
9859 /* Offsets in subject. */
9860 callout_block->subject_length = arguments->end - arguments->begin;
9861 callout_block->start_match = jit_ovector[0] - begin;
     /* offset_vector still holds the subject pointer stored by the generated
     code; it has to be consumed before the field is overwritten below. */
9862 callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;
9863 callout_block->subject = begin;
9864 
9865 /* Convert and copy the JIT offset vector to the ovector array. */
9866 callout_block->capture_top = 1;
9867 callout_block->offset_vector = ovector;
9868 
     /* Pair 0 is always reported as unset to the callout. */
9869 ovector[0] = PCRE2_UNSET;
9870 ovector[1] = PCRE2_UNSET;
9871 ovector += 2;
9872 jit_ovector += 2;
9873 capture_top = 1;
9874 
9875 /* Convert pointers to sizes. */
     /* Handles the remaining oveccount - 1 pairs. capture_top in the block
     ends up as one more than the highest pair whose start is not
     PCRE2_UNSET (1 if there is none).
     NOTE(review): an unset group presumably holds "begin - 1" in the JIT
     ovector, so that the subtraction yields PCRE2_UNSET - confirm. */
9876 while (--oveccount != 0)
9877   {
9878   capture_top++;
9879 
9880   ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin);
9881   ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin);
9882 
9883   if (ovector[0] != PCRE2_UNSET)
9884     callout_block->capture_top = capture_top;
9885 
9886   ovector += 2;
9887   jit_ovector += 2;
9888   }
9889 
9890 return (arguments->callout)(callout_block, arguments->callout_data);
9891 }
9892 
/* Byte offset of the field `arg` inside pcre2_callout_block. Used below to
address fields of the callout block that is built at STACK_TOP. */
9893 #define CALLOUT_ARG_OFFSET(arg) \
9894     SLJIT_OFFSETOF(pcre2_callout_block, arg)
9895 
/* Emits the matching path for a callout opcode.

The generated code reserves room on the JIT stack for a pcre2_callout_block
followed by one pair of sljit_sw per capture group (top_bracket + 1 pairs),
fills in the fields known at compile time, calls do_callout_jit() and then
acts on the value it returns:
  greater than zero  jump to this item's own_backtracks list
  zero               continue with the next opcode
  less than zero     jump to the abort handling

For OP_CALLOUT the callout number is taken from the opcode and the string
fields are zero; for the other callout opcode the number is 0 and the string
pointer, length and offset are taken from the opcode.

Arguments:
  common  compiler state
  cc      points to the callout opcode
  parent  backtrack record that receives the new record

Returns cc + callout_length, or NULL when allocating the backtrack record
fails. */
compile_callout_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9896 static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9897 {
9898 DEFINE_COMPILER;
9899 backtrack_common *backtrack;
9900 sljit_s32 mov_opcode;
9901 unsigned int callout_length = (*cc == OP_CALLOUT)
9902     ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
9903 sljit_sw value1;
9904 sljit_sw value2;
9905 sljit_sw value3;
     /* Bytes needed for the offset vector: two sljit_sw per capture group. */
9906 sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * SSIZE_OF(sw);
9907 
9908 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9909 
     /* Add the block itself and convert the byte count into a number of
     sljit_sw units, rounding up. */
9910 callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);
9911 
9912 allocate_stack(common, callout_arg_size);
9913 
9914 SLJIT_ASSERT(common->capture_last_ptr != 0);
9915 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
9916 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9917 value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
9918 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
9919 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
     /* do_callout_jit() reads this as the number of pairs to convert. */
9920 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);
9921 
9922 /* This pointer-sized field temporarily stores an internal variable: STR_PTR
     is passed in offset_vector, and do_callout_jit() turns it into
     current_position before storing the real offset vector pointer there. */
9923 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
9924 
     /* TMP1 holds ARGUMENTS here; TMP2 receives its mark_ptr field and is
     stored into the block's mark field further down. */
9925 if (common->mark_ptr != 0)
9926   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
     /* PCRE2_SIZE fields are written with a move that matches their width. */
9927 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
9928 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
9929 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
9930 
9931 if (*cc == OP_CALLOUT)
9932   {
9933   value1 = 0;
9934   value2 = 0;
9935   value3 = 0;
9936   }
9937 else
9938   {
       /* String argument: address, length and offset come from the opcode. */
9939   value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
9940   value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
9941   value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
9942   }
9943 
9944 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
9945 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
9946 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
     /* mark = TMP2 when marks are in use, otherwise the immediate 0. */
9947 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
9948 
9949 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
9950 
9951 /* Needed to save important temporary registers. */
     /* STR_PTR is SLJIT_R1, which carries the second call argument, so it is
     parked in LOCALS0 across the call. */
9952 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
9953 /* SLJIT_R0 = arguments */
9954 OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
9955 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
9956 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout_jit));
9957 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
9958 free_stack(common, callout_arg_size);
9959 
9960 /* Check return value. */
     /* One 32-bit compare with zero sets both flags: signed greater means
     backtrack; of the remaining values, non-zero (negative) means abort. */
9961 OP2U(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
9962 add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_SIG_GREATER));
9963 if (common->abort_label == NULL)
9964   add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
9965 else
9966   JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label);
9967 return cc + callout_length;
9968 }
9969 
9970 #undef CALLOUT_ARG_SIZE
9971 #undef CALLOUT_ARG_OFFSET
9972 
/* Emits the code for an OP_REVERSE or OP_VREVERSE item (these start the
alternatives handled by compile_assert_matchingpath).

STR_PTR is moved back by the minimum length; a jump is added to the failure
list when the `begin` field of the jit arguments would be passed. For
OP_VREVERSE a vreverse_backtrack is pushed and two positions are recorded:
STACK(3) receives STR_PTR after the mandatory move, STACK(2) receives STR_PTR
after moving back by up to (maximum - minimum) additional characters.

Arguments:
  common   the compiler state
  cc       points to the OP_REVERSE or OP_VREVERSE opcode
  parent   the enclosing backtrack, its top must be NULL

Returns:   the position following the opcode and its length arguments */
static PCRE2_SPTR compile_reverse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
/* The name is fixed: PUSH_BACKTRACK and BACKTRACK_AS refer to it. */
backtrack_common *backtrack = NULL;
jump_list **fail_list;
unsigned int min_len, max_len, extra_len;
BOOL is_variable;
#ifdef SUPPORT_UNICODE
struct sljit_jump *at_begin;
struct sljit_label *loop;
#endif

SLJIT_ASSERT(parent->top == NULL);

if (*cc != OP_REVERSE)
  {
  SLJIT_ASSERT(*cc == OP_VREVERSE);
  /* Must be done before cc is advanced, the macro receives cc. */
  PUSH_BACKTRACK(sizeof(vreverse_backtrack), cc, NULL);

  fail_list = &backtrack->own_backtracks;
  min_len = GET2(cc, 1);
  max_len = GET2(cc, 1 + IMM2_SIZE);
  cc += 1 + 2 * IMM2_SIZE;

  SLJIT_ASSERT(min_len < max_len);
  }
else
  {
  /* Fixed length: failures are reported directly to the parent. */
  fail_list = &parent->own_backtracks;
  min_len = GET2(cc, 1);
  max_len = min_len;
  cc += 1 + IMM2_SIZE;

  SLJIT_ASSERT(min_len > 0);
  }

is_variable = (min_len < max_len);
/* Only meaningful (and only used) when is_variable is set. */
extra_len = max_len - min_len;

/* TMP2 = arguments->begin */
if (!HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
else
  {
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  }

#ifdef SUPPORT_UNICODE
if (common->utf)
  {
  if (min_len > 0)
    {
    /* Counted loop in TMP3: one move_back() per iteration, failing as
    soon as STR_PTR is not above begin. */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, min_len);
    loop = LABEL();
    add_jump(compiler, fail_list, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
    move_back(common, fail_list, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);
    }

  if (is_variable)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);

    /* Same counted loop, but reaching begin only leaves the loop. */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, extra_len);
    loop = LABEL();
    at_begin = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
    move_back(common, fail_list, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);

    JUMPHERE(at_begin);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
    }
  }
else
#endif
  {
  if (min_len > 0)
    {
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(min_len));
    add_jump(compiler, fail_list, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
    }

  if (is_variable)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);

    /* Step back by the whole extra length, then select between TMP2 and
    STR_PTR on the LESS flag (presumably TMP2 is taken when STR_PTR
    dropped below begin - confirm against the SELECT macro). */
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(extra_len));
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_PTR, 0, TMP2, 0);
    SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);

    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
    }
  }

check_start_used_ptr(common);

if (is_variable)
  BACKTRACK_AS(vreverse_backtrack)->matchingpath = LABEL();

return cc;
}
10072 
assert_needs_str_ptr_saving(PCRE2_SPTR cc)10073 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
10074 {
10075 while (TRUE)
10076   {
10077   switch (*cc)
10078     {
10079     case OP_CALLOUT_STR:
10080     cc += GET(cc, 1 + 2*LINK_SIZE);
10081     break;
10082 
10083     case OP_NOT_WORD_BOUNDARY:
10084     case OP_WORD_BOUNDARY:
10085     case OP_CIRC:
10086     case OP_CIRCM:
10087     case OP_DOLL:
10088     case OP_DOLLM:
10089     case OP_CALLOUT:
10090     case OP_ALT:
10091     case OP_NOT_UCP_WORD_BOUNDARY:
10092     case OP_UCP_WORD_BOUNDARY:
10093     cc += PRIV(OP_lengths)[*cc];
10094     break;
10095 
10096     case OP_KET:
10097     return FALSE;
10098 
10099     default:
10100     return TRUE;
10101     }
10102   }
10103 }
10104 
/* Emits the matching path of an assertion: OP_ASSERT, OP_ASSERT_NOT,
OP_ASSERTBACK or OP_ASSERTBACK_NOT, optionally preceded by OP_BRAZERO or
OP_BRAMINZERO (never when used as a condition).

Each alternative of the assertion is compiled in a loop: its matching path,
the code which resets the stack after a successful match, and then its
backtracking path. After the loop the "no alternative matched" code is
emitted, followed by the success / failure handling which depends on whether
the assertion is positive or negative.

The accept, quit, then_trap and positive assertion fields of common are
replaced while the assertion is compiled and are restored on every exit.

Arguments:
  common        the compiler state
  cc            points to the assertion opcode, or to the OP_BRAZERO /
                OP_BRAMINZERO in front of it
  backtrack     framesize and private_data_ptr are stored here; matchingpath
                is set for OP_BRAZERO and read for OP_BRAMINZERO
  conditional   TRUE when the assertion is a condition: failure jumps are
                then collected in backtrack->condfailed instead of
                backtrack->common.own_backtracks

Returns:        the position after the item which ends the last alternative,
                or NULL when the sljit compiler reports an error */
static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
{
DEFINE_COMPILER;
int framesize;
int extrasize;
BOOL local_quit_available = FALSE;
BOOL needs_control_head;
/* Number of additional stack words (0 or 3). It takes part in stack offset
arithmetic, so it is an int rather than a BOOL. */
int end_block_size = 0;
BOOL has_vreverse;
int private_data_ptr;
backtrack_common altbacktrack;
PCRE2_SPTR ccbegin;
PCRE2_UCHAR opcode;
PCRE2_UCHAR bra = OP_BRA;
jump_list *tmp = NULL;
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.own_backtracks;
jump_list **found;
/* Saving previous accept variables. */
BOOL save_local_quit_available = common->local_quit_available;
BOOL save_in_positive_assertion = common->in_positive_assertion;
then_trap_backtrack *save_then_trap = common->then_trap;
struct sljit_label *save_quit_label = common->quit_label;
struct sljit_label *save_accept_label = common->accept_label;
jump_list *save_quit = common->quit;
jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
jump_list *save_accept = common->accept;
struct sljit_jump *jump;
struct sljit_jump *brajump = NULL;

/* Assert captures then. */
common->then_trap = NULL;

if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  SLJIT_ASSERT(!conditional);
  bra = *cc;
  cc++;
  }

private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
backtrack->framesize = framesize;
backtrack->private_data_ptr = private_data_ptr;
opcode = *cc;
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* A matching alternative of a positive assertion jumps to the success code
(tmp), while for a negative assertion it is a failure and goes to target. */
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
ccbegin = cc;
cc += GET(cc, 1);

if (bra == OP_BRAMINZERO)
  {
  /* This is a braminzero backtrack path. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

/* Lookbehinds for which find_vreverse() returns non-zero need three more
stack words. */
if ((opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NOT) && find_vreverse(ccbegin))
  end_block_size = 3;

if (framesize < 0)
  {
  extrasize = 1;
  if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
    extrasize = 0;

  extrasize += end_block_size;

  if (needs_control_head)
    extrasize++;

  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  if (extrasize > 0)
    allocate_stack(common, extrasize);

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);

  if (extrasize > 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    SLJIT_ASSERT(extrasize == end_block_size + 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);
    }
  }
else
  {
  extrasize = (needs_control_head ? 3 : 2) + end_block_size;

  OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
  allocate_stack(common, framesize + extrasize);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 2), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    }
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);

  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
  }

if (end_block_size > 0)
  {
  /* Keep the original STR_END in STACK(1), and use the current position
  as STR_END while the assertion is matched. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);
  }

memset(&altbacktrack, 0, sizeof(backtrack_common));
if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
  {
  /* Control verbs cannot escape from these asserts. */
  local_quit_available = TRUE;
  common->local_quit_available = TRUE;
  common->quit_label = NULL;
  common->quit = NULL;
  }

common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
common->positive_assertion_quit = NULL;

/* One iteration for each alternative. */
while (1)
  {
  common->accept_label = NULL;
  common->accept = NULL;
  altbacktrack.top = NULL;
  altbacktrack.own_backtracks = NULL;

  /* Second and later alternatives restart from the saved STR_PTR. */
  if (*ccbegin == OP_ALT && extrasize > 0)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  altbacktrack.cc = ccbegin;
  ccbegin += 1 + LINK_SIZE;

  has_vreverse = (*ccbegin == OP_VREVERSE);
  if (*ccbegin == OP_REVERSE || has_vreverse)
    ccbegin = compile_reverse_matchingpath(common, ccbegin, &altbacktrack);

  compile_matchingpath(common, ccbegin, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved compiler state before giving up. */
    if (local_quit_available)
      {
      common->local_quit_available = save_local_quit_available;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->in_positive_assertion = save_in_positive_assertion;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assertion_quit = save_positive_assertion_quit;
    common->accept = save_accept;
    return NULL;
    }

  if (has_vreverse)
    {
    /* The alternative is backtracked if it ends before STR_END. */
    SLJIT_ASSERT(altbacktrack.top != NULL);
    add_jump(compiler, &altbacktrack.top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    }

  common->accept_label = LABEL();
  if (common->accept != NULL)
    set_jumps(common->accept, common->accept_label);

  /* Reset stack. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    else if (extrasize > 0)
      free_stack(common, extrasize);

    if (end_block_size > 0)
      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
    }
  else
    {
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));

      if (end_block_size > 0)
        OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 2));

      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
      }
    else
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

      if (end_block_size > 0)
        OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize + 1));

      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
      }
    }

  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (conditional)
      {
      if (extrasize > 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-end_block_size - (needs_control_head ? 2 : 1)));
      }
    else if (bra == OP_BRAZERO)
      {
      if (framesize < 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
        }
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (framesize >= 0)
      {
      /* For OP_BRA and OP_BRAMINZERO. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
      }
    }
  add_jump(compiler, found, JUMP(SLJIT_JUMP));

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved compiler state before giving up. */
    if (local_quit_available)
      {
      common->local_quit_available = save_local_quit_available;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->in_positive_assertion = save_in_positive_assertion;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assertion_quit = save_positive_assertion_quit;
    common->accept = save_accept;
    return NULL;
    }
  set_jumps(altbacktrack.own_backtracks, LABEL());

  if (*cc != OP_ALT)
    break;

  ccbegin = cc;
  cc += GET(cc, 1);
  }

if (local_quit_available)
  {
  SLJIT_ASSERT(common->positive_assertion_quit == NULL);
  /* Makes the check less complicated below. */
  common->positive_assertion_quit = common->quit;
  }

/* None of them matched. */
if (common->positive_assertion_quit != NULL)
  {
  jump = JUMP(SLJIT_JUMP);
  set_jumps(common->positive_assertion_quit, LABEL());
  SLJIT_ASSERT(framesize != no_stack);
  if (framesize < 0)
    OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
    }
  JUMPHERE(jump);
  }

/* The stack was not released on this path: reload the values saved at
the beginning from their slots. */
if (end_block_size > 0)
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(1));

if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1));

if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
  {
  /* Assert is failed. */
  if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  if (framesize < 0)
    {
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      if (extrasize >= 2)
        free_stack(common, extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  /* For OP_BRAZERO the jump is bound to the matching path below instead
  of being added to the failure list. */
  jump = JUMP(SLJIT_JUMP);
  if (bra != OP_BRAZERO)
    add_jump(compiler, target, jump);

  /* Assert is successful. */
  set_jumps(tmp, LABEL());
  if (framesize < 0)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));

    /* Keep the STR_PTR on the top of the stack. */
    if (bra == OP_BRAZERO)
      {
      /* This allocation is always successful. */
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      if (extrasize >= 2)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else if (bra == OP_BRAMINZERO)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    }
  else
    {
    if (bra == OP_BRA)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
      }
    else
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + end_block_size + 2) * sizeof(sljit_sw));

      if (extrasize == 2 + end_block_size)
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
        if (bra == OP_BRAMINZERO)
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
        }
      else
        {
        SLJIT_ASSERT(extrasize == 3 + end_block_size);
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
        }
      }
    }

  if (bra == OP_BRAZERO)
    {
    backtrack->matchingpath = LABEL();
    SET_LABEL(jump, backtrack->matchingpath);
    }
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    if (framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
      }
    set_jumps(backtrack->common.own_backtracks, LABEL());
    }
  }
else
  {
  /* AssertNot is successful. */
  if (framesize < 0)
    {
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

    if (bra != OP_BRA)
      {
      if (extrasize >= 2)
        free_stack(common, extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra != OP_BRA)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }

  if (bra == OP_BRAZERO)
    backtrack->matchingpath = LABEL();
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    }

  if (bra != OP_BRA)
    {
    SLJIT_ASSERT(found == &backtrack->common.own_backtracks);
    set_jumps(backtrack->common.own_backtracks, LABEL());
    backtrack->common.own_backtracks = NULL;
    }
  }

/* Normal exit: put back the state saved on entry. */
if (local_quit_available)
  {
  common->local_quit_available = save_local_quit_available;
  common->quit_label = save_quit_label;
  common->quit = save_quit;
  }
common->in_positive_assertion = save_in_positive_assertion;
common->then_trap = save_then_trap;
common->accept_label = save_accept_label;
common->positive_assertion_quit = save_positive_assertion_quit;
common->accept = save_accept;
return cc + 1 + LINK_SIZE;
}
10575 
match_once_common(compiler_common * common,PCRE2_UCHAR ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)10576 static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
10577 {
10578 DEFINE_COMPILER;
10579 int stacksize;
10580 
10581 if (framesize < 0)
10582   {
10583   if (framesize == no_frame)
10584     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10585   else
10586     {
10587     stacksize = needs_control_head ? 1 : 0;
10588     if (ket != OP_KET || has_alternatives)
10589       stacksize++;
10590 
10591     if (stacksize > 0)
10592       free_stack(common, stacksize);
10593     }
10594 
10595   if (needs_control_head)
10596     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
10597 
10598   /* TMP2 which is set here used by OP_KETRMAX below. */
10599   if (ket == OP_KETRMAX)
10600     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10601   else if (ket == OP_KETRMIN)
10602     {
10603     /* Move the STR_PTR to the private_data_ptr. */
10604     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10605     }
10606   }
10607 else
10608   {
10609   stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
10610   OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
10611   if (needs_control_head)
10612     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10613 
10614   if (ket == OP_KETRMAX)
10615     {
10616     /* TMP2 which is set here used by OP_KETRMAX below. */
10617     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10618     }
10619   }
10620 if (needs_control_head)
10621   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10622 }
10623 
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)10624 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
10625 {
10626 DEFINE_COMPILER;
10627 
10628 if (common->capture_last_ptr != 0)
10629   {
10630   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
10631   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10632   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10633   stacksize++;
10634   }
10635 if (common->optimized_cbracket[offset >> 1] == 0)
10636   {
10637   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
10638   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
10639   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10640   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10641   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10642   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10643   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10644   stacksize += 2;
10645   }
10646 return stacksize;
10647 }
10648 
do_script_run(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10649 static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10650 {
10651   if (PRIV(script_run)(ptr, endptr, FALSE))
10652     return endptr;
10653   return NULL;
10654 }
10655 
10656 #ifdef SUPPORT_UNICODE
10657 
do_script_run_utf(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10658 static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10659 {
10660   if (PRIV(script_run)(ptr, endptr, TRUE))
10661     return endptr;
10662   return NULL;
10663 }
10664 
10665 #endif /* SUPPORT_UNICODE */
10666 
/* Emits a call to one of the script run helpers and the check of its result.

TMP1 is loaded from private_data_ptr before the call; together with STR_PTR
these are the two registers (R0 and R1, see the assertion) expected to carry
the arguments. The returned value is copied into STR_PTR, and a zero result
jumps to the backtrack list.

Arguments:
  common             the compiler state
  private_data_ptr   offset of the private data slot read into TMP1
  parent             failures go to parent->top->simple_backtracks if
                     parent->top is set, to parent->own_backtracks otherwise */
static void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
{
DEFINE_COMPILER;
jump_list **fail_list = (parent->top == NULL) ? &parent->own_backtracks : &parent->top->simple_backtracks;

SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

#ifdef SUPPORT_UNICODE
if (common->utf)
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_script_run_utf));
else
#endif
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_script_run));

/* The helpers return NULL (zero) on failure. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
add_jump(compiler, fail_list, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
}
10684 
10685 /*
10686   Handling bracketed expressions is probably the most complex part.
10687 
10688   Stack layout naming characters:
10689     S - Push the current STR_PTR
10690     0 - Push a 0 (NULL)
10691     A - Push the current STR_PTR. Needed for restoring the STR_PTR
10692         before the next alternative. Not pushed if there are no alternatives.
10693     M - Any values pushed by the current alternative. Can be empty, or anything.
10694     C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
10695     L - Push the previous local (pointed by localptr) to the stack
   () - optional values stored on the stack
  ()* - optional, can be stored multiple times
10698 
10699   The following list shows the regular expression templates, their PCRE byte codes
10700   and stack layout supported by pcre-sljit.
10701 
10702   (?:)                     OP_BRA     | OP_KET                A M
10703   ()                       OP_CBRA    | OP_KET                C M
10704   (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
10705                            OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
10706   (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
10707                            OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
10708   ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
10709                            OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
10710   ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
10711                            OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
10712   (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
10713   (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
10714   ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
10715   ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
10716   (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
10717            OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
10718   (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
10719            OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
10720   ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
10721            OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
10722   ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
10723            OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*
10724 
10725 
10726   Stack layout naming characters:
10727     A - Push the alternative index (starting from 0) on the stack.
        Not pushed if there are no alternatives.
10729     M - Any values pushed by the current alternative. Can be empty, or anything.
10730 
10731   The next list shows the possible content of a bracket:
10732   (|)     OP_*BRA    | OP_ALT ...         M A
10733   (?()|)  OP_*COND   | OP_ALT             M A
10734   (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
10735                                           Or nothing, if trace is unnecessary
10736 */
10737 
/* Generate the matching path code for one bracketed group.

   Handled here: the bracket opcodes tested below (capturing OP_CBRA and
   OP_SCBRA, OP_ONCE, OP_COND and OP_SCOND, OP_SBRA, OP_ASSERT_NA,
   OP_ASSERTBACK_NA and OP_SCRIPT_RUN; any other opcode that reaches this
   function takes the generic paths), optionally preceded by OP_BRAZERO or
   OP_BRAMINZERO and closed by OP_KET, OP_KETRMAX or OP_KETRMIN.

   Arguments:
     common   compiler state shared by the code generators
     cc       points to the bracket opcode, or to the OP_BRAZERO or
              OP_BRAMINZERO in front of it
     parent   the parent backtrack record; when the group starts with
              OP_BRAMINZERO it is cast to braminzero_backtrack and its
              matchingpath label is used as a jump target
              NOTE(review): PUSH_BACKTRACK is defined elsewhere and
              presumably uses parent as well - confirm against the macro

   Returns: the code pointer just after the closing ket, advanced further by
            repeat_length (non-zero only for a counted repeat); NULL if the
            sljit compiler reports an error.
*/
compile_bracket_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)10738 static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10739 {
10740 DEFINE_COMPILER;
10741 backtrack_common *backtrack;
10742 PCRE2_UCHAR opcode;
10743 int private_data_ptr = 0;
10744 int offset = 0;
10745 int i, stacksize;
10746 int repeat_ptr = 0, repeat_length = 0;
10747 int repeat_type = 0, repeat_count = 0;
10748 PCRE2_SPTR ccbegin;
10749 PCRE2_SPTR matchingpath;
10750 PCRE2_SPTR slot;
10751 PCRE2_UCHAR bra = OP_BRA;
10752 PCRE2_UCHAR ket;
10753 assert_backtrack *assert;
10754 BOOL has_alternatives;
10755 BOOL needs_control_head = FALSE;
10756 BOOL has_vreverse = FALSE;
10757 struct sljit_jump *jump;
10758 struct sljit_jump *skip;
10759 struct sljit_label *rmax_label = NULL;
10760 struct sljit_jump *braminzero = NULL;
10761 
      /* PUSH_BACKTRACK is a macro defined elsewhere. The local variable
      backtrack is not assigned anywhere else in this function, so the macro
      is expected to set it (TODO confirm against the macro definition). */
10762 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
10763 
      /* An optional OP_BRAZERO or OP_BRAMINZERO prefix is remembered in bra.
      The opcode assignment inside this block is repeated unconditionally
      right after it, so it has no effect of its own. */
10764 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
10765   {
10766   bra = *cc;
10767   cc++;
10768   opcode = *cc;
10769   }
10770 
      /* ccbegin keeps the position of the bracket opcode. matchingpath is
      first used to read the closing ket of the group. */
10771 opcode = *cc;
10772 ccbegin = cc;
10773 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
10774 ket = *matchingpath;
      /* A plain OP_KET with private data describes a counted repeat. Four
      consecutive private data entries supply the counter location, the
      length added to the return value, the repeat type and the count.
      OP_UPTO and OP_MINUPTO are then compiled like OP_KETRMAX and
      OP_KETRMIN. */
10775 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
10776   {
10777   repeat_ptr = PRIVATE_DATA(matchingpath);
10778   repeat_length = PRIVATE_DATA(matchingpath + 1);
10779   repeat_type = PRIVATE_DATA(matchingpath + 2);
10780   repeat_count = PRIVATE_DATA(matchingpath + 3);
10781   SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
10782   if (repeat_type == OP_UPTO)
10783     ket = OP_KETRMAX;
10784   if (repeat_type == OP_MINUPTO)
10785     ket = OP_KETRMIN;
10786   }
10787 
      /* From here on matchingpath points at the first item inside the
      bracket and cc at the end of the first alternative. */
10788 matchingpath = ccbegin + 1 + LINK_SIZE;
10789 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
10790 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
10791 cc += GET(cc, 1);
10792 
      /* For conditional groups has_alternatives is recomputed from the kind
      of condition: it is FALSE for the OP_RREF..OP_TRUE range and OP_FAIL,
      which are resolved at compile time further below, and TRUE otherwise. */
10793 has_alternatives = *cc == OP_ALT;
10794 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
10795   {
10796   SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
10797     compile_time_checks_must_be_grouped_together);
10798   has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
10799   }
10800 
      /* An OP_COND whose first alternative is directly followed by a
      repeating ket is handled as OP_SCOND. */
10801 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
10802   opcode = OP_SCOND;
10803 
10804 if (opcode == OP_CBRA || opcode == OP_SCBRA)
10805   {
10806   /* Capturing brackets has a pre-allocated space. */
        /* offset is read as the group number and then doubled so that it
        indexes the OVECTOR pair of the group. Depending on
        optimized_cbracket the private data is either the OVECTOR_PRIV
        entry or the OVECTOR start entry itself. */
10807   offset = GET2(ccbegin, 1 + LINK_SIZE);
10808   if (common->optimized_cbracket[offset] == 0)
10809     {
10810     private_data_ptr = OVECTOR_PRIV(offset);
10811     offset <<= 1;
10812     }
10813   else
10814     {
10815     offset <<= 1;
10816     private_data_ptr = OVECTOR(offset);
10817     }
10818   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
        /* Skip the group number stored after the link. */
10819   matchingpath += IMM2_SIZE;
10820   }
10821 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10822   {
10823   /* Other brackets simply allocate the next entry. */
10824   private_data_ptr = PRIVATE_DATA(ccbegin);
10825   SLJIT_ASSERT(private_data_ptr != 0);
10826   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
        /* get_framesize also reports through needs_control_head whether the
        control head has to be saved. */
10827   if (opcode == OP_ONCE)
10828     BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
10829   }
10830 
10831 /* Instructions before the first alternative. */
      /* First pass counts the slots, second pass fills them: a zero slot for
      repeating groups (not for OP_KETRMIN combined with OP_BRAMINZERO) and
      a STR_PTR slot for OP_BRAZERO. */
10832 stacksize = 0;
10833 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10834   stacksize++;
10835 if (bra == OP_BRAZERO)
10836   stacksize++;
10837 
10838 if (stacksize > 0)
10839   allocate_stack(common, stacksize);
10840 
10841 stacksize = 0;
10842 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10843   {
10844   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10845   stacksize++;
10846   }
10847 
10848 if (bra == OP_BRAZERO)
10849   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10850 
10851 if (bra == OP_BRAMINZERO)
10852   {
10853   /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
        /* STR_PTR is reloaded from the top stack slot. The braminzero jump
        created in the branches below is resolved near the end of this
        function, after the jump to the matching path of the parent. */
10854   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10855   if (ket != OP_KETRMIN)
10856     {
10857     free_stack(common, 1);
10858     braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10859     }
10860   else if (opcode == OP_ONCE || opcode >= OP_SBRA)
10861     {
10862     jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10863     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10864     /* Nothing stored during the first run. */
10865     skip = JUMP(SLJIT_JUMP);
10866     JUMPHERE(jump);
10867     /* Checking zero-length iteration. */
10868     if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10869       {
10870       /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
10871       braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10872       }
10873     else
10874       {
10875       /* Except when the whole stack frame must be saved. */
10876       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10877       braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
10878       }
10879     JUMPHERE(skip);
10880     }
10881   else
10882     {
10883     jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10884     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10885     JUMPHERE(jump);
10886     }
10887   }
10888 
      /* Counted repeat: initialise the counter. For OP_EXACT the loop label
      is placed here, immediately after the counter initialisation. */
10889 if (repeat_type != 0)
10890   {
10891   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
10892   if (repeat_type == OP_EXACT)
10893     rmax_label = LABEL();
10894   }
10895 
10896 if (ket == OP_KETRMIN)
10897   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10898 
      /* rmax_label is the target of the loop-back jumps emitted after the
      body. For opcodes in the OP_BRA..OP_SBRA range (OP_SBRA excluded) with
      alternatives and no counted repeat it also serves as the
      alternative_matchingpath label. */
10899 if (ket == OP_KETRMAX)
10900   {
10901   rmax_label = LABEL();
10902   if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
10903     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
10904   }
10905 
10906 /* Handling capturing brackets and alternatives. */
10907 if (opcode == OP_ONCE)
10908   {
        /* TMP2 carries the control head value until it is stored on the
        stack below. */
10909   stacksize = 0;
10910   if (needs_control_head)
10911     {
10912     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10913     stacksize++;
10914     }
10915 
10916   if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10917     {
10918     /* Neither capturing brackets nor recursions are found in the block. */
10919     if (ket == OP_KETRMIN)
10920       {
10921       stacksize += 2;
10922       if (!needs_control_head)
10923         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10924       }
10925     else
10926       {
10927       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10928         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
10929       if (ket == OP_KETRMAX || has_alternatives)
10930         stacksize++;
10931       }
10932 
10933     if (stacksize > 0)
10934       allocate_stack(common, stacksize);
10935 
          /* Fill the slots counted above. The control head, when needed,
          always goes to slot 0. */
10936     stacksize = 0;
10937     if (needs_control_head)
10938       {
10939       stacksize++;
10940       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10941       }
10942 
10943     if (ket == OP_KETRMIN)
10944       {
            /* TMP2 was occupied by the control head above, so the previous
            private data value is loaded only now in that case. */
10945       if (needs_control_head)
10946         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10947       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10948       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10949         OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
10950       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10951       }
10952     else if (ket == OP_KETRMAX || has_alternatives)
10953       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10954     }
10955   else
10956     {
          /* A frame is required: reserve framesize + 1 slots on top of the
          optional control head and STR_PTR slots, then let init_frame fill
          the frame. */
10957     if (ket != OP_KET || has_alternatives)
10958       stacksize++;
10959 
10960     stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
10961     allocate_stack(common, stacksize);
10962 
10963     if (needs_control_head)
10964       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10965 
          /* TMP1 keeps the previous private data value, TMP2 becomes the new
          one: STACK_TOP plus the size of the area just allocated. */
10966     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10967     OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10968 
10969     stacksize = needs_control_head ? 1 : 0;
10970     if (ket != OP_KET || has_alternatives)
10971       {
10972       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10973       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10974       stacksize++;
10975       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10976       }
10977     else
10978       {
10979       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10980       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10981       }
10982     init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
10983     }
10984   }
10985 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
10986   {
10987   /* Saving the previous values. */
        /* Optimized capture: both OVECTOR entries are pushed and the start
        entry is overwritten with STR_PTR. Otherwise only the private entry
        is pushed and overwritten. */
10988   if (common->optimized_cbracket[offset >> 1] != 0)
10989     {
10990     SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
10991     allocate_stack(common, 2);
10992     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10993     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
10994     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10995     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
10996     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
10997     }
10998   else
10999     {
11000     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11001     allocate_stack(common, 1);
11002     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
11003     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11004     }
11005   }
11006 else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))
11007   {
        /* Two private entries are used here: the first receives STR_PTR,
        the second the current STR_END. Their previous values go to stack
        slots 0 and 1 of the four allocated slots, and STR_END is set to
        STR_PTR while the group is matched. STR_END is restored after the
        body in the switch below. */
11008   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11009   allocate_stack(common, 4);
11010   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
11011   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
11012   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);
11013   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11014   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
11015   OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);
11016 
11017   has_vreverse = (*matchingpath == OP_VREVERSE);
11018   if (*matchingpath == OP_REVERSE || has_vreverse)
11019     matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);
11020   }
11021 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
11022   {
11023   /* Saving the previous value. */
11024   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11025   allocate_stack(common, 1);
11026   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
11027   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11028 
11029   if (*matchingpath == OP_REVERSE)
11030     matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);
11031   }
11032 else if (has_alternatives)
11033   {
11034   /* Pushing the starting string pointer. */
11035   allocate_stack(common, 1);
11036   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11037   }
11038 
11039 /* Generating code for the first alternative. */
11040 if (opcode == OP_COND || opcode == OP_SCOND)
11041   {
11042   if (*matchingpath == OP_CREF)
11043     {
          /* The condition fails when the OVECTOR start entry of the
          referenced group equals OVECTOR(1). */
11044     SLJIT_ASSERT(has_alternatives);
11045     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
11046       CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
11047     matchingpath += 1 + IMM2_SIZE;
11048     }
11049   else if (*matchingpath == OP_DNCREF)
11050     {
11051     SLJIT_ASSERT(has_alternatives);
11052 
          /* Walk i name table entries starting at slot. The differences
          between each referenced OVECTOR start entry and OVECTOR(1) are
          OR-ed together in TMP2; a zero result means the condition failed.
          STR_PTR is used as scratch and preserved in TMP3. */
11053     i = GET2(matchingpath, 1 + IMM2_SIZE);
11054     slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
11055     OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
11056     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
11057     OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
11058     slot += common->name_entry_size;
11059     i--;
11060     while (i-- > 0)
11061       {
11062       OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
11063       OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
11064       slot += common->name_entry_size;
11065       }
11066     OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
11067     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
11068     matchingpath += 1 + 2 * IMM2_SIZE;
11069     }
11070   else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
11071     {
11072     /* Never has other case. */
          /* These conditions are decided here, at compile time. In this
          branch stacksize is reused as the truth value of the condition:
          non-zero compiles the first alternative, zero selects the
          alternative after OP_ALT or nothing at all. */
11073     BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
11074     SLJIT_ASSERT(!has_alternatives);
11075 
11076     if (*matchingpath == OP_TRUE)
11077       {
11078       stacksize = 1;
11079       matchingpath++;
11080       }
11081     else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
11082       stacksize = 0;
11083     else if (*matchingpath == OP_RREF)
11084       {
11085       stacksize = GET2(matchingpath, 1);
11086       if (common->currententry == NULL)
11087         stacksize = 0;
11088       else if (stacksize == RREF_ANY)
11089         stacksize = 1;
11090       else if (common->currententry->start == 0)
11091         stacksize = stacksize == 0;
11092       else
11093         stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
11094 
11095       if (stacksize != 0)
11096         matchingpath += 1 + IMM2_SIZE;
11097       }
11098     else
11099       {
            /* Remaining opcode of the range (OP_DNRREF per the compile time
            assert above): search the name table entries for the group
            number read from the current entry. */
11100       if (common->currententry == NULL || common->currententry->start == 0)
11101         stacksize = 0;
11102       else
11103         {
11104         stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
11105         slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
11106         i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
11107         while (stacksize > 0)
11108           {
11109           if ((int)GET2(slot, 0) == i)
11110             break;
11111           slot += common->name_entry_size;
11112           stacksize--;
11113           }
11114         }
11115 
11116       if (stacksize != 0)
11117         matchingpath += 1 + 2 * IMM2_SIZE;
11118       }
11119 
11120       /* The stacksize == 0 is a common "else" case. */
11121       if (stacksize == 0)
11122         {
11123         if (*cc == OP_ALT)
11124           {
11125           matchingpath = cc + 1 + LINK_SIZE;
11126           cc += GET(cc, 1);
11127           }
11128         else
11129           matchingpath = cc;
11130         }
11131     }
11132   else
11133     {
          /* The condition is an assertion. Its backtrack record is allocated
          by hand and compiled through compile_assert_matchingpath. */
11134     SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
11135     /* Similar code as PUSH_BACKTRACK macro. */
11136     assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
11137     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11138       return NULL;
11139     memset(assert, 0, sizeof(assert_backtrack));
11140     assert->common.cc = matchingpath;
11141     BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
11142     matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
11143     }
11144   }
11145 
      /* Compile the body of the selected alternative, from matchingpath up
      to cc. */
11146 compile_matchingpath(common, matchingpath, cc, backtrack);
11147 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11148   return NULL;
11149 
      /* Opcode specific work after the body has matched. */
11150 switch (opcode)
11151   {
11152   case OP_ASSERTBACK_NA:
11153     if (has_vreverse)
11154       {
11155       SLJIT_ASSERT(backtrack->top != NULL && PRIVATE_DATA(ccbegin + 1));
11156       add_jump(compiler, &backtrack->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
11157       }
11158 
11159     if (PRIVATE_DATA(ccbegin + 1))
11160       OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
11161     break;
11162   case OP_ASSERT_NA:
11163     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11164     break;
11165   case OP_ONCE:
11166     match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
11167     break;
11168   case OP_SCRIPT_RUN:
11169     match_script_run_common(common, private_data_ptr, backtrack);
11170     break;
11171   }
11172 
      /* Stack slots written after the body, counted first and filled next:
      the decremented counter for OP_MINUPTO, one slot when the group
      repeats or has a zero prefix, the capture related slots, and the
      alternative index slot. */
11173 stacksize = 0;
11174 if (repeat_type == OP_MINUPTO)
11175   {
11176   /* We need to preserve the counter. TMP2 will be used below. */
11177   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
11178   stacksize++;
11179   }
11180 if (ket != OP_KET || bra != OP_BRA)
11181   stacksize++;
11182 if (offset != 0)
11183   {
11184   if (common->capture_last_ptr != 0)
11185     stacksize++;
11186   if (common->optimized_cbracket[offset >> 1] == 0)
11187     stacksize += 2;
11188   }
11189 if (has_alternatives && opcode != OP_ONCE)
11190   stacksize++;
11191 
11192 if (stacksize > 0)
11193   allocate_stack(common, stacksize);
11194 
11195 stacksize = 0;
11196 if (repeat_type == OP_MINUPTO)
11197   {
11198   /* TMP2 was set above. */
11199   OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
11200   stacksize++;
11201   }
11202 
      /* Repeating groups store STR_PTR here; a non-repeating group with a
      zero prefix stores 0 instead. */
11203 if (ket != OP_KET || bra != OP_BRA)
11204   {
11205   if (ket != OP_KET)
11206     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
11207   else
11208     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
11209   stacksize++;
11210   }
11211 
11212 if (offset != 0)
11213   stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
11214 
11215 /* Skip and count the other alternatives. */
      /* After the loop cc points at the closing ket and i is the number of
      alternatives. */
11216 i = 1;
11217 while (*cc == OP_ALT)
11218   {
11219   cc += GET(cc, 1);
11220   i++;
11221   }
11222 
11223 if (has_alternatives)
11224   {
        /* With up to three alternatives the immediate 0 is stored in the
        alternative slot; with more, a put label is emitted for that slot
        and remembered in the backtrack record. */
11225   if (opcode != OP_ONCE)
11226     {
11227     if (i <= 3)
11228       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
11229     else
11230       BACKTRACK_AS(bracket_backtrack)->u.matching_put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
11231     }
11232   if (ket != OP_KETRMAX)
11233     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
11234   }
11235 
11236 /* Must be after the matchingpath label. */
11237 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
11238   {
11239   SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
11240   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11241   }
11242 
11243 if (ket == OP_KETRMAX)
11244   {
11245   if (repeat_type != 0)
11246     {
          /* Counted greedy repeat: decrement the counter and loop while it
          is not zero. */
11247     if (has_alternatives)
11248       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
11249     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
11250     JUMPTO(SLJIT_NOT_ZERO, rmax_label);
11251     /* Drop STR_PTR for greedy plus quantifier. */
11252     if (opcode != OP_ONCE)
11253       free_stack(common, 1);
11254     }
11255   else if (opcode < OP_BRA || opcode >= OP_SBRA)
11256     {
11257     if (has_alternatives)
11258       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
11259 
11260     /* Checking zero-length iteration. */
11261     if (opcode != OP_ONCE)
11262       {
11263       /* This case includes opcodes such as OP_SCRIPT_RUN. */
11264       CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
11265       /* Drop STR_PTR for greedy plus quantifier. */
11266       if (bra != OP_BRAZERO)
11267         free_stack(common, 1);
11268       }
11269     else
11270       /* TMP2 must contain the starting STR_PTR. */
11271       CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
11272     }
11273   else
11274     JUMPTO(SLJIT_JUMP, rmax_label);
11275   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
11276   }
11277 
11278 if (repeat_type == OP_EXACT)
11279   {
11280   count_match(common);
11281   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
11282   JUMPTO(SLJIT_NOT_ZERO, rmax_label);
11283   }
11284 else if (repeat_type == OP_UPTO)
11285   {
11286   /* We need to preserve the counter. */
11287   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
11288   allocate_stack(common, 1);
11289   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11290   }
11291 
11292 if (bra == OP_BRAZERO)
11293   BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
11294 
11295 if (bra == OP_BRAMINZERO)
11296   {
11297   /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
11298   JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
11299   if (braminzero != NULL)
11300     {
11301     JUMPHERE(braminzero);
11302     /* We need to release the end pointer to perform the
11303     backtrack for the zero-length iteration. When
11304     framesize is < 0, OP_ONCE will do the release itself. */
11305     if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
11306       {
11307       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11308       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
11309       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
11310       }
11311     else if (ket == OP_KETRMIN && opcode != OP_ONCE)
11312       free_stack(common, 1);
11313     }
11314   /* Continue to the normal backtrack. */
11315   }
11316 
11317 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
11318   count_match(common);
11319 
      /* Step over the closing ket and its link. */
11320 cc += 1 + LINK_SIZE;
11321 
11322 if (opcode == OP_ONCE)
11323   {
11324   /* We temporarily encode the needs_control_head in the lowest bit.
11325      Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
11326      the same value for small signed numbers (including negative numbers). */
11327   BACKTRACK_AS(bracket_backtrack)->u.framesize = (int)((unsigned)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
11328   }
11329 return cc + repeat_length;
11330 }
11331 
/* Generate the matching path code for a group opened by OP_BRAPOS,
   OP_SBRAPOS, OP_CBRAPOS or OP_SCBRAPOS and closed by OP_KETRPOS (the
   possessive repeated groups of PCRE2), optionally preceded by
   OP_BRAPOSZERO.

   Every alternative is compiled in turn. Its matching path ends with a jump
   back to the loop label, and its backtracking path is compiled right after
   it, so a failing alternative falls through to the next one.

   Arguments:
     common   compiler state shared by the code generators
     cc       points to the bracket opcode, or to the OP_BRAPOSZERO in front
              of it
     parent   not referenced directly in this function
              NOTE(review): presumably consumed by the PUSH_BACKTRACK macro,
              which is defined elsewhere - confirm

   Returns: the code pointer just after OP_KETRPOS and its link; NULL if the
            sljit compiler reports an error.
*/
compile_bracketpos_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11332 static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11333 {
11334 DEFINE_COMPILER;
11335 backtrack_common *backtrack;
11336 PCRE2_UCHAR opcode;
11337 int private_data_ptr;
11338 int cbraprivptr = 0;
11339 BOOL needs_control_head;
11340 int framesize;
11341 int stacksize;
11342 int offset = 0;
11343 BOOL zero = FALSE;
11344 PCRE2_SPTR ccbegin = NULL;
11345 int stack; /* Also contains the offset of control head. */
11346 struct sljit_label *loop = NULL;
11347 struct jump_list *emptymatch = NULL;
11348 
      /* The local variable backtrack is not assigned anywhere else in this
      function, so PUSH_BACKTRACK is expected to set it (TODO confirm
      against the macro definition). */
11349 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
11350 if (*cc == OP_BRAPOSZERO)
11351   {
11352   zero = TRUE;
11353   cc++;
11354   }
11355 
11356 opcode = *cc;
11357 private_data_ptr = PRIVATE_DATA(cc);
11358 SLJIT_ASSERT(private_data_ptr != 0);
11359 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
      /* ccbegin is set to the first item of the first alternative. For the
      capturing variants offset is read as the group number and then doubled
      to index the OVECTOR pair, and cbraprivptr is the OVECTOR_PRIV entry of
      the group. */
11360 switch(opcode)
11361   {
11362   case OP_BRAPOS:
11363   case OP_SBRAPOS:
11364   ccbegin = cc + 1 + LINK_SIZE;
11365   break;
11366 
11367   case OP_CBRAPOS:
11368   case OP_SCBRAPOS:
11369   offset = GET2(cc, 1 + LINK_SIZE);
11370   /* This case cannot be optimized in the same way as
11371   normal capturing brackets. */
11372   SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
11373   cbraprivptr = OVECTOR_PRIV(offset);
11374   offset <<= 1;
11375   ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
11376   break;
11377 
11378   default:
11379   SLJIT_UNREACHABLE();
11380   break;
11381   }
11382 
11383 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
11384 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
11385 if (framesize < 0)
11386   {
        /* No frame is saved. Slots: the OVECTOR pair of the group (plus the
        capture_last value when that is tracked) or a single STR_PTR slot,
        then an optional control head slot, then, unless the group may match
        zero times, a flag slot. The flag is written as 1 here, cleared to 0
        after each successful iteration and tested after the loop. */
11387   if (offset != 0)
11388     {
11389     stacksize = 2;
11390     if (common->capture_last_ptr != 0)
11391       stacksize++;
11392     }
11393   else
11394     stacksize = 1;
11395 
11396   if (needs_control_head)
11397     stacksize++;
11398   if (!zero)
11399     stacksize++;
11400 
11401   BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
11402   allocate_stack(common, stacksize);
11403   if (framesize == no_frame)
11404     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
11405 
11406   stack = 0;
11407   if (offset != 0)
11408     {
11409     stack = 2;
11410     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
11411     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
11412     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
11413     if (common->capture_last_ptr != 0)
11414       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
11415     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
11416     if (needs_control_head)
11417       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11418     if (common->capture_last_ptr != 0)
11419       {
11420       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
11421       stack = 3;
11422       }
11423     }
11424   else
11425     {
11426     if (needs_control_head)
11427       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11428     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11429     stack = 1;
11430     }
11431 
        /* stack is bumped temporarily so that the flag lands after the
        control head slot, then moved back so that it ends up as the index
        of the control head slot. */
11432   if (needs_control_head)
11433     stack++;
11434   if (!zero)
11435     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
11436   if (needs_control_head)
11437     {
11438     stack--;
11439     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
11440     }
11441   }
11442 else
11443   {
        /* A frame is saved. Slots in order: the flag (unless zero), the
        control head (if needed), STR_PTR (only for non-capturing groups),
        the previous private data value, and then the frame written by
        init_frame. The private data entry is set to the address just above
        the allocated area. */
11444   stacksize = framesize + 1;
11445   if (!zero)
11446     stacksize++;
11447   if (needs_control_head)
11448     stacksize++;
11449   if (offset == 0)
11450     stacksize++;
11451   BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
11452 
11453   allocate_stack(common, stacksize);
11454   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11455   if (needs_control_head)
11456     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11457   OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11458 
11459   stack = 0;
11460   if (!zero)
11461     {
11462     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
11463     stack = 1;
11464     }
11465   if (needs_control_head)
11466     {
11467     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
11468     stack++;
11469     }
11470   if (offset == 0)
11471     {
11472     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
11473     stack++;
11474     }
11475   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
11476   init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
        /* Step stack back over the slots written after the control head, so
        that it is the control head index when one was stored. */
11477   stack -= 1 + (offset == 0);
11478   }
11479 
11480 if (offset != 0)
11481   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11482 
      /* One pass of this C loop per alternative. The emitted code of every
      alternative jumps back to the loop label after a successful
      iteration. */
11483 loop = LABEL();
11484 while (*cc != OP_KETRPOS)
11485   {
11486   backtrack->top = NULL;
11487   backtrack->own_backtracks = NULL;
11488   cc += GET(cc, 1);
11489 
11490   compile_matchingpath(common, ccbegin, cc, backtrack);
11491   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11492     return NULL;
11493 
11494   if (framesize < 0)
11495     {
11496     if (framesize == no_frame)
11497       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11498 
11499     if (offset != 0)
11500       {
            /* Record the capture: the start saved in cbraprivptr and the
            current STR_PTR as the end. cbraprivptr then becomes the start
            of the next iteration. TMP1 keeps the old start for the empty
            match test below. */
11501       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11502       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11503       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11504       if (common->capture_last_ptr != 0)
11505         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
11506       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11507       }
11508     else
11509       {
            /* The saved STR_PTR slot is refreshed. For OP_SBRAPOS the old
            value is kept in TMP1 for the empty match test below. */
11510       if (opcode == OP_SBRAPOS)
11511         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11512       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11513       }
11514 
11515     /* Even if the match is empty, we need to reset the control head. */
11516     if (needs_control_head)
11517       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
11518 
          /* An iteration that did not advance STR_PTR leaves the loop
          through the emptymatch list. */
11519     if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
11520       add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
11521 
11522     if (!zero)
11523       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
11524     }
11525   else
11526     {
          /* Frame case: STACK_TOP is recomputed from the private data entry,
          which holds the address just above the area allocated before the
          loop. */
11527     if (offset != 0)
11528       {
11529       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11530       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11531       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11532       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11533       if (common->capture_last_ptr != 0)
11534         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
11535       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11536       }
11537     else
11538       {
11539       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11540       OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11541       if (opcode == OP_SBRAPOS)
11542         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
11543       OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
11544       }
11545 
11546     /* Even if the match is empty, we need to reset the control head. */
11547     if (needs_control_head)
11548       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
11549 
11550     if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
11551       add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
11552 
11553     if (!zero)
11554       {
            /* This branch is only entered with framesize >= 0, so the
            framesize < 0 arm below is never taken here; the flag is cleared
            in slot 0. */
11555       if (framesize < 0)
11556         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
11557       else
11558         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
11559       }
11560     }
11561 
11562   JUMPTO(SLJIT_JUMP, loop);
11563   flush_stubs(common);
11564 
        /* The backtracking path of this alternative is emitted immediately.
        Its own_backtracks jumps are bound to the code that follows, which
        reloads STR_PTR before the next alternative is tried. */
11565   compile_backtrackingpath(common, backtrack->top);
11566   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11567     return NULL;
11568   set_jumps(backtrack->own_backtracks, LABEL());
11569 
11570   if (framesize < 0)
11571     {
11572     if (offset != 0)
11573       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11574     else
11575       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11576     }
11577   else
11578     {
11579     if (offset != 0)
11580       {
11581       /* Last alternative. */
11582       if (*cc == OP_KETRPOS)
11583         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11584       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11585       }
11586     else
11587       {
11588       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11589       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
11590       }
11591     }
11592 
11593   if (*cc == OP_KETRPOS)
11594     break;
11595   ccbegin = cc + 1 + LINK_SIZE;
11596   }
11597 
11598 /* We don't have to restore the control head in case of a failed match. */
11599 
      /* When at least one iteration is required, a flag that is still
      non-zero means no iteration succeeded; that case is added to
      own_backtracks. */
11600 backtrack->own_backtracks = NULL;
11601 if (!zero)
11602   {
11603   if (framesize < 0)
11604     add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
11605   else /* TMP2 is set to [private_data_ptr] above. */
11606     add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
11607   }
11608 
11609 /* None of them matched. */
11610 set_jumps(emptymatch, LABEL());
11611 count_match(common);
11612 return cc + 1 + LINK_SIZE;
11613 }
11614 
get_iterator_parameters(compiler_common * common,PCRE2_SPTR cc,PCRE2_UCHAR * opcode,PCRE2_UCHAR * type,sljit_u32 * max,sljit_u32 * exact,PCRE2_SPTR * end)11615 static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
11616 {
11617 int class_len;
11618 
11619 *opcode = *cc;
11620 *exact = 0;
11621 
11622 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
11623   {
11624   cc++;
11625   *type = OP_CHAR;
11626   }
11627 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
11628   {
11629   cc++;
11630   *type = OP_CHARI;
11631   *opcode -= OP_STARI - OP_STAR;
11632   }
11633 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
11634   {
11635   cc++;
11636   *type = OP_NOT;
11637   *opcode -= OP_NOTSTAR - OP_STAR;
11638   }
11639 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
11640   {
11641   cc++;
11642   *type = OP_NOTI;
11643   *opcode -= OP_NOTSTARI - OP_STAR;
11644   }
11645 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
11646   {
11647   cc++;
11648   *opcode -= OP_TYPESTAR - OP_STAR;
11649   *type = OP_END;
11650   }
11651 else
11652   {
11653   SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
11654   *type = *opcode;
11655   cc++;
11656   class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
11657   *opcode = cc[class_len - 1];
11658 
11659   if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
11660     {
11661     *opcode -= OP_CRSTAR - OP_STAR;
11662     *end = cc + class_len;
11663 
11664     if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
11665       {
11666       *exact = 1;
11667       *opcode -= OP_PLUS - OP_STAR;
11668       }
11669     }
11670   else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
11671     {
11672     *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
11673     *end = cc + class_len;
11674 
11675     if (*opcode == OP_POSPLUS)
11676       {
11677       *exact = 1;
11678       *opcode = OP_POSSTAR;
11679       }
11680     }
11681   else
11682     {
11683     SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
11684     *max = GET2(cc, (class_len + IMM2_SIZE));
11685     *exact = GET2(cc, class_len);
11686 
11687     if (*max == 0)
11688       {
11689       if (*opcode == OP_CRPOSRANGE)
11690         *opcode = OP_POSSTAR;
11691       else
11692         *opcode -= OP_CRRANGE - OP_STAR;
11693       }
11694     else
11695       {
11696       *max -= *exact;
11697       if (*max == 0)
11698         *opcode = OP_EXACT;
11699       else if (*max == 1)
11700         {
11701         if (*opcode == OP_CRPOSRANGE)
11702           *opcode = OP_POSQUERY;
11703         else
11704           *opcode -= OP_CRRANGE - OP_QUERY;
11705         }
11706       else
11707         {
11708         if (*opcode == OP_CRPOSRANGE)
11709           *opcode = OP_POSUPTO;
11710         else
11711           *opcode -= OP_CRRANGE - OP_UPTO;
11712         }
11713       }
11714     *end = cc + class_len + 2 * IMM2_SIZE;
11715     }
11716   return cc;
11717   }
11718 
11719 switch(*opcode)
11720   {
11721   case OP_EXACT:
11722   *exact = GET2(cc, 0);
11723   cc += IMM2_SIZE;
11724   break;
11725 
11726   case OP_PLUS:
11727   case OP_MINPLUS:
11728   *exact = 1;
11729   *opcode -= OP_PLUS - OP_STAR;
11730   break;
11731 
11732   case OP_POSPLUS:
11733   *exact = 1;
11734   *opcode = OP_POSSTAR;
11735   break;
11736 
11737   case OP_UPTO:
11738   case OP_MINUPTO:
11739   case OP_POSUPTO:
11740   *max = GET2(cc, 0);
11741   cc += IMM2_SIZE;
11742   break;
11743   }
11744 
11745 if (*type == OP_END)
11746   {
11747   *type = *cc;
11748   *end = next_opcode(common, cc);
11749   cc++;
11750   return cc;
11751   }
11752 
11753 *end = cc + 1;
11754 #ifdef SUPPORT_UNICODE
11755 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
11756 #endif
11757 return cc;
11758 }
11759 
/* Emits the matching path for a repeated single item (the opcodes decoded by
get_iterator_parameters(): character, negated character, character type and
class repeats).

The repeat is first decoded into a canonical opcode, the item type and the
exact / max counts. Code for the mandatory (exact) part is emitted first, then
the code selected by the canonical opcode.

Arguments:
  common   compiler state
  cc       points to the repeat opcode
  parent   backtrack record the new char_iterator_backtrack is pushed onto
             (used by the PUSH_BACKTRACK macro)

Returns:   the `end` pointer computed by get_iterator_parameters(), advanced
           by two code units when a following OP_CHAR / OP_CHARI is consumed
           by the "charpos" path below. PUSH_BACKTRACK is passed NULL as its
           third argument (presumably the value returned when the allocation
           fails - confirm against the macro definition).
*/
static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode;
PCRE2_UCHAR type;
sljit_u32 max = 0, exact;
sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
sljit_s32 early_fail_type;
BOOL charpos_enabled;
PCRE2_UCHAR charpos_char;
unsigned int charpos_othercasebit;
PCRE2_SPTR end;
jump_list *no_match = NULL;
jump_list *no_char1_match = NULL;
struct sljit_jump *jump = NULL;
struct sljit_label *label;
int private_data_ptr = PRIVATE_DATA(cc);
/* The two saved values of the iterator live either in two slots at the top of
the backtrack stack (no private data) or in the private data area addressed
from SLJIT_SP. */
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);
int tmp_base, tmp_offset;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
BOOL use_tmp;
#endif

PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);

/* The value read from PRIVATE_DATA(cc + 1) packs the early fail type in its
low three bits and the early fail pointer in the remaining bits. */
early_fail_type = (early_fail_ptr & 0x7);
early_fail_ptr >>= 3;

/* During recursion, these optimizations are disabled. */
if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL)
  {
  early_fail_ptr = 0;
  early_fail_type = type_skip;
  }

SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
  || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));

/* type_fail: backtrack immediately when STR_PTR is not beyond the position
stored at early_fail_ptr. */
if (early_fail_type == type_fail)
  add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);

/* Temporary counter / saved position: TMP3 for every type except OP_EXTUNI,
which uses the POSSESSIVE0 slot instead (presumably because matching
OP_EXTUNI does not preserve TMP3 - confirm). */
if (type != OP_EXTUNI)
  {
  tmp_base = TMP3;
  tmp_offset = 0;
  }
else
  {
  tmp_base = SLJIT_MEM1(SLJIT_SP);
  tmp_offset = POSSESSIVE0;
  }

/* Handle fixed part first. */
if (exact > 1)
  {
  SLJIT_ASSERT(early_fail_ptr == 0);

  if (common->mode == PCRE2_JIT_COMPLETE
#ifdef SUPPORT_UNICODE
      && !common->utf
#endif
      && type != OP_ANYNL && type != OP_EXTUNI)
    {
    /* A single up-front check that `exact` code units are available; the
    item matcher is then called with FALSE as its last argument (presumably
    disabling its own end-of-subject check - confirm). */
    OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
    add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);
    }
  else
    {
    /* Count-down loop; the item matcher is called with TRUE here. */
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);
    }
  }
else if (exact == 1)
  compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);

if (early_fail_type == type_fail_range)
  {
  /* Range end first, followed by range start. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw));
  /* TMP1 = end - start, TMP2 = STR_PTR - start; backtrack when TMP2 <= TMP1. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
  OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
  add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));

  /* Both range slots are reset to the current position. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw), STR_PTR, 0);
  }

switch(opcode)
  {
  case OP_STAR:
  case OP_UPTO:
  SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR);

  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    SLJIT_ASSERT(private_data_ptr == 0);
    SLJIT_ASSERT(early_fail_ptr == 0);

    /* Push the starting position and a zero word, then push one more
    position after every successfully matched item. */
    allocate_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

    /* For OP_UPTO the remaining count is kept in POSSESSIVE0. */
    if (opcode == OP_UPTO)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);

    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
    if (opcode == OP_UPTO)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
      OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
      jump = JUMP(SLJIT_ZERO);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
      }

    /* We cannot use TMP3 because of allocate_stack. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, label);
    if (jump != NULL)
      JUMPHERE(jump);
    BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
    break;
    }
#ifdef SUPPORT_UNICODE
  else if (type == OP_ALLANY && !common->invalid_utf)
#else
  else if (type == OP_ALLANY)
#endif
    {
    if (opcode == OP_STAR)
      {
      if (private_data_ptr == 0)
        allocate_stack(common, 2);

      /* offset0 receives the end of the subject, offset1 the starting
      position; STR_PTR is moved straight to the end of the subject. */
      OP1(SLJIT_MOV, base, offset0, STR_END, 0);
      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);

      OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
      process_partial_match(common);

      if (early_fail_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
      break;
      }
#ifdef SUPPORT_UNICODE
    else if (!common->utf)
#else
    else
#endif
      {
      if (private_data_ptr == 0)
        allocate_stack(common, 2);

      /* OP_UPTO: advance by max code units in one step. */
      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));

      if (common->mode == PCRE2_JIT_COMPLETE)
        {
        /* Compare with STR_END and SELECT on the GREATER flag (presumably
        this clamps STR_PTR to STR_END - confirm against SELECT). */
        OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
        SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
        }
      else
        {
        /* Partial modes: process_partial_match() is reached only when
        STR_PTR has moved beyond STR_END. */
        jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
        process_partial_match(common);
        JUMPHERE(jump);
        }

      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);

      if (early_fail_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
      break;
      }
    }

  /* "charpos" path: considered when the repeated item is not a literal
  character and the opcode following the repeat is OP_CHAR / OP_CHARI. */
  charpos_enabled = FALSE;
  charpos_char = 0;
  charpos_othercasebit = 0;

  if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
    {
#ifdef SUPPORT_UNICODE
    charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
#else
    charpos_enabled = TRUE;
#endif
    /* A caseless character with another case is accepted only when
    char_get_othercase_bit() returns a non-zero value. */
    if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
      {
      charpos_othercasebit = char_get_othercase_bit(common, end + 1);
      if (charpos_othercasebit == 0)
        charpos_enabled = FALSE;
      }

    if (charpos_enabled)
      {
      charpos_char = end[1];
      /* Consume the OP_CHAR opcode. */
      end += 2;
#if PCRE2_CODE_UNIT_WIDTH == 8
      SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
      SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
      if ((charpos_othercasebit & 0x100) != 0)
        charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
#endif
      if (charpos_othercasebit != 0)
        charpos_char |= charpos_othercasebit;

      /* Record the character in the backtrack record as well. */
      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
      }
    }

  if (charpos_enabled)
    {
    if (opcode == OP_UPTO)
      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);

    /* Search the first instance of charpos_char. */
    jump = JUMP(SLJIT_JUMP);
    label = LABEL();
    if (opcode == OP_UPTO)
      {
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
      add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_ZERO));
      }
    compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE);
    if (early_fail_ptr != 0)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
    /* The initial jump lands here, so the character test runs before the
    first item is matched. */
    JUMPHERE(jump);

    detect_partial_match(common, &backtrack->own_backtracks);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (charpos_othercasebit != 0)
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);

    /* First instance found: both saved slots start at this position. */
    if (private_data_ptr == 0)
      allocate_stack(common, 2);
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);

    if (opcode == OP_UPTO)
      {
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
      add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
      }

    /* Search the last instance of charpos_char. */
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
    if (early_fail_ptr != 0)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
    detect_partial_match(common, &no_match);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (charpos_othercasebit != 0)
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);

    if (opcode == OP_STAR)
      {
      /* offset0 is updated only at positions holding charpos_char. */
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, label);
      }
    else
      {
      /* Same as above, with the loop bounded by the counter. */
      jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      JUMPHERE(jump);
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
      JUMPTO(SLJIT_NOT_ZERO, label);
      }

    /* Continue one code unit after the last recorded instance; that
    position is also written back to offset0. */
    set_jumps(no_match, LABEL());
    OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1));
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    }
  else
    {
    /* Generic greedy loop. offset1 holds the starting position. */
    if (private_data_ptr == 0)
      allocate_stack(common, 2);

    OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    /* In UTF mode the position after the last matched item is tracked
    explicitly: in TMP3 when use_tmp is set, otherwise in offset0. */
    use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR);
    SLJIT_ASSERT(!use_tmp || tmp_base == TMP3);

    if (common->utf)
      OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
#endif
    if (opcode == OP_UPTO)
      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);

    detect_partial_match(common, &no_match);
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf)
      OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
#endif

    if (opcode == OP_UPTO)
      {
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
      add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
      }

    detect_partial_match_to(common, label);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

    set_jumps(no_char1_match, LABEL());
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf)
      {
      /* Restore the last tracked position. */
      set_jumps(no_match, LABEL());
      if (use_tmp)
        {
        OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
        OP1(SLJIT_MOV, base, offset0, TMP3, 0);
        }
      else
        OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      }
    else
#endif
      {
      /* The no_char1_match exits step back one code unit; the no_match
      exits join after the adjustment. */
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      set_jumps(no_match, LABEL());
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      }

    if (early_fail_ptr != 0)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
    }

  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  break;

  case OP_MINSTAR:
  /* Only the current position is saved; no item is matched here. */
  if (private_data_ptr == 0)
    allocate_stack(common, 1);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  if (early_fail_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
  break;

  case OP_MINUPTO:
  SLJIT_ASSERT(early_fail_ptr == 0);
  /* Saves the current position and the value max + 1. */
  if (private_data_ptr == 0)
    allocate_stack(common, 2);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  break;

  case OP_QUERY:
  case OP_MINQUERY:
  SLJIT_ASSERT(early_fail_ptr == 0);
  if (private_data_ptr == 0)
    allocate_stack(common, 1);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  /* Only the greedy form matches the item on the matching path. */
  if (opcode == OP_QUERY)
    compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  break;

  case OP_EXACT:
  /* Nothing beyond the fixed part emitted above. */
  break;

  case OP_POSSTAR:
#if defined SUPPORT_UNICODE
  if (type == OP_ALLANY && !common->invalid_utf)
#else
  if (type == OP_ALLANY)
#endif
    {
    /* Move straight to the end of the subject. */
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    process_partial_match(common);
    if (early_fail_ptr != 0)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
    break;
    }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (type == OP_EXTUNI || common->utf)
    {
    /* The position after the last matched item is kept in the temporary
    and restored into STR_PTR when the loop exits. */
    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
    detect_partial_match(common, &no_match);
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
    detect_partial_match_to(common, label);

    set_jumps(no_match, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
    if (early_fail_ptr != 0)
      {
      if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0);
      else
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
      }
    break;
    }
#endif

  detect_partial_match(common, &no_match);
  label = LABEL();
  compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
  detect_partial_match_to(common, label);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  /* The no_char1_match exits step back one code unit; the no_match exits
  join after the adjustment. */
  set_jumps(no_char1_match, LABEL());
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  set_jumps(no_match, LABEL());
  if (early_fail_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
  break;

  case OP_POSUPTO:
  SLJIT_ASSERT(early_fail_ptr == 0);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (common->utf)
    {
    /* POSSESSIVE1 tracks the position after the last matched item; the
    temporary holds the remaining count. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);

    detect_partial_match(common, &no_match);
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
    add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
    detect_partial_match_to(common, label);

    set_jumps(no_match, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
    break;
    }
#endif

  if (type == OP_ALLANY)
    {
    /* Advance by max code units in one step, as in the OP_UPTO case. */
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));

    if (common->mode == PCRE2_JIT_COMPLETE)
      {
      OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
      SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
      }
    else
      {
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
      process_partial_match(common);
      JUMPHERE(jump);
      }
    break;
    }

  /* Counted loop; the temporary holds the remaining count. */
  OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);

  detect_partial_match(common, &no_match);
  label = LABEL();
  compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
  OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
  add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
  detect_partial_match_to(common, label);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  set_jumps(no_char1_match, LABEL());
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  set_jumps(no_match, LABEL());
  break;

  case OP_POSQUERY:
  SLJIT_ASSERT(early_fail_ptr == 0);
  /* The temporary holds the position to continue from: the start, or the
  position after the item when it matches. */
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  set_jumps(no_match, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

count_match(common);
return end;
}
12271 
compile_fail_accept_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)12272 static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
12273 {
12274 DEFINE_COMPILER;
12275 backtrack_common *backtrack;
12276 
12277 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
12278 
12279 if (*cc == OP_FAIL)
12280   {
12281   add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));
12282   return cc + 1;
12283   }
12284 
12285 if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
12286   add_jump(compiler, &common->restart_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
12287 
12288 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
12289   {
12290   /* No need to check notempty conditions. */
12291   if (common->accept_label == NULL)
12292     add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
12293   else
12294     JUMPTO(SLJIT_JUMP, common->accept_label);
12295   return cc + 1;
12296   }
12297 
12298 if (common->accept_label == NULL)
12299   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
12300 else
12301   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
12302 
12303 if (HAS_VIRTUAL_REGISTERS)
12304   {
12305   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12306   OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
12307   }
12308 else
12309   OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options));
12310 
12311 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
12312 add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_NOT_ZERO));
12313 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
12314 if (common->accept_label == NULL)
12315   add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
12316 else
12317   JUMPTO(SLJIT_ZERO, common->accept_label);
12318 
12319 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
12320 if (common->accept_label == NULL)
12321   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
12322 else
12323   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
12324 add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));
12325 return cc + 1;
12326 }
12327 
compile_close_matchingpath(compiler_common * common,PCRE2_SPTR cc)12328 static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
12329 {
12330 DEFINE_COMPILER;
12331 int offset = GET2(cc, 1);
12332 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
12333 
12334 /* Data will be discarded anyway... */
12335 if (common->currententry != NULL)
12336   return cc + 1 + IMM2_SIZE;
12337 
12338 if (!optimized_cbracket)
12339   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
12340 offset <<= 1;
12341 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
12342 if (!optimized_cbracket)
12343   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12344 return cc + 1 + IMM2_SIZE;
12345 }
12346 
compile_control_verb_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)12347 static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
12348 {
12349 DEFINE_COMPILER;
12350 backtrack_common *backtrack;
12351 PCRE2_UCHAR opcode = *cc;
12352 PCRE2_SPTR ccend = cc + 1;
12353 
12354 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
12355     opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
12356   ccend += 2 + cc[1];
12357 
12358 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
12359 
12360 if (opcode == OP_SKIP)
12361   {
12362   allocate_stack(common, 1);
12363   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
12364   return ccend;
12365   }
12366 
12367 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
12368   {
12369   if (HAS_VIRTUAL_REGISTERS)
12370     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12371   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
12372   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
12373   OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
12374   }
12375 
12376 return ccend;
12377 }
12378 
12379 static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
12380 
compile_then_trap_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)12381 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
12382 {
12383 DEFINE_COMPILER;
12384 backtrack_common *backtrack;
12385 BOOL needs_control_head;
12386 int size;
12387 
12388 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
12389 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
12390 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
12391 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
12392 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
12393 
12394 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
12395 size = 3 + (size < 0 ? 0 : size);
12396 
12397 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12398 allocate_stack(common, size);
12399 if (size > 3)
12400   OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
12401 else
12402   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
12403 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
12404 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
12405 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
12406 
12407 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
12408 if (size >= 0)
12409   init_frame(common, cc, ccend, size - 1, 0);
12410 }
12411 
/* Generates the matching path for the opcodes in the range [cc, ccend).

   Each loop iteration dispatches on *cc to the generator for that opcode.
   The generator's return value becomes the new cc; a NULL result makes this
   function return immediately. A few opcodes (OP_SET_SOM, OP_BRAMINZERO,
   OP_MARK, OP_SKIPZERO) are handled inline and advance cc themselves.

   Arguments:
     common   compiler state
     cc       first opcode to compile
     ccend    end of the range; asserted to point at OP_END or at an opcode
              in the range OP_ALT..OP_KETRPOS
     parent   backtrack record that receives what is generated here. Simple
              generators add their failure jumps to
              parent->top->simple_backtracks when parent->top is set, and to
              parent->own_backtracks otherwise.

   NOTE(review): PUSH_BACKTRACK_NOVALUE and BACKTRACK_AS are defined outside
   this view; presumably the former allocates a record, links it under
   `parent` and assigns the local `backtrack` - confirm against the macro.

   When common->has_then is set and then_offsets has a non-zero entry for cc,
   the range is bracketed by two then_trap records: one created by
   compile_then_trap_matchingpath() before the loop, and one pushed after the
   loop, at which point the previous common->then_trap is restored. */
compile_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)12412 static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
12413 {
12414 DEFINE_COMPILER;
12415 backtrack_common *backtrack;
12416 BOOL has_then_trap = FALSE;
12417 then_trap_backtrack *save_then_trap = NULL;
12418 
12419 SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
12420 
12421 if (common->has_then && common->then_offsets[cc - common->start] != 0)
12422   {
12423   SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
12424   has_then_trap = TRUE;
      /* Remember the enclosing trap so it can be restored after the loop. */
12425   save_then_trap = common->then_trap;
12426   /* Tail item on backtrack. */
12427   compile_then_trap_matchingpath(common, cc, ccend, parent);
12428   }
12429 
12430 while (cc < ccend)
12431   {
12432   switch(*cc)
12433     {
        /* Simple assertions: handled by compile_simple_assertion_matchingpath. */
12434     case OP_SOD:
12435     case OP_SOM:
12436     case OP_NOT_WORD_BOUNDARY:
12437     case OP_WORD_BOUNDARY:
12438     case OP_EODN:
12439     case OP_EOD:
12440     case OP_DOLL:
12441     case OP_DOLLM:
12442     case OP_CIRC:
12443     case OP_CIRCM:
12444     case OP_NOT_UCP_WORD_BOUNDARY:
12445     case OP_UCP_WORD_BOUNDARY:
12446     cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
12447     break;
12448 
        /* Unrepeated single-item opcodes: handled by compile_char1_matchingpath. */
12449     case OP_NOT_DIGIT:
12450     case OP_DIGIT:
12451     case OP_NOT_WHITESPACE:
12452     case OP_WHITESPACE:
12453     case OP_NOT_WORDCHAR:
12454     case OP_WORDCHAR:
12455     case OP_ANY:
12456     case OP_ALLANY:
12457     case OP_ANYBYTE:
12458     case OP_NOTPROP:
12459     case OP_PROP:
12460     case OP_ANYNL:
12461     case OP_NOT_HSPACE:
12462     case OP_HSPACE:
12463     case OP_NOT_VSPACE:
12464     case OP_VSPACE:
12465     case OP_EXTUNI:
12466     case OP_NOT:
12467     case OP_NOTI:
12468     cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
12469     break;
12470 
        /* OP_SET_SOM: push the previous OVECTOR(0) value on the stack and
           overwrite OVECTOR(0) with the current STR_PTR. */
12471     case OP_SET_SOM:
12472     PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
12473     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
12474     allocate_stack(common, 1);
12475     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
12476     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
12477     cc++;
12478     break;
12479 
        /* Literal characters: the multi-character generator is only used in
           PCRE2_JIT_COMPLETE mode; other modes go through the single-item one. */
12480     case OP_CHAR:
12481     case OP_CHARI:
12482     if (common->mode == PCRE2_JIT_COMPLETE)
12483       cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
12484     else
12485       cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
12486     break;
12487 
        /* Repeated single items: handled by compile_iterator_matchingpath. */
12488     case OP_STAR:
12489     case OP_MINSTAR:
12490     case OP_PLUS:
12491     case OP_MINPLUS:
12492     case OP_QUERY:
12493     case OP_MINQUERY:
12494     case OP_UPTO:
12495     case OP_MINUPTO:
12496     case OP_EXACT:
12497     case OP_POSSTAR:
12498     case OP_POSPLUS:
12499     case OP_POSQUERY:
12500     case OP_POSUPTO:
12501     case OP_STARI:
12502     case OP_MINSTARI:
12503     case OP_PLUSI:
12504     case OP_MINPLUSI:
12505     case OP_QUERYI:
12506     case OP_MINQUERYI:
12507     case OP_UPTOI:
12508     case OP_MINUPTOI:
12509     case OP_EXACTI:
12510     case OP_POSSTARI:
12511     case OP_POSPLUSI:
12512     case OP_POSQUERYI:
12513     case OP_POSUPTOI:
12514     case OP_NOTSTAR:
12515     case OP_NOTMINSTAR:
12516     case OP_NOTPLUS:
12517     case OP_NOTMINPLUS:
12518     case OP_NOTQUERY:
12519     case OP_NOTMINQUERY:
12520     case OP_NOTUPTO:
12521     case OP_NOTMINUPTO:
12522     case OP_NOTEXACT:
12523     case OP_NOTPOSSTAR:
12524     case OP_NOTPOSPLUS:
12525     case OP_NOTPOSQUERY:
12526     case OP_NOTPOSUPTO:
12527     case OP_NOTSTARI:
12528     case OP_NOTMINSTARI:
12529     case OP_NOTPLUSI:
12530     case OP_NOTMINPLUSI:
12531     case OP_NOTQUERYI:
12532     case OP_NOTMINQUERYI:
12533     case OP_NOTUPTOI:
12534     case OP_NOTMINUPTOI:
12535     case OP_NOTEXACTI:
12536     case OP_NOTPOSSTARI:
12537     case OP_NOTPOSPLUSI:
12538     case OP_NOTPOSQUERYI:
12539     case OP_NOTPOSUPTOI:
12540     case OP_TYPESTAR:
12541     case OP_TYPEMINSTAR:
12542     case OP_TYPEPLUS:
12543     case OP_TYPEMINPLUS:
12544     case OP_TYPEQUERY:
12545     case OP_TYPEMINQUERY:
12546     case OP_TYPEUPTO:
12547     case OP_TYPEMINUPTO:
12548     case OP_TYPEEXACT:
12549     case OP_TYPEPOSSTAR:
12550     case OP_TYPEPOSPLUS:
12551     case OP_TYPEPOSQUERY:
12552     case OP_TYPEPOSUPTO:
12553     cc = compile_iterator_matchingpath(common, cc, parent);
12554     break;
12555 
        /* Classes: the code unit located 32 bytes past the opcode decides
           whether the class is repeated (OP_CRSTAR..OP_CRPOSRANGE) and
           therefore goes to the iterator generator. */
12556     case OP_CLASS:
12557     case OP_NCLASS:
12558     if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
12559       cc = compile_iterator_matchingpath(common, cc, parent);
12560     else
12561       cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
12562     break;
12563 
12564 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
        /* Same decision as above; here the opcode to inspect is found at the
           offset read with GET(cc, 1). */
12565     case OP_XCLASS:
12566     if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
12567       cc = compile_iterator_matchingpath(common, cc, parent);
12568     else
12569       cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
12570     break;
12571 #endif
12572 
        /* Back references: a following repeat opcode selects the iterator
           variant; otherwise the reference is matched once and cc is moved
           past the opcode and its IMM2_SIZE operand. */
12573     case OP_REF:
12574     case OP_REFI:
12575     if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
12576       cc = compile_ref_iterator_matchingpath(common, cc, parent);
12577     else
12578       {
12579       compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE);
12580       cc += 1 + IMM2_SIZE;
12581       }
12582     break;
12583 
        /* OP_DNREF / OP_DNREFI carry two IMM2_SIZE operands; the unrepeated
           form runs compile_dnref_search() before the reference match. */
12584     case OP_DNREF:
12585     case OP_DNREFI:
12586     if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
12587       cc = compile_ref_iterator_matchingpath(common, cc, parent);
12588     else
12589       {
12590       compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
12591       compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE);
12592       cc += 1 + 2 * IMM2_SIZE;
12593       }
12594     break;
12595 
12596     case OP_RECURSE:
12597     cc = compile_recurse_matchingpath(common, cc, parent);
12598     break;
12599 
12600     case OP_CALLOUT:
12601     case OP_CALLOUT_STR:
12602     cc = compile_callout_matchingpath(common, cc, parent);
12603     break;
12604 
        /* Assertions get their own assert_backtrack record, which is passed
           on to compile_assert_matchingpath(). */
12605     case OP_ASSERT:
12606     case OP_ASSERT_NOT:
12607     case OP_ASSERTBACK:
12608     case OP_ASSERTBACK_NOT:
12609     PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
12610     cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
12611     break;
12612 
        /* OP_BRAMINZERO: cc jumps to the position returned by
           bracketend(cc + 1), so the bracket body is not compiled here. One
           stack slot (STR_PTR) is pushed, or two (a zero and STR_PTR) when
           the opcode before the new cc is OP_KETRMIN. The label recorded in
           the backtrack record marks the point after these pushes. */
12613     case OP_BRAMINZERO:
12614     PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
12615     cc = bracketend(cc + 1);
12616     if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
12617       {
12618       allocate_stack(common, 1);
12619       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
12620       }
12621     else
12622       {
12623       allocate_stack(common, 2);
12624       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12625       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
12626       }
12627     BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
12628     count_match(common);
12629     break;
12630 
12631     case OP_ASSERT_NA:
12632     case OP_ASSERTBACK_NA:
12633     case OP_ONCE:
12634     case OP_SCRIPT_RUN:
12635     case OP_BRA:
12636     case OP_CBRA:
12637     case OP_COND:
12638     case OP_SBRA:
12639     case OP_SCBRA:
12640     case OP_SCOND:
12641     cc = compile_bracket_matchingpath(common, cc, parent);
12642     break;
12643 
        /* OP_BRAZERO: the opcode that follows selects between the bracket
           generator (cc[1] > OP_ASSERTBACK_NOT) and the assertion generator. */
12644     case OP_BRAZERO:
12645     if (cc[1] > OP_ASSERTBACK_NOT)
12646       cc = compile_bracket_matchingpath(common, cc, parent);
12647     else
12648       {
12649       PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
12650       cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
12651       }
12652     break;
12653 
12654     case OP_BRAPOS:
12655     case OP_CBRAPOS:
12656     case OP_SBRAPOS:
12657     case OP_SCBRAPOS:
12658     case OP_BRAPOSZERO:
12659     cc = compile_bracketpos_matchingpath(common, cc, parent);
12660     break;
12661 
        /* OP_MARK: the previous value of the mark_ptr local is saved on the
           stack (slot 4 when has_skip_arg is set, slot 0 otherwise), then
           cc + 2 is stored both in that local and in jit_arguments.mark_ptr.
           With has_skip_arg, four further slots are filled and
           control_head_ptr is pointed at STACK_TOP: slot 0 holds the previous
           control head, slot 1 type_mark, slot 2 cc + 2, slot 3 STR_PTR. */
12662     case OP_MARK:
12663     PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
12664     SLJIT_ASSERT(common->mark_ptr != 0);
12665     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
12666     allocate_stack(common, common->has_skip_arg ? 5 : 1);
12667     if (HAS_VIRTUAL_REGISTERS)
12668       OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12669     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
12670     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
12671     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
12672     OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
12673     if (common->has_skip_arg)
12674       {
12675       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12676       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
12677       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
12678       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
12679       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
12680       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
12681       }
        /* Skip the opcode, two further code units and cc[1] more. */
12682     cc += 1 + 2 + cc[1];
12683     break;
12684 
12685     case OP_PRUNE:
12686     case OP_PRUNE_ARG:
12687     case OP_SKIP:
12688     case OP_SKIP_ARG:
12689     case OP_THEN:
12690     case OP_THEN_ARG:
12691     case OP_COMMIT:
12692     case OP_COMMIT_ARG:
12693     cc = compile_control_verb_matchingpath(common, cc, parent);
12694     break;
12695 
12696     case OP_FAIL:
12697     case OP_ACCEPT:
12698     case OP_ASSERT_ACCEPT:
12699     cc = compile_fail_accept_matchingpath(common, cc, parent);
12700     break;
12701 
12702     case OP_CLOSE:
12703     cc = compile_close_matchingpath(common, cc);
12704     break;
12705 
        /* OP_SKIPZERO: nothing is emitted; cc moves to bracketend(cc + 1). */
12706     case OP_SKIPZERO:
12707     cc = bracketend(cc + 1);
12708     break;
12709 
12710     default:
12711     SLJIT_UNREACHABLE();
12712     return;
12713     }
      /* A generator returned NULL: stop compiling this range. */
12714   if (cc == NULL)
12715     return;
12716   }
12717 
12718 if (has_then_trap)
12719   {
12720   /* Head item on backtrack. */
12721   PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
12722   BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
      /* Record the trap that was active for this range, then reinstate the
         one saved on entry. */
12723   BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
12724   common->then_trap = save_then_trap;
12725   }
12726 SLJIT_ASSERT(cc == ccend);
12727 }
12728 
/* The record-pushing helper macros are retired here; the functions that
   follow generate backtracking paths and use the two macros below instead. */
12729 #undef PUSH_BACKTRACK
12730 #undef PUSH_BACKTRACK_NOVALUE
12731 #undef BACKTRACK_AS
12732 
/* Runs compile_backtrackingpath(common, current) and leaves the enclosing
   function with a bare `return;` when sljit_get_compiler_error(compiler)
   reports an error afterwards. The expansion picks up `common` and `compiler`
   from the caller's scope and, because of the valueless return, is only
   usable inside functions returning void. */
12733 #define COMPILE_BACKTRACKINGPATH(current) \
12734   do \
12735     { \
12736     compile_backtrackingpath(common, (current)); \
12737     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
12738       return; \
12739     } \
12740   while (0)
12741 
/* Casts the caller's local variable named `current` to a pointer to `type`. */
12742 #define CURRENT_AS(type) ((type *)current)
12743 
/* Emits the backtracking path for a repeated single item.

   common   compiler state
   current  backtrack record of the iterator; it is accessed as a
            char_iterator_backtrack

   The opcode, item type and limits are decoded by get_iterator_parameters().
   The iterator state is read from and written to two machine words: when
   PRIVATE_DATA(cc) is zero they are the two topmost stack slots (released
   with free_stack() once the iterator is finished), otherwise they are two
   consecutive words at that offset from SLJIT_SP. Every opcode ends by
   binding current->own_backtracks to the code emitted after it. */
static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
char_iterator_backtrack *iter = CURRENT_AS(char_iterator_backtrack);
PCRE2_SPTR cc = current->cc;
PCRE2_SPTR end;
PCRE2_UCHAR opcode;
PCRE2_UCHAR type;
sljit_u32 max = 0, exact;
struct sljit_label *retry = NULL;
struct sljit_jump *skip = NULL;
jump_list *no_match = NULL;
int private_data_ptr = PRIVATE_DATA(cc);
BOOL on_stack = (private_data_ptr == 0);
int base, offset0, offset1;

/* Select where the two state words live. */
if (on_stack)
  {
  base = SLJIT_MEM1(STACK_TOP);
  offset0 = STACK(0);
  offset1 = STACK(1);
  }
else
  {
  base = SLJIT_MEM1(SLJIT_SP);
  offset0 = private_data_ptr;
  offset1 = private_data_ptr + SSIZE_OF(sw);
  }

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);

switch(opcode)
  {
  case OP_STAR:
  case OP_UPTO:
  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    /* These item types always keep their state on the stack: pop one saved
       position at a time and resume matching until a zero is popped. */
    SLJIT_ASSERT(on_stack);
    set_jumps(iter->u.backtracks, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iter->matchingpath);
    break;
    }

  if (iter->u.charpos.enabled)
    {
    /* Word 0 is the current position, word 1 the lower limit. Step back
       until the code unit before STR_PTR equals u.charpos.chr (after
       OR-ing in othercasebit when that is set), or the limit is reached. */
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    OP1(SLJIT_MOV, TMP2, 0, base, offset1);
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

    skip = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
    retry = LABEL();
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    if (iter->u.charpos.othercasebit != 0)
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, iter->u.charpos.othercasebit);
    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, iter->u.charpos.chr, iter->matchingpath);
    move_back(common, NULL, TRUE);
    CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, retry);
    }
  else
    {
    /* Give back one item and resume matching, unless the position in
       word 0 is already at or below the limit in word 1. */
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    skip = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
    move_back(common, NULL, TRUE);
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, iter->matchingpath);
    }
  JUMPHERE(skip);
  if (on_stack)
    free_stack(common, 2);
  break;

  case OP_MINSTAR:
  /* Try to match one more item from the saved position; on success store
     the new position and resume matching. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);
  set_jumps(no_match, LABEL());
  if (on_stack)
    free_stack(common, 1);
  break;

  case OP_MINUPTO:
  /* As OP_MINSTAR, but word 1 holds a counter that is decremented first;
     reaching zero ends the iterator. */
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));

  OP1(SLJIT_MOV, base, offset1, TMP1, 0);
  compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);

  set_jumps(no_match, LABEL());
  if (on_stack)
    free_stack(common, 2);
  break;

  case OP_QUERY:
  /* Word 0 is consumed (read, then zeroed). A non-zero value resumes
     matching from that position. The jumps in u.backtracks land on a
     second copy of the read-and-zero sequence that always resumes. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iter->matchingpath);
  skip = JUMP(SLJIT_JUMP);
  set_jumps(iter->u.backtracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);
  JUMPHERE(skip);
  if (on_stack)
    free_stack(common, 1);
  break;

  case OP_MINQUERY:
  /* Word 0 is consumed; a non-zero value is the position from which the
     single optional item is now attempted. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  skip = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);
  set_jumps(no_match, LABEL());
  JUMPHERE(skip);
  if (on_stack)
    free_stack(common, 1);
  break;

  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSQUERY:
  case OP_POSUPTO:
  /* Nothing to emit for these opcodes. */
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

set_jumps(current->own_backtracks, LABEL());
}
12871 
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)12872 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12873 {
12874 DEFINE_COMPILER;
12875 PCRE2_SPTR cc = current->cc;
12876 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
12877 PCRE2_UCHAR type;
12878 
12879 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
12880 
12881 if ((type & 0x1) == 0)
12882   {
12883   /* Maximize case. */
12884   set_jumps(current->own_backtracks, LABEL());
12885   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12886   free_stack(common, 1);
12887   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12888   return;
12889   }
12890 
12891 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12892 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12893 set_jumps(current->own_backtracks, LABEL());
12894 free_stack(common, ref ? 2 : 3);
12895 }
12896 
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)12897 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12898 {
12899 DEFINE_COMPILER;
12900 recurse_entry *entry;
12901 
12902 if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
12903   {
12904   entry = CURRENT_AS(recurse_backtrack)->entry;
12905   if (entry->backtrack_label == NULL)
12906     add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
12907   else
12908     JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
12909   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
12910   }
12911 else
12912   compile_backtrackingpath(common, current->top);
12913 
12914 set_jumps(current->own_backtracks, LABEL());
12915 }
12916 
/* Emits the backtracking path for an assertion, optionally preceded by
   OP_BRAZERO (OP_BRAMINZERO is not accepted here).

   common   compiler state
   current  backtrack record of the assertion; it is accessed as an
            assert_backtrack

   With OP_BRAZERO, the topmost stack slot is loaded into STR_PTR first. A
   non-zero value means the assertion still has to be tried through the
   matching path, and a zero is stored (in the same slot, or in a re-opened
   one) before jumping there. */
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
assert_backtrack *record = CURRENT_AS(assert_backtrack);
PCRE2_SPTR cc = current->cc;
struct sljit_jump *skip_retry = NULL;
BOOL optional;

SLJIT_ASSERT(*cc != OP_BRAMINZERO);
optional = (*cc == OP_BRAZERO);
if (optional)
  {
  cc++;
  SLJIT_ASSERT(current->own_backtracks == NULL);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

/* No frame was created: only the optional-group slot needs handling. */
if (record->framesize < 0)
  {
  set_jumps(current->own_backtracks, LABEL());
  if (!optional)
    return;

  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, record->matchingpath);
  free_stack(common, 1);
  return;
  }

if (optional)
  {
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
    {
    /* Negative assertions need no frame handling on this path. */
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, record->matchingpath);
    free_stack(common, 1);
    return;
    }
  free_stack(common, 1);
  skip_retry = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
  {
  /* Positive assertion with a frame: reload STACK_TOP from the private
     data slot, call the revertframes routine, move STACK_TOP past the
     frame and store the value read from STACK(-2) back into the slot. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), record->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (record->framesize - 1) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), record->private_data_ptr, TMP1, 0);
  }
set_jumps(current->own_backtracks, LABEL());

if (!optional)
  return;

/* The slot released above is known to be available, so it is re-opened
   without a stack check. */
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
JUMPTO(SLJIT_JUMP, record->matchingpath);
JUMPHERE(skip_retry);
}
12985 
compile_bracket_backtrackingpath(compiler_common * common,struct backtrack_common * current)12986 static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12987 {
12988 DEFINE_COMPILER;
12989 int opcode, stacksize, alt_count, alt_max;
12990 int offset = 0;
12991 int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
12992 int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
12993 PCRE2_SPTR cc = current->cc;
12994 PCRE2_SPTR ccbegin;
12995 PCRE2_SPTR ccprev;
12996 PCRE2_UCHAR bra = OP_BRA;
12997 PCRE2_UCHAR ket;
12998 assert_backtrack *assert;
12999 BOOL has_alternatives;
13000 BOOL needs_control_head = FALSE;
13001 BOOL has_vreverse;
13002 struct sljit_jump *brazero = NULL;
13003 struct sljit_jump *next_alt = NULL;
13004 struct sljit_jump *once = NULL;
13005 struct sljit_jump *cond = NULL;
13006 struct sljit_label *rmin_label = NULL;
13007 struct sljit_label *exact_label = NULL;
13008 struct sljit_put_label *put_label = NULL;
13009 
13010 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
13011   {
13012   bra = *cc;
13013   cc++;
13014   }
13015 
13016 opcode = *cc;
13017 ccbegin = bracketend(cc) - 1 - LINK_SIZE;
13018 ket = *ccbegin;
13019 if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
13020   {
13021   repeat_ptr = PRIVATE_DATA(ccbegin);
13022   repeat_type = PRIVATE_DATA(ccbegin + 2);
13023   repeat_count = PRIVATE_DATA(ccbegin + 3);
13024   SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
13025   if (repeat_type == OP_UPTO)
13026     ket = OP_KETRMAX;
13027   if (repeat_type == OP_MINUPTO)
13028     ket = OP_KETRMIN;
13029   }
13030 ccbegin = cc;
13031 cc += GET(cc, 1);
13032 has_alternatives = *cc == OP_ALT;
13033 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
13034   has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
13035 if (opcode == OP_CBRA || opcode == OP_SCBRA)
13036   offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
13037 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
13038   opcode = OP_SCOND;
13039 
13040 alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
13041 
13042 /* Decoding the needs_control_head in framesize. */
13043 if (opcode == OP_ONCE)
13044   {
13045   needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
13046   CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
13047   }
13048 
13049 if (ket != OP_KET && repeat_type != 0)
13050   {
13051   /* TMP1 is used in OP_KETRMIN below. */
13052   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13053   free_stack(common, 1);
13054   if (repeat_type == OP_UPTO)
13055     OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
13056   else
13057     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
13058   }
13059 
13060 if (ket == OP_KETRMAX)
13061   {
13062   if (bra == OP_BRAZERO)
13063     {
13064     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13065     free_stack(common, 1);
13066     brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13067     }
13068   }
13069 else if (ket == OP_KETRMIN)
13070   {
13071   if (bra != OP_BRAMINZERO)
13072     {
13073     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13074     if (repeat_type != 0)
13075       {
13076       /* TMP1 was set a few lines above. */
13077       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13078       /* Drop STR_PTR for non-greedy plus quantifier. */
13079       if (opcode != OP_ONCE)
13080         free_stack(common, 1);
13081       }
13082     else if (opcode >= OP_SBRA || opcode == OP_ONCE)
13083       {
13084       /* Checking zero-length iteration. */
13085       if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
13086         CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13087       else
13088         {
13089         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13090         CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13091         }
13092       /* Drop STR_PTR for non-greedy plus quantifier. */
13093       if (opcode != OP_ONCE)
13094         free_stack(common, 1);
13095       }
13096     else
13097       JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13098     }
13099   rmin_label = LABEL();
13100   if (repeat_type != 0)
13101     OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
13102   }
13103 else if (bra == OP_BRAZERO)
13104   {
13105   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13106   free_stack(common, 1);
13107   brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13108   }
13109 else if (repeat_type == OP_EXACT)
13110   {
13111   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
13112   exact_label = LABEL();
13113   }
13114 
13115 if (offset != 0)
13116   {
13117   if (common->capture_last_ptr != 0)
13118     {
13119     SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
13120     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13121     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13122     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
13123     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
13124     free_stack(common, 3);
13125     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
13126     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
13127     }
13128   else if (common->optimized_cbracket[offset >> 1] == 0)
13129     {
13130     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13131     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13132     free_stack(common, 2);
13133     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13134     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13135     }
13136   }
13137 
13138 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
13139   {
13140   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13141     {
13142     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13143     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13144     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
13145     }
13146   once = JUMP(SLJIT_JUMP);
13147   }
13148 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
13149   {
13150   if (has_alternatives)
13151     {
13152     /* Always exactly one alternative. */
13153     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13154     free_stack(common, 1);
13155 
13156     alt_max = 2;
13157     next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13158     }
13159   }
13160 else if (has_alternatives)
13161   {
13162   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13163   free_stack(common, 1);
13164 
13165   if (alt_max > 3)
13166     {
13167     sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
13168 
13169     SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_put_label);
13170     sljit_set_put_label(CURRENT_AS(bracket_backtrack)->u.matching_put_label, LABEL());
13171     sljit_emit_op0(compiler, SLJIT_ENDBR);
13172     }
13173   else
13174     next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13175   }
13176 
13177 COMPILE_BACKTRACKINGPATH(current->top);
13178 if (current->own_backtracks)
13179   set_jumps(current->own_backtracks, LABEL());
13180 
13181 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
13182   {
13183   /* Conditional block always has at most one alternative. */
13184   if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
13185     {
13186     SLJIT_ASSERT(has_alternatives);
13187     assert = CURRENT_AS(bracket_backtrack)->u.assert;
13188     if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
13189       {
13190       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
13191       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13192       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
13193       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
13194       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
13195       }
13196     cond = JUMP(SLJIT_JUMP);
13197     set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
13198     }
13199   else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
13200     {
13201     SLJIT_ASSERT(has_alternatives);
13202     cond = JUMP(SLJIT_JUMP);
13203     set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
13204     }
13205   else
13206     SLJIT_ASSERT(!has_alternatives);
13207   }
13208 
13209 if (has_alternatives)
13210   {
13211   alt_count = 1;
13212   do
13213     {
13214     current->top = NULL;
13215     current->own_backtracks = NULL;
13216     current->simple_backtracks = NULL;
13217     /* Conditional blocks always have an additional alternative, even if it is empty. */
13218     if (*cc == OP_ALT)
13219       {
13220       ccprev = cc + 1 + LINK_SIZE;
13221       cc += GET(cc, 1);
13222 
13223       has_vreverse = FALSE;
13224       if (opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NA)
13225         {
13226         SLJIT_ASSERT(private_data_ptr != 0);
13227         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13228 
13229         has_vreverse = (*ccprev == OP_VREVERSE);
13230         if (*ccprev == OP_REVERSE || has_vreverse)
13231           ccprev = compile_reverse_matchingpath(common, ccprev, current);
13232         }
13233       else if (opcode != OP_COND && opcode != OP_SCOND)
13234         {
13235         if (opcode != OP_ONCE)
13236           {
13237           if (private_data_ptr != 0)
13238             OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13239           else
13240             OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13241           }
13242         else
13243           OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
13244         }
13245 
13246       compile_matchingpath(common, ccprev, cc, current);
13247       if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13248         return;
13249 
13250       switch (opcode)
13251         {
13252         case OP_ASSERTBACK_NA:
13253           if (has_vreverse)
13254             {
13255             SLJIT_ASSERT(current->top != NULL && PRIVATE_DATA(ccbegin + 1));
13256             add_jump(compiler, &current->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
13257             }
13258 
13259           if (PRIVATE_DATA(ccbegin + 1))
13260             OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
13261           break;
13262         case OP_ASSERT_NA:
13263           OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13264           break;
13265         case OP_SCRIPT_RUN:
13266           match_script_run_common(common, private_data_ptr, current);
13267           break;
13268         }
13269       }
13270 
13271     /* Instructions after the current alternative is successfully matched. */
13272     /* There is a similar code in compile_bracket_matchingpath. */
13273     if (opcode == OP_ONCE)
13274       match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
13275 
13276     stacksize = 0;
13277     if (repeat_type == OP_MINUPTO)
13278       {
13279       /* We need to preserve the counter. TMP2 will be used below. */
13280       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
13281       stacksize++;
13282       }
13283     if (ket != OP_KET || bra != OP_BRA)
13284       stacksize++;
13285     if (offset != 0)
13286       {
13287       if (common->capture_last_ptr != 0)
13288         stacksize++;
13289       if (common->optimized_cbracket[offset >> 1] == 0)
13290         stacksize += 2;
13291       }
13292     if (opcode != OP_ONCE)
13293       stacksize++;
13294 
13295     if (stacksize > 0)
13296       allocate_stack(common, stacksize);
13297 
13298     stacksize = 0;
13299     if (repeat_type == OP_MINUPTO)
13300       {
13301       /* TMP2 was set above. */
13302       OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
13303       stacksize++;
13304       }
13305 
13306     if (ket != OP_KET || bra != OP_BRA)
13307       {
13308       if (ket != OP_KET)
13309         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
13310       else
13311         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
13312       stacksize++;
13313       }
13314 
13315     if (offset != 0)
13316       stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
13317 
13318     if (opcode != OP_ONCE)
13319       {
13320       if (alt_max <= 3)
13321         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
13322       else
13323         put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
13324       }
13325 
13326     if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
13327       {
13328       /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
13329       SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
13330       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
13331       }
13332 
13333     JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
13334 
13335     if (opcode != OP_ONCE)
13336       {
13337       if (alt_max <= 3)
13338         {
13339         JUMPHERE(next_alt);
13340         alt_count++;
13341         if (alt_count < alt_max)
13342           {
13343           SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
13344           next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
13345           }
13346         }
13347       else
13348         {
13349         sljit_set_put_label(put_label, LABEL());
13350         sljit_emit_op0(compiler, SLJIT_ENDBR);
13351         }
13352       }
13353 
13354     COMPILE_BACKTRACKINGPATH(current->top);
13355     if (current->own_backtracks)
13356       set_jumps(current->own_backtracks, LABEL());
13357     SLJIT_ASSERT(!current->simple_backtracks);
13358     }
13359   while (*cc == OP_ALT);
13360 
13361   if (cond != NULL)
13362     {
13363     SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
13364     assert = CURRENT_AS(bracket_backtrack)->u.assert;
13365     if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
13366       {
13367       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
13368       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13369       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
13370       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
13371       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
13372       }
13373     JUMPHERE(cond);
13374     }
13375 
13376   /* Free the STR_PTR. */
13377   if (private_data_ptr == 0)
13378     free_stack(common, 1);
13379   }
13380 
13381 if (offset != 0)
13382   {
13383   /* Using both tmp register is better for instruction scheduling. */
13384   if (common->optimized_cbracket[offset >> 1] != 0)
13385     {
13386     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13387     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13388     free_stack(common, 2);
13389     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13390     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13391     }
13392   else
13393     {
13394     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13395     free_stack(common, 1);
13396     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13397     }
13398   }
13399 else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))
13400   {
13401   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13402   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13403   OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
13404   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13405   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP2, 0);
13406   free_stack(common, 4);
13407   }
13408 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
13409   {
13410   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
13411   free_stack(common, 1);
13412   }
13413 else if (opcode == OP_ONCE)
13414   {
13415   cc = ccbegin + GET(ccbegin, 1);
13416   stacksize = needs_control_head ? 1 : 0;
13417 
13418   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13419     {
13420     /* Reset head and drop saved frame. */
13421     stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
13422     }
13423   else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
13424     {
13425     /* The STR_PTR must be released. */
13426     stacksize++;
13427     }
13428 
13429   if (stacksize > 0)
13430     free_stack(common, stacksize);
13431 
13432   JUMPHERE(once);
13433   /* Restore previous private_data_ptr */
13434   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13435     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
13436   else if (ket == OP_KETRMIN)
13437     {
13438     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13439     /* See the comment below. */
13440     free_stack(common, 2);
13441     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13442     }
13443   }
13444 
13445 if (repeat_type == OP_EXACT)
13446   {
13447   OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
13448   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
13449   CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
13450   }
13451 else if (ket == OP_KETRMAX)
13452   {
13453   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13454   if (bra != OP_BRAZERO)
13455     free_stack(common, 1);
13456 
13457   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13458   if (bra == OP_BRAZERO)
13459     {
13460     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13461     JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
13462     JUMPHERE(brazero);
13463     free_stack(common, 1);
13464     }
13465   }
13466 else if (ket == OP_KETRMIN)
13467   {
13468   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13469 
13470   /* OP_ONCE removes everything in case of a backtrack, so we don't
13471   need to explicitly release the STR_PTR. The extra release would
13472   affect badly the free_stack(2) above. */
13473   if (opcode != OP_ONCE)
13474     free_stack(common, 1);
13475   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
13476   if (opcode == OP_ONCE)
13477     free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
13478   else if (bra == OP_BRAMINZERO)
13479     free_stack(common, 1);
13480   }
13481 else if (bra == OP_BRAZERO)
13482   {
13483   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13484   JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
13485   JUMPHERE(brazero);
13486   }
13487 }
13488 
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)13489 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13490 {
13491 DEFINE_COMPILER;
13492 int offset;
13493 struct sljit_jump *jump;
13494 PCRE2_SPTR cc;
13495 
13496 /* No retry on backtrack, just drop everything. */
13497 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
13498   {
13499   cc = current->cc;
13500 
13501   if (*cc == OP_BRAPOSZERO)
13502     cc++;
13503 
13504   if (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS)
13505     {
13506     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
13507     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13508     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13509     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13510     if (common->capture_last_ptr != 0)
13511       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
13512     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13513     if (common->capture_last_ptr != 0)
13514       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
13515     }
13516   set_jumps(current->own_backtracks, LABEL());
13517   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
13518   return;
13519   }
13520 
13521 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
13522 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13523 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
13524 
13525 if (current->own_backtracks)
13526   {
13527   jump = JUMP(SLJIT_JUMP);
13528   set_jumps(current->own_backtracks, LABEL());
13529   /* Drop the stack frame. */
13530   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
13531   JUMPHERE(jump);
13532   }
13533 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
13534 }
13535 
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)13536 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13537 {
13538 assert_backtrack backtrack;
13539 
13540 current->top = NULL;
13541 current->own_backtracks = NULL;
13542 current->simple_backtracks = NULL;
13543 if (current->cc[1] > OP_ASSERTBACK_NOT)
13544   {
13545   /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
13546   compile_bracket_matchingpath(common, current->cc, current);
13547   compile_bracket_backtrackingpath(common, current->top);
13548   }
13549 else
13550   {
13551   memset(&backtrack, 0, sizeof(backtrack));
13552   backtrack.common.cc = current->cc;
13553   backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
13554   /* Manual call of compile_assert_matchingpath. */
13555   compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
13556   }
13557 SLJIT_ASSERT(!current->simple_backtracks && !current->own_backtracks);
13558 }
13559 
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)13560 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13561 {
13562 DEFINE_COMPILER;
13563 PCRE2_UCHAR opcode = *current->cc;
13564 struct sljit_label *loop;
13565 struct sljit_jump *jump;
13566 
13567 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
13568   {
13569   if (common->then_trap != NULL)
13570     {
13571     SLJIT_ASSERT(common->control_head_ptr != 0);
13572 
13573     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13574     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
13575     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
13576     jump = JUMP(SLJIT_JUMP);
13577 
13578     loop = LABEL();
13579     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13580     JUMPHERE(jump);
13581     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
13582     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
13583     add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
13584     return;
13585     }
13586   else if (!common->local_quit_available && common->in_positive_assertion)
13587     {
13588     add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
13589     return;
13590     }
13591   }
13592 
13593 if (common->local_quit_available)
13594   {
13595   /* Abort match with a fail. */
13596   if (common->quit_label == NULL)
13597     add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
13598   else
13599     JUMPTO(SLJIT_JUMP, common->quit_label);
13600   return;
13601   }
13602 
13603 if (opcode == OP_SKIP_ARG)
13604   {
13605   SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13606   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13607   OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
13608   sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_search_mark));
13609 
13610   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
13611   add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
13612   return;
13613   }
13614 
13615 if (opcode == OP_SKIP)
13616   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13617 else
13618   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
13619 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
13620 }
13621 
compile_vreverse_backtrackingpath(compiler_common * common,struct backtrack_common * current)13622 static SLJIT_INLINE void compile_vreverse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13623 {
13624 DEFINE_COMPILER;
13625 struct sljit_jump *jump;
13626 struct sljit_label *label;
13627 
13628 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
13629 jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(3));
13630 skip_valid_char(common);
13631 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
13632 JUMPTO(SLJIT_JUMP, CURRENT_AS(vreverse_backtrack)->matchingpath);
13633 
13634 label = LABEL();
13635 sljit_set_label(jump, label);
13636 set_jumps(current->own_backtracks, label);
13637 }
13638 
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)13639 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13640 {
13641 DEFINE_COMPILER;
13642 struct sljit_jump *jump;
13643 int size;
13644 
13645 if (CURRENT_AS(then_trap_backtrack)->then_trap)
13646   {
13647   common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
13648   return;
13649   }
13650 
13651 size = CURRENT_AS(then_trap_backtrack)->framesize;
13652 size = 3 + (size < 0 ? 0 : size);
13653 
13654 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
13655 free_stack(common, size);
13656 jump = JUMP(SLJIT_JUMP);
13657 
13658 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
13659 /* STACK_TOP is set by THEN. */
13660 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
13661   {
13662   add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13663   OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
13664   }
13665 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13666 free_stack(common, 3);
13667 
13668 JUMPHERE(jump);
13669 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
13670 }
13671 
/* Walks a chain of backtrack records, starting at current and following the
prev links, and generates the backtracking code of every record according
to the opcode its cc field points to.

Arguments:
  common      the compiler state
  current     first record of the chain, may be NULL

The value common->then_trap has on entry is put back before returning.
*/
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *outer_then_trap = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  /* The simple backtracks of this record land right here. */
  if (current->simple_backtracks != NULL)
    set_jumps(current->simple_backtracks, LABEL());

  switch (*current->cc)
    {
    case OP_SET_SOM:
    /* Put the word saved on the stack back into OVECTOR(0). */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Single character, negated character, character type and class
    iterators. */
    case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
    case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
    case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
    case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
    case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
    case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
    case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
    case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
    case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPLUS: case OP_TYPEMINPLUS:
    case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEUPTO: case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
    case OP_CLASS: case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF: case OP_REFI: case OP_DNREF: case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT: case OP_ASSERT_NOT:
    case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ASSERT_NA: case OP_ASSERTBACK_NA:
    case OP_ONCE: case OP_SCRIPT_RUN:
    case OP_BRA: case OP_CBRA: case OP_COND:
    case OP_SBRA: case OP_SCBRA: case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode after OP_BRAZERO tells assertions and brackets apart. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS: case OP_CBRAPOS:
    case OP_SBRAPOS: case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    if (common->has_skip_arg)
      {
      /* Five words are released: slot 4 goes back to the mark pointer and
      slot 0 to the control head. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      /* A single word is released and goes back to the mark pointer. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN: case OP_THEN_ARG:
    case OP_PRUNE: case OP_PRUNE_ARG:
    case OP_SKIP: case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT: case OP_COMMIT_ARG:
    /* Without a local quit the return register is loaded with
    PCRE2_ERROR_NOMATCH first; in both cases control goes to quit. */
    if (!common->local_quit_available)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
    if (common->quit_label != NULL)
      JUMPTO(SLJIT_JUMP, common->quit_label);
    else
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    break;

    case OP_CALLOUT: case OP_CALLOUT_STR:
    case OP_FAIL:
    case OP_ACCEPT: case OP_ASSERT_ACCEPT:
    set_jumps(current->own_backtracks, LABEL());
    break;

    case OP_VREVERSE:
    compile_vreverse_backtrackingpath(common, current);
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  }

common->then_trap = outer_then_trap;
}
13866 
/* Generates the code of one recursion entry (common->currententry). The
generated code has an entry point for the matching path (entry_label), an
entry point for the backtracking path (backtrack_label) and several exit
sequences. Every exit ends with SLJIT_FAST_RETURN; TMP1 is loaded with 1 on
the matched exit and with 0 on the other exits.

Arguments:
  common      the compiler state; currententry selects the bracket

Returns:      nothing; generation stops early when sljit reports an error
*/
static SLJIT_INLINE void compile_recurse(compiler_common *common)
{
DEFINE_COMPILER;
PCRE2_SPTR cc = common->start + common->currententry->start;
PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
uint32_t recurse_flags = 0;
int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &recurse_flags);
int alt_count, alt_max, local_size;
backtrack_common altbacktrack;
jump_list *match = NULL;
struct sljit_jump *next_alt = NULL;
struct sljit_jump *accept_exit = NULL;
struct sljit_label *quit;
struct sljit_put_label *put_label = NULL;

/* Recurse captures then. */
common->then_trap = NULL;

SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);

alt_max = no_alternatives(cc);
alt_count = 0;

/* Matching path. */
SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
common->currententry->entry_label = LABEL();
set_jumps(common->currententry->entry_calls, common->currententry->entry_label);

/* The return address of the fast call is received in TMP2. */
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP2, 0);
count_match(common);

/* One local word holds the return address. With more than one alternative
a second word keeps STR_PTR, so later alternatives can restart from it. */
local_size = (alt_max > 1) ? 2 : 1;

/* (Reversed) stack layout:
   [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */

allocate_stack(common, private_data_size + local_size);
/* Save return address. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);

copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, recurse_flags);

/* This variable is saved and restored all time when we enter or exit from a recursive context. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);

if (recurse_flags & recurse_flag_control_head_found)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);

if (alt_max > 1)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

/* The quit/accept labels and jump lists start out empty for this entry. */
memset(&altbacktrack, 0, sizeof(backtrack_common));
common->quit_label = NULL;
common->accept_label = NULL;
common->quit = NULL;
common->accept = NULL;
altbacktrack.cc = ccbegin;
/* From here on cc is passed as the end of the alternative being compiled. */
cc += GET(cc, 1);
while (1)
  {
  altbacktrack.top = NULL;
  altbacktrack.own_backtracks = NULL;

  /* Every alternative except the first reloads STR_PTR from the local slot. */
  if (altbacktrack.cc != ccbegin)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;

  /* Frame pushed when the alternative matched. Slot 0 is written at the
  common match exit below (from TMP2, loaded here with recursive_head_ptr);
  slot 1, when present, holds the alternative selector. */
  allocate_stack(common, (alt_max > 1 || (recurse_flags & recurse_flag_accept_found)) ? 2 : 1);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);

  if (alt_max > 1 || (recurse_flags & recurse_flag_accept_found))
    {
    /* With more than three alternatives the selector is a code address
    that is filled in later through put_label; otherwise it is the index
    of the alternative. */
    if (alt_max > 3)
      put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
    else
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
    }

  add_jump(compiler, &match, JUMP(SLJIT_JUMP));

  if (alt_count == 0)
    {
    /* Backtracking path entry. */
    SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
    common->currententry->backtrack_label = LABEL();
    set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);

    /* The return address of this fast call is received in TMP1. */
    sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP1, 0);

    /* A selector of -1 marks the frame pushed by the accept path below. */
    if (recurse_flags & recurse_flag_accept_found)
      accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);

    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    /* Save return address. */
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);

    copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);

    if (alt_max > 1)
      {
      /* Fetch the selector and release the two-word frame. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
      free_stack(common, 2);

      if (alt_max > 3)
        {
        /* Indirect jump through the selector. The selector stored by the
        first alternative is resolved to the code emitted right after it. */
        sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
        sljit_set_put_label(put_label, LABEL());
        sljit_emit_op0(compiler, SLJIT_ENDBR);
        }
      else
        next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
      }
    else
      free_stack(common, (recurse_flags & recurse_flag_accept_found) ? 2 : 1);
    }
  else if (alt_max > 3)
    {
    /* Resolve the selector address stored by this alternative: it refers to
    the backtracking code emitted below. */
    sljit_set_put_label(put_label, LABEL());
    sljit_emit_op0(compiler, SLJIT_ENDBR);
    }
  else
    {
    /* The selector did not name the previous alternative. With three
    alternatives one more compare (against index 1) is needed. */
    JUMPHERE(next_alt);
    if (alt_count + 1 < alt_max)
      {
      SLJIT_ASSERT(alt_count == 1 && alt_max == 3);
      next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
      }
    }

  alt_count++;

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;
  set_jumps(altbacktrack.own_backtracks, LABEL());

  if (*cc != OP_ALT)
    break;

  altbacktrack.cc = cc + 1 + LINK_SIZE;
  cc += GET(cc, 1);
  }

/* No alternative is matched. */

quit = LABEL();

copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, recurse_flags);

/* Reload the return address, release the whole frame and return with TMP1 = 0. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
free_stack(common, private_data_size + local_size);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);

if (common->quit != NULL)
  {
  SLJIT_ASSERT(recurse_flags & recurse_flag_quit_found);

  /* Jumps collected in common->quit: restore the stack top from
  recursive_head_ptr, copy the shared data and join the exit above. */
  set_jumps(common->quit, LABEL());
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, recurse_flags);
  JUMPTO(SLJIT_JUMP, quit);
  }

if (recurse_flags & recurse_flag_accept_found)
  {
  /* Reached from the backtracking entry when the frame selector is -1. */
  JUMPHERE(accept_exit);
  free_stack(common, 2);

  /* Save return address. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);

  copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, recurse_flags);

  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
  free_stack(common, private_data_size + local_size);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
  }

if (common->accept != NULL)
  {
  SLJIT_ASSERT(recurse_flags & recurse_flag_accept_found);

  /* Jumps collected in common->accept: restore the stack top from
  recursive_head_ptr, keep it in TMP2 and push a two-word frame whose
  selector is -1. Control then falls into the match exit below. */
  set_jumps(common->accept, LABEL());

  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);

  allocate_stack(common, 2);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
  }

/* Common match exit: every matched alternative jumps here. */
set_jumps(match, LABEL());

/* TMP2 holds recursive_head_ptr; it goes into slot 0 of the pushed frame. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);

copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);

/* Load the return address through TMP2 and return with TMP1 = 1. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
}
14075 
/* Remove the helper macros used by the backtracking path generators above. */
#undef COMPILE_BACKTRACKINGPATH
#undef CURRENT_AS

/* Bits of the jit_compile() mode argument that are configuration flags.
jit_compile() reads them and then clears them from mode. */
#define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS \
  (PCRE2_JIT_INVALID_UTF)
14081 
/* Generates the machine code for one matching mode of a compiled pattern and
records it in the pattern's executable_functions descriptor
(re->executable_jit), which is allocated here if it does not exist yet.

Arguments:
  code          the compiled pattern
  mode          the matching mode to generate code for (compared against
                PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_SOFT and
                PCRE2_JIT_PARTIAL_HARD), optionally combined with the bits in
                PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS, which are masked out
                before the mode is used

Returns:        0 on success
                PCRE2_ERROR_INTERNAL if the newline convention is not one of
                  the values handled by the switch below
                PCRE2_ERROR_NOMEMORY for every other failure detected here
                  (allocation failure, check_opcode_types() rejecting the
                  pattern, private data larger than 65536 bytes, or an error
                  reported by the sljit compiler)
*/
jit_compile(pcre2_code * code,sljit_u32 mode)14082 static int jit_compile(pcre2_code *code, sljit_u32 mode)
14083 {
14084 pcre2_real_code *re = (pcre2_real_code *)code;
14085 struct sljit_compiler *compiler;
14086 backtrack_common rootbacktrack;
14087 compiler_common common_data;
14088 compiler_common *common = &common_data;
14089 const sljit_u8 *tables = re->tables;
14090 void *allocator_data = &re->memctl;
14091 int private_data_size;
14092 PCRE2_SPTR ccend;
14093 executable_functions *functions;
14094 void *executable_func;
14095 sljit_uw executable_size;
14096 sljit_uw total_length;
14097 struct sljit_label *mainloop_label = NULL;
14098 struct sljit_label *continue_match_label;
14099 struct sljit_label *empty_match_found_label = NULL;
14100 struct sljit_label *empty_match_backtrack_label = NULL;
14101 struct sljit_label *reset_match_label;
14102 struct sljit_label *quit_label;
14103 struct sljit_jump *jump;
14104 struct sljit_jump *minlength_check_failed = NULL;
14105 struct sljit_jump *empty_match = NULL;
14106 struct sljit_jump *end_anchor_failed = NULL;
14107 jump_list *reqcu_not_found = NULL;
14108 
14109 SLJIT_ASSERT(tables);
14110 
14111 #if HAS_VIRTUAL_REGISTERS == 1
14112 SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) < 0);
14113 #elif HAS_VIRTUAL_REGISTERS == 0
14114 SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) >= 0);
14115 #else
14116 #error "Invalid value for HAS_VIRTUAL_REGISTERS"
14117 #endif
14118 
14119 memset(&rootbacktrack, 0, sizeof(backtrack_common));
14120 memset(common, 0, sizeof(compiler_common));
14121 common->re = re;
/* The name table is located directly after the pcre2_real_code header, and the
position used as the start of the pattern follows the name table. */
14122 common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
14123 rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
14124 
14125 #ifdef SUPPORT_UNICODE
14126 common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
14127 #endif /* SUPPORT_UNICODE */
/* Remove the configuration bits; from here on mode is compared directly
against the PCRE2_JIT_* matching mode values. */
14128 mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;
14129 
14130 common->start = rootbacktrack.cc;
14131 common->read_only_data_head = NULL;
14132 common->fcc = tables + fcc_offset;
14133 common->lcc = (sljit_sw)(tables + lcc_offset);
14134 common->mode = mode;
14135 common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
14136 common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
14137 common->nltype = NLTYPE_FIXED;
14138 switch(re->newline_convention)
14139   {
14140   case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
14141   case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
14142   case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
14143   case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
14144   case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
14145   case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
14146   default: return PCRE2_ERROR_INTERNAL;
14147   }
14148 common->nlmax = READ_CHAR_MAX;
14149 common->nlmin = 0;
14150 if (re->bsr_convention == PCRE2_BSR_UNICODE)
14151   common->bsr_nltype = NLTYPE_ANY;
14152 else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
14153   common->bsr_nltype = NLTYPE_ANYCRLF;
14154 else
14155   {
14156 #ifdef BSR_ANYCRLF
14157   common->bsr_nltype = NLTYPE_ANYCRLF;
14158 #else
14159   common->bsr_nltype = NLTYPE_ANY;
14160 #endif
14161   }
14162 common->bsr_nlmax = READ_CHAR_MAX;
14163 common->bsr_nlmin = 0;
14164 common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
14165 common->ctypes = (sljit_sw)(tables + ctypes_offset);
14166 common->name_count = re->name_count;
14167 common->name_entry_size = re->name_entry_size;
14168 common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
14169 common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
14170 #ifdef SUPPORT_UNICODE
14171 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
14172 common->utf = (re->overall_options & PCRE2_UTF) != 0;
14173 common->ucp = (re->overall_options & PCRE2_UCP) != 0;
14174 if (common->utf)
14175   {
14176   if (common->nltype == NLTYPE_ANY)
14177     common->nlmax = 0x2029;
14178   else if (common->nltype == NLTYPE_ANYCRLF)
14179     common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
14180   else
14181     {
14182     /* We only care about the first newline character. */
14183     common->nlmax = common->newline & 0xff;
14184     }
14185 
14186   if (common->nltype == NLTYPE_FIXED)
14187     common->nlmin = common->newline & 0xff;
14188   else
14189     common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
14190 
14191   if (common->bsr_nltype == NLTYPE_ANY)
14192     common->bsr_nlmax = 0x2029;
14193   else
14194     common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
14195   common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
14196   }
14197 else
14198   common->invalid_utf = FALSE;
14199 #endif /* SUPPORT_UNICODE */
14200 ccend = bracketend(common->start);
14201 
14202 /* Calculate the local space size on the stack. */
14203 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
/* One byte per capturing bracket (plus one for bracket 0). */
14204 common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
14205 if (!common->optimized_cbracket)
14206   return PCRE2_ERROR_NOMEMORY;
14207 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
14208 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
14209 #else
14210 memset(common->optimized_cbracket, 1, re->top_bracket + 1);
14211 #endif
14212 
14213 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
14214 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
14215 common->capture_last_ptr = common->ovector_start;
14216 common->ovector_start += sizeof(sljit_sw);
14217 #endif
/* A failure of check_opcode_types() is also reported as PCRE2_ERROR_NOMEMORY. */
14218 if (!check_opcode_types(common, common->start, ccend))
14219   {
14220   SLJIT_FREE(common->optimized_cbracket, allocator_data);
14221   return PCRE2_ERROR_NOMEMORY;
14222   }
14223 
14224 /* Checking flags and updating ovector_start. */
14225 if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
14226   {
14227   common->req_char_ptr = common->ovector_start;
14228   common->ovector_start += sizeof(sljit_sw);
14229   }
14230 if (mode != PCRE2_JIT_COMPLETE)
14231   {
14232   common->start_used_ptr = common->ovector_start;
14233   common->ovector_start += sizeof(sljit_sw);
14234   if (mode == PCRE2_JIT_PARTIAL_SOFT)
14235     {
14236     common->hit_start = common->ovector_start;
14237     common->ovector_start += sizeof(sljit_sw);
14238     }
14239   }
14240 if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
14241   {
14242   common->match_end_ptr = common->ovector_start;
14243   common->ovector_start += sizeof(sljit_sw);
14244   }
14245 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
14246 common->control_head_ptr = 1;
14247 #endif
/* A non-zero control_head_ptr at this point only marks that a slot is needed;
it is replaced by the real offset of that slot. */
14248 if (common->control_head_ptr != 0)
14249   {
14250   common->control_head_ptr = common->ovector_start;
14251   common->ovector_start += sizeof(sljit_sw);
14252   }
14253 if (common->has_set_som)
14254   {
14255   /* Saving the real start pointer is necessary. */
14256   common->start_ptr = common->ovector_start;
14257   common->ovector_start += sizeof(sljit_sw);
14258   }
14259 
14260 /* Aligning ovector to even number of sljit words. */
14261 if ((common->ovector_start & sizeof(sljit_sw)) != 0)
14262   common->ovector_start += sizeof(sljit_sw);
14263 
/* Without a dedicated slot, the start pointer is kept in OVECTOR(0). */
14264 if (common->start_ptr == 0)
14265   common->start_ptr = OVECTOR(0);
14266 
14267 /* Capturing brackets cannot be optimized if callouts are allowed. */
14268 if (common->capture_last_ptr != 0)
14269   memset(common->optimized_cbracket, 0, re->top_bracket + 1);
14270 
14271 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
14272 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
14273 
14274 total_length = ccend - common->start;
/* One sljit_s32 per unit of total_length; when common->has_then is set, one
extra byte per unit is appended to the same allocation and used below as the
then_offsets array. */
14275 common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
14276 if (!common->private_data_ptrs)
14277   {
14278   SLJIT_FREE(common->optimized_cbracket, allocator_data);
14279   return PCRE2_ERROR_NOMEMORY;
14280   }
14281 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
14282 
14283 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
14284 
14285 if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
14286   detect_early_fail(common, common->start, &private_data_size, 0, 0);
14287 
14288 set_private_data_ptrs(common, &private_data_size, ccend);
14289 
14290 SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
14291 
/* Patterns that need more than 65536 bytes of private data are rejected, and
the failure is reported as PCRE2_ERROR_NOMEMORY. */
14292 if (private_data_size > 65536)
14293   {
14294   SLJIT_FREE(common->private_data_ptrs, allocator_data);
14295   SLJIT_FREE(common->optimized_cbracket, allocator_data);
14296   return PCRE2_ERROR_NOMEMORY;
14297   }
14298 
14299 if (common->has_then)
14300   {
/* then_offsets uses the extra bytes allocated after the private_data_ptrs
entries. */
14301   common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
14302   memset(common->then_offsets, 0, total_length);
14303   set_then_offsets(common, common->start, NULL);
14304   }
14305 
14306 compiler = sljit_create_compiler(allocator_data, NULL);
14307 if (!compiler)
14308   {
14309   SLJIT_FREE(common->optimized_cbracket, allocator_data);
14310   SLJIT_FREE(common->private_data_ptrs, allocator_data);
14311   return PCRE2_ERROR_NOMEMORY;
14312   }
14313 common->compiler = compiler;
14314 
14315 /* Main pcre2_jit_exec entry. */
14316 SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0);
14317 sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5, 5, SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS, 0, private_data_size);
14318 
14319 /* Register init. */
14320 reset_ovector(common, (re->top_bracket + 1) * 2);
14321 if (common->req_char_ptr != 0)
14322   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
14323 
14324 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
14325 OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
14326 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
14327 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
14328 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
14329 OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
14330 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
14331 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
/* The stored limit is limit_match + 1. */
14332 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
14333 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
14334 
14335 if (common->early_fail_start_ptr < common->early_fail_end_ptr)
14336   reset_early_fail(common);
14337 
14338 if (mode == PCRE2_JIT_PARTIAL_SOFT)
14339   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
14340 if (common->mark_ptr != 0)
14341   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
14342 if (common->control_head_ptr != 0)
14343   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
14344 
14345 /* Main part of the matching */
14346 if ((re->overall_options & PCRE2_ANCHORED) == 0)
14347   {
14348   mainloop_label = mainloop_entry(common);
14349   continue_match_label = LABEL();
14350   /* Forward search if possible. */
14351   if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
14352     {
14353     if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
14354       ;
14355     else if ((re->flags & PCRE2_FIRSTSET) != 0)
14356       fast_forward_first_char(common);
14357     else if ((re->flags & PCRE2_STARTLINE) != 0)
14358       fast_forward_newline(common);
14359     else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
14360       fast_forward_start_bits(common);
14361     }
14362   }
14363 else
14364   continue_match_label = LABEL();
14365 
14366 if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
14367   {
14368   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14369   OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
14370   minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
14371   }
14372 if (common->req_char_ptr != 0)
14373   reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
14374 
14375 /* Store the current STR_PTR in OVECTOR(0). */
14376 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
14377 /* Copy the limit of allowed recursions. */
14378 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
14379 if (common->capture_last_ptr != 0)
14380   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
14381 if (common->fast_forward_bc_ptr != NULL)
14382   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);
14383 
14384 if (common->start_ptr != OVECTOR(0))
14385   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
14386 
14387 /* Copy the beginning of the string. */
14388 if (mode == PCRE2_JIT_PARTIAL_SOFT)
14389   {
14390   jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
14391   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
14392   JUMPHERE(jump);
14393   }
14394 else if (mode == PCRE2_JIT_PARTIAL_HARD)
14395   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
14396 
14397 compile_matchingpath(common, common->start, ccend, &rootbacktrack);
14398 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14399   {
14400   sljit_free_compiler(compiler);
14401   SLJIT_FREE(common->optimized_cbracket, allocator_data);
14402   SLJIT_FREE(common->private_data_ptrs, allocator_data);
14403   PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14404   return PCRE2_ERROR_NOMEMORY;
14405   }
14406 
14407 if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
14408   end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
14409 
14410 if (common->might_be_empty)
14411   {
14412   empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
14413   empty_match_found_label = LABEL();
14414   }
14415 
14416 common->accept_label = LABEL();
14417 if (common->accept != NULL)
14418   set_jumps(common->accept, common->accept_label);
14419 
14420 /* This means we have a match. Update the ovector. */
14421 copy_ovector(common, re->top_bracket + 1);
14422 common->quit_label = common->abort_label = LABEL();
14423 if (common->quit != NULL)
14424   set_jumps(common->quit, common->quit_label);
14425 if (common->abort != NULL)
14426   set_jumps(common->abort, common->abort_label);
14427 if (minlength_check_failed != NULL)
14428   SET_LABEL(minlength_check_failed, common->abort_label);
14429 
14430 sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
14431 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
14432 
14433 if (common->failed_match != NULL)
14434   {
14435   SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
14436   set_jumps(common->failed_match, LABEL());
14437   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14438   JUMPTO(SLJIT_JUMP, common->abort_label);
14439   }
14440 
14441 if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
14442   JUMPHERE(end_anchor_failed);
14443 
14444 if (mode != PCRE2_JIT_COMPLETE)
14445   {
14446   common->partialmatchlabel = LABEL();
14447   set_jumps(common->partialmatch, common->partialmatchlabel);
14448   return_with_partial_match(common, common->quit_label);
14449   }
14450 
14451 if (common->might_be_empty)
14452   empty_match_backtrack_label = LABEL();
14453 compile_backtrackingpath(common, rootbacktrack.top);
14454 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14455   {
14456   sljit_free_compiler(compiler);
14457   SLJIT_FREE(common->optimized_cbracket, allocator_data);
14458   SLJIT_FREE(common->private_data_ptrs, allocator_data);
14459   PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14460   return PCRE2_ERROR_NOMEMORY;
14461   }
14462 
14463 SLJIT_ASSERT(rootbacktrack.prev == NULL);
14464 reset_match_label = LABEL();
14465 
14466 if (mode == PCRE2_JIT_PARTIAL_SOFT)
14467   {
14468   /* Update hit_start only in the first time. */
14469   jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
14470   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
14471   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
14472   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
14473   JUMPHERE(jump);
14474   }
14475 
14476 /* Check we have remaining characters. */
14477 if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
14478   {
14479   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
14480   }
14481 
14482 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
14483     (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);
14484 
14485 if ((re->overall_options & PCRE2_ANCHORED) == 0)
14486   {
14487   if (common->ff_newline_shortcut != NULL)
14488     {
14489     /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
14490     if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
14491       {
14492       if (common->match_end_ptr != 0)
14493         {
/* STR_END is temporarily replaced by the match end limit held in TMP1; the
original value is kept in TMP3 and restored if the jump is not taken. */
14494         OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
14495         OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
14496         CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
14497         OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
14498         }
14499       else
14500         CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
14501       }
14502     }
14503   else
14504     CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
14505   }
14506 
14507 /* No more remaining characters. */
14508 if (reqcu_not_found != NULL)
14509   set_jumps(reqcu_not_found, LABEL());
14510 
14511 if (mode == PCRE2_JIT_PARTIAL_SOFT)
14512   CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
14513 
14514 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14515 JUMPTO(SLJIT_JUMP, common->quit_label);
14516 
14517 flush_stubs(common);
14518 
/* Empty match handling: backtrack if PCRE2_NOTEMPTY is set, or if
PCRE2_NOTEMPTY_ATSTART is set and STR_PTR equals the str field of the
arguments; otherwise accept the empty match. */
14519 if (common->might_be_empty)
14520   {
14521   JUMPHERE(empty_match);
14522   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
14523   OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
14524   OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
14525   JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
14526   OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
14527   JUMPTO(SLJIT_ZERO, empty_match_found_label);
14528   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
14529   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
14530   JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
14531   }
14532 
/* Compile every recursion entry queued in common->entries. quit_label is saved
in a local here and written back to common->quit_label after the loop. */
14533 common->fast_forward_bc_ptr = NULL;
14534 common->early_fail_start_ptr = 0;
14535 common->early_fail_end_ptr = 0;
14536 common->currententry = common->entries;
14537 common->local_quit_available = TRUE;
14538 quit_label = common->quit_label;
14539 if (common->currententry != NULL)
14540   {
14541   /* A free bit for each private data. */
14542   common->recurse_bitset_size = ((private_data_size / SSIZE_OF(sw)) + 7) >> 3;
14543   SLJIT_ASSERT(common->recurse_bitset_size > 0);
14544   common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);;
14545 
14546   if (common->recurse_bitset != NULL)
14547     {
14548     do
14549       {
14550       /* Might add new entries. */
14551       compile_recurse(common);
14552       if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14553         break;
14554       flush_stubs(common);
14555       common->currententry = common->currententry->next;
14556       }
14557     while (common->currententry != NULL);
14558 
14559     SLJIT_FREE(common->recurse_bitset, allocator_data);
14560     }
14561 
/* currententry is still non-NULL only if the bitset allocation failed or the
loop above stopped on a compiler error. */
14562   if (common->currententry != NULL)
14563     {
14564     /* The common->recurse_bitset has been freed. */
14565     SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL);
14566 
14567     sljit_free_compiler(compiler);
14568     SLJIT_FREE(common->optimized_cbracket, allocator_data);
14569     SLJIT_FREE(common->private_data_ptrs, allocator_data);
14570     PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14571     return PCRE2_ERROR_NOMEMORY;
14572     }
14573   }
14574 common->local_quit_available = FALSE;
14575 common->quit_label = quit_label;
14576 
14577 /* Allocating stack, returns with PCRE2_ERROR_JIT_STACKLIMIT if fails. */
14578 /* This is a (really) rare case. */
14579 set_jumps(common->stackalloc, LABEL());
14580 /* RETURN_ADDR is not a saved register. */
14581 sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14582 
14583 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
14584 
/* STR_PTR shares a register with the second call argument (see the assertion
above), so it is saved in LOCALS1 and reloaded after the call. */
14585 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
14586 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
14587 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
14588 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
14589 OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
14590 
14591 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(sljit_stack_resize));
14592 
/* A zero return value from sljit_stack_resize is treated as failure. */
14593 jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
14594 OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
14595 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
14596 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14597 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
14598 OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
14599 
14600 /* Allocation failed. */
14601 JUMPHERE(jump);
14602 /* We break the return address cache here, but this is a really rare case. */
14603 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
14604 JUMPTO(SLJIT_JUMP, common->quit_label);
14605 
14606 /* Call limit reached. */
14607 set_jumps(common->calllimit, LABEL());
14608 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
14609 JUMPTO(SLJIT_JUMP, common->quit_label);
14610 
/* Shared helper routines follow. Each one is emitted only when its jump list
is non-empty. */
14611 if (common->revertframes != NULL)
14612   {
14613   set_jumps(common->revertframes, LABEL());
14614   do_revertframes(common);
14615   }
14616 if (common->wordboundary != NULL)
14617   {
14618   set_jumps(common->wordboundary, LABEL());
14619   check_wordboundary(common, FALSE);
14620   }
14621 if (common->ucp_wordboundary != NULL)
14622   {
14623   set_jumps(common->ucp_wordboundary, LABEL());
14624   check_wordboundary(common, TRUE);
14625   }
14626 if (common->anynewline != NULL)
14627   {
14628   set_jumps(common->anynewline, LABEL());
14629   check_anynewline(common);
14630   }
14631 if (common->hspace != NULL)
14632   {
14633   set_jumps(common->hspace, LABEL());
14634   check_hspace(common);
14635   }
14636 if (common->vspace != NULL)
14637   {
14638   set_jumps(common->vspace, LABEL());
14639   check_vspace(common);
14640   }
14641 if (common->casefulcmp != NULL)
14642   {
14643   set_jumps(common->casefulcmp, LABEL());
14644   do_casefulcmp(common);
14645   }
14646 if (common->caselesscmp != NULL)
14647   {
14648   set_jumps(common->caselesscmp, LABEL());
14649   do_caselesscmp(common);
14650   }
14651 if (common->reset_match != NULL || common->restart_match != NULL)
14652   {
14653   if (common->restart_match != NULL)
14654     {
14655     set_jumps(common->restart_match, LABEL());
14656     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
14657     }
14658 
14659   set_jumps(common->reset_match, LABEL());
14660   do_reset_match(common, (re->top_bracket + 1) * 2);
14661   /* The value of restart_match is in TMP1. */
14662   CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
14663   OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
14664   JUMPTO(SLJIT_JUMP, reset_match_label);
14665   }
14666 #ifdef SUPPORT_UNICODE
14667 #if PCRE2_CODE_UNIT_WIDTH == 8
14668 if (common->utfreadchar != NULL)
14669   {
14670   set_jumps(common->utfreadchar, LABEL());
14671   do_utfreadchar(common);
14672   }
14673 if (common->utfreadtype8 != NULL)
14674   {
14675   set_jumps(common->utfreadtype8, LABEL());
14676   do_utfreadtype8(common);
14677   }
14678 if (common->utfpeakcharback != NULL)
14679   {
14680   set_jumps(common->utfpeakcharback, LABEL());
14681   do_utfpeakcharback(common);
14682   }
14683 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
14684 #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
14685 if (common->utfreadchar_invalid != NULL)
14686   {
14687   set_jumps(common->utfreadchar_invalid, LABEL());
14688   do_utfreadchar_invalid(common);
14689   }
14690 if (common->utfreadnewline_invalid != NULL)
14691   {
14692   set_jumps(common->utfreadnewline_invalid, LABEL());
14693   do_utfreadnewline_invalid(common);
14694   }
14695 if (common->utfmoveback_invalid)
14696   {
14697   set_jumps(common->utfmoveback_invalid, LABEL());
14698   do_utfmoveback_invalid(common);
14699   }
14700 if (common->utfpeakcharback_invalid)
14701   {
14702   set_jumps(common->utfpeakcharback_invalid, LABEL());
14703   do_utfpeakcharback_invalid(common);
14704   }
14705 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
14706 if (common->getucd != NULL)
14707   {
14708   set_jumps(common->getucd, LABEL());
14709   do_getucd(common);
14710   }
14711 if (common->getucdtype != NULL)
14712   {
14713   set_jumps(common->getucdtype, LABEL());
14714   do_getucdtype(common);
14715   }
14716 #endif /* SUPPORT_UNICODE */
14717 
/* Both helper arrays are released before the machine code is generated. */
14718 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14719 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14720 
14721 executable_func = sljit_generate_code(compiler);
14722 executable_size = sljit_get_generated_code_size(compiler);
14723 sljit_free_compiler(compiler);
14724 
14725 if (executable_func == NULL)
14726   {
14727   PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14728   return PCRE2_ERROR_NOMEMORY;
14729   }
14730 
14731 /* Reuse the function descriptor if possible. */
14732 if (re->executable_jit != NULL)
14733   functions = (executable_functions *)re->executable_jit;
14734 else
14735   {
14736   functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
14737   if (functions == NULL)
14738     {
14739     /* This case is highly unlikely since we just recently
14740     freed a lot of memory. Not impossible though. */
14741     sljit_free_code(executable_func, NULL);
14742     PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14743     return PCRE2_ERROR_NOMEMORY;
14744     }
14745   memset(functions, 0, sizeof(executable_functions));
14746   functions->top_bracket = re->top_bracket + 1;
14747   functions->limit_match = re->limit_match;
14748   re->executable_jit = functions;
14749   }
14750 
14751 /* Turn mode into an index. */
14752 if (mode == PCRE2_JIT_COMPLETE)
14753   mode = 0;
14754 else
14755   mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
14756 
/* Record the generated code, its read-only data list and its size in the slot
that belongs to this mode. */
14757 SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
14758 functions->executable_funcs[mode] = executable_func;
14759 functions->read_only_data_heads[mode] = common->read_only_data_head;
14760 functions->executable_sizes[mode] = executable_size;
14761 return 0;
14762 }
14763 
14764 #endif
14765 
14766 /*************************************************
14767 *        JIT compile a Regular Expression        *
14768 *************************************************/
14769 
14770 /* This function uses JIT to convert a previously-compiled pattern into machine
14771 code.
14772 
14773 Arguments:
14774   code          a compiled pattern
14775   options       JIT option bits
14776 
14777 Returns:        0: success or (*NOJIT) was used
14778                <0: an error code
14779 */
14780 
/* Every option bit that pcre2_jit_compile() accepts. If any other bit is set
in its options argument, the function returns PCRE2_ERROR_JIT_BADOPTION. */
14781 #define PUBLIC_JIT_COMPILE_OPTIONS \
14782   (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_INVALID_UTF)
14783 
14784 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_compile(pcre2_code * code,uint32_t options)14785 pcre2_jit_compile(pcre2_code *code, uint32_t options)
14786 {
14787 pcre2_real_code *re = (pcre2_real_code *)code;
14788 #ifdef SUPPORT_JIT
14789 executable_functions *functions;
14790 static int executable_allocator_is_working = -1;
14791 #endif
14792 
14793 if (code == NULL)
14794   return PCRE2_ERROR_NULL;
14795 
14796 if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14797   return PCRE2_ERROR_JIT_BADOPTION;
14798 
14799 /* Support for invalid UTF was first introduced in JIT, with the option
14800 PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
14801 compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
14802 preferred feature, with the earlier option deprecated. However, for backward
14803 compatibility, if the earlier option is set, it forces the new option so that
14804 if JIT matching falls back to the interpreter, there is still support for
14805 invalid UTF. However, if this function has already been successfully called
14806 without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
14807 non-invalid-supporting JIT code was compiled), give an error.
14808 
14809 If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
14810 actions are needed:
14811 
14812   1. Remove the definition from pcre2.h.in and from the list in
14813      PUBLIC_JIT_COMPILE_OPTIONS above.
14814 
14815   2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
14816 
14817   3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
14818 
14819   4. Delete the following short block of code. The setting of "re" and
14820      "functions" can be moved into the JIT-only block below, but if that is
14821      done, (void)re and (void)functions will be needed in the non-JIT case, to
14822      avoid compiler warnings.
14823 */
14824 
14825 #ifdef SUPPORT_JIT
14826 functions = (executable_functions *)re->executable_jit;
14827 #endif
14828 
14829 if ((options & PCRE2_JIT_INVALID_UTF) != 0)
14830   {
14831   if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
14832     {
14833 #ifdef SUPPORT_JIT
14834     if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
14835 #endif
14836     re->overall_options |= PCRE2_MATCH_INVALID_UTF;
14837     }
14838   }
14839 
14840 /* The above tests are run with and without JIT support. This means that
14841 PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
14842 interpreter support) even in the absence of JIT. But now, if there is no JIT
14843 support, give an error return. */
14844 
14845 #ifndef SUPPORT_JIT
14846 return PCRE2_ERROR_JIT_BADOPTION;
14847 #else  /* SUPPORT_JIT */
14848 
14849 /* There is JIT support. Do the necessary. */
14850 
14851 if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14852 
14853 if (executable_allocator_is_working == -1)
14854   {
14855   /* Checks whether the executable allocator is working. This check
14856      might run multiple times in multi-threaded environments, but the
14857      result should not be affected by it. */
14858   void *ptr = SLJIT_MALLOC_EXEC(32, NULL);
14859   if (ptr != NULL)
14860     {
14861     SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL);
14862     executable_allocator_is_working = 1;
14863     }
14864   else executable_allocator_is_working = 0;
14865   }
14866 
14867 if (!executable_allocator_is_working)
14868   return PCRE2_ERROR_NOMEMORY;
14869 
14870 if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
14871   options |= PCRE2_JIT_INVALID_UTF;
14872 
14873 if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14874     || functions->executable_funcs[0] == NULL)) {
14875   uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14876   int result = jit_compile(code, options & ~excluded_options);
14877   if (result != 0)
14878     return result;
14879   }
14880 
14881 if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14882     || functions->executable_funcs[1] == NULL)) {
14883   uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14884   int result = jit_compile(code, options & ~excluded_options);
14885   if (result != 0)
14886     return result;
14887   }
14888 
14889 if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14890     || functions->executable_funcs[2] == NULL)) {
14891   uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14892   int result = jit_compile(code, options & ~excluded_options);
14893   if (result != 0)
14894     return result;
14895   }
14896 
14897 return 0;
14898 
14899 #endif  /* SUPPORT_JIT */
14900 }
14901 
14902 /* JIT compiler uses an all-in-one approach. This improves security,
14903    since the code generator functions are not exported. */
14904 
14905 #define INCLUDED_FROM_PCRE2_JIT_COMPILE
14906 
14907 #include "pcre2_jit_match.c"
14908 #include "pcre2_jit_misc.c"
14909 
14910 /* End of pcre2_jit_compile.c */
14911