1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9                     This module by Zoltan Herczeg
10      Original API code Copyright (c) 1997-2012 University of Cambridge
11           New API code Copyright (c) 2016-2019 University of Cambridge
12 
13 -----------------------------------------------------------------------------
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
16 
17     * Redistributions of source code must retain the above copyright notice,
18       this list of conditions and the following disclaimer.
19 
20     * Redistributions in binary form must reproduce the above copyright
21       notice, this list of conditions and the following disclaimer in the
22       documentation and/or other materials provided with the distribution.
23 
24     * Neither the name of the University of Cambridge nor the names of its
25       contributors may be used to endorse or promote products derived from
26       this software without specific prior written permission.
27 
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 POSSIBILITY OF SUCH DAMAGE.
39 -----------------------------------------------------------------------------
40 */
41 
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45 
46 #include "pcre2_internal.h"
47 
48 #ifdef SUPPORT_JIT
49 
50 /* All-in-one: Since we use the JIT compiler only from here,
51 we just include it. This way we don't need to touch the build
52 system files. */
53 
/* Configuration macros for the sljit sources that are included below. */
#define SLJIT_CONFIG_AUTO 1
#define SLJIT_CONFIG_STATIC 1
#define SLJIT_VERBOSE 0

/* sljit debugging is switched on together with PCRE2_DEBUG. */
#ifdef PCRE2_DEBUG
#define SLJIT_DEBUG 1
#else
#define SLJIT_DEBUG 0
#endif

/* sljit allocation hooks: both are routed to the pcre2_jit_malloc() and
pcre2_jit_free() wrappers defined right after these macros. */
#define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
#define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
66 
pcre2_jit_malloc(size_t size,void * allocator_data)67 static void * pcre2_jit_malloc(size_t size, void *allocator_data)
68 {
69 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
70 return allocator->malloc(size, allocator->memory_data);
71 }
72 
pcre2_jit_free(void * ptr,void * allocator_data)73 static void pcre2_jit_free(void *ptr, void *allocator_data)
74 {
75 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
76 allocator->free(ptr, allocator->memory_data);
77 }
78 
79 #include "sljit/sljitLir.c"
80 
/* Stop the build when sljit reports that it has no backend for the target. */
#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
#error Unsupported architecture
#endif

/* Defines for debugging purposes. */

/* 1 - Use unoptimized capturing brackets.
   2 - Enable capture_last_ptr (includes option 1). */
/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */

/* 1 - Always have a control head. */
/* #define DEBUG_FORCE_CONTROL_HEAD 1 */

/* Allocate memory for the regex stack on the real machine stack.
Fast, but limited size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for stack allocated by the OS. Should be a multiple
of the page size. */
#define STACK_GROWTH_RATE 8192

/* Enable to check that the allocation could destroy temporaries.
Only defined when sljit debugging is on. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
106 
107 /*
Short summary about the backtracking mechanism employed by the jit code generator:
109 
The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).
115 
116   'ab' - 'a' and 'b' regexps are concatenated
117   'a+' - 'a' is the sub-expression of the '+' operator
118 
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
124 
125  Greedy star operator (*) :
126    Matching path: match happens.
127    Backtrack path: match failed.
128  Non-greedy star operator (*?) :
129    Matching path: no need to perform a match.
130    Backtrack path: match is required.
131 
The following example shows how the code is generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:
135 
136    A(B|C)D
137 
138 The generated code will be the following:
139 
140  A matching path
141  '(' matching path (pushing arguments to the stack)
142  B matching path
143  ')' matching path (pushing arguments to the stack)
144  D matching path
145  return with successful match
146 
147  D backtrack path
148  ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
149  B backtrack path
150  C expected path
151  jump to D matching path
152  C backtrack path
153  A backtrack path
154 
 Notice that the order of the backtrack code paths is the opposite of the fast
 code paths. In this way the topmost value on the stack always belongs
 to the current backtrack code path. The backtrack path must check
 whether there is a next alternative. If so, it needs to jump back to
 the matching path eventually. Otherwise it needs to clear out its own stack
 frame and continue the execution on the backtrack code paths.
161 */
162 
163 /*
164 Saved stack frames:
165 
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
170 
171 The stack frames are stored in a chain list, and have the following format:
172 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
173 
174 Thus we can restore the private data to a particular point in the stack.
175 */
176 
177 typedef struct jit_arguments {
178   /* Pointers first. */
179   struct sljit_stack *stack;
180   PCRE2_SPTR str;
181   PCRE2_SPTR begin;
182   PCRE2_SPTR end;
183   pcre2_match_data *match_data;
184   PCRE2_SPTR startchar_ptr;
185   PCRE2_UCHAR *mark_ptr;
186   int (*callout)(pcre2_callout_block *, void *);
187   void *callout_data;
188   /* Everything else after. */
189   sljit_uw offset_limit;
190   sljit_u32 limit_match;
191   sljit_u32 oveccount;
192   sljit_u32 options;
193 } jit_arguments;
194 
195 #define JIT_NUMBER_OF_COMPILE_MODES 3
196 
197 typedef struct executable_functions {
198   void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
199   void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
200   sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
201   sljit_u32 top_bracket;
202   sljit_u32 limit_match;
203 } executable_functions;
204 
/* Singly linked list of sljit jumps; next is the following list item. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
209 
/* Singly linked list of stubs. Each item stores a jump (start) and a
label (quit); next is the following list item. */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
215 
/* Negative marker values for frame sizes (see the "less than 0 if a frame
is not needed" notes on the framesize members below). */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
220 
/* Kinds of control entries: a mark or a then-trap. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
225 
/* Kinds of early fail handling. */
enum early_fail_types {
  type_skip = 0,
  type_fail = 1,
  type_fail_range = 2
};
231 
232 typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
233 
234 /* The following structure is the key data type for the recursive
235 code generator. It is allocated by compile_matchingpath, and contains
236 the arguments for compile_backtrackingpath. Must be the first member
237 of its descendants. */
238 typedef struct backtrack_common {
239   /* Concatenation stack. */
240   struct backtrack_common *prev;
241   jump_list *nextbacktracks;
242   /* Internal stack (for component operators). */
243   struct backtrack_common *top;
244   jump_list *topbacktracks;
245   /* Opcode pointer. */
246   PCRE2_SPTR cc;
247 } backtrack_common;
248 
249 typedef struct assert_backtrack {
250   backtrack_common common;
251   jump_list *condfailed;
252   /* Less than 0 if a frame is not needed. */
253   int framesize;
254   /* Points to our private memory word on the stack. */
255   int private_data_ptr;
256   /* For iterators. */
257   struct sljit_label *matchingpath;
258 } assert_backtrack;
259 
260 typedef struct bracket_backtrack {
261   backtrack_common common;
262   /* Where to coninue if an alternative is successfully matched. */
263   struct sljit_label *alternative_matchingpath;
264   /* For rmin and rmax iterators. */
265   struct sljit_label *recursive_matchingpath;
266   /* For greedy ? operator. */
267   struct sljit_label *zero_matchingpath;
268   /* Contains the branches of a failed condition. */
269   union {
270     /* Both for OP_COND, OP_SCOND. */
271     jump_list *condfailed;
272     assert_backtrack *assert;
273     /* For OP_ONCE. Less than 0 if not needed. */
274     int framesize;
275     /* For brackets with >3 alternatives. */
276     struct sljit_put_label *matching_put_label;
277   } u;
278   /* Points to our private memory word on the stack. */
279   int private_data_ptr;
280 } bracket_backtrack;
281 
282 typedef struct bracketpos_backtrack {
283   backtrack_common common;
284   /* Points to our private memory word on the stack. */
285   int private_data_ptr;
286   /* Reverting stack is needed. */
287   int framesize;
288   /* Allocated stack size. */
289   int stacksize;
290 } bracketpos_backtrack;
291 
292 typedef struct braminzero_backtrack {
293   backtrack_common common;
294   struct sljit_label *matchingpath;
295 } braminzero_backtrack;
296 
297 typedef struct char_iterator_backtrack {
298   backtrack_common common;
299   /* Next iteration. */
300   struct sljit_label *matchingpath;
301   union {
302     jump_list *backtracks;
303     struct {
304       unsigned int othercasebit;
305       PCRE2_UCHAR chr;
306       BOOL enabled;
307     } charpos;
308   } u;
309 } char_iterator_backtrack;
310 
311 typedef struct ref_iterator_backtrack {
312   backtrack_common common;
313   /* Next iteration. */
314   struct sljit_label *matchingpath;
315 } ref_iterator_backtrack;
316 
317 typedef struct recurse_entry {
318   struct recurse_entry *next;
319   /* Contains the function entry label. */
320   struct sljit_label *entry_label;
321   /* Contains the function entry label. */
322   struct sljit_label *backtrack_label;
323   /* Collects the entry calls until the function is not created. */
324   jump_list *entry_calls;
325   /* Collects the backtrack calls until the function is not created. */
326   jump_list *backtrack_calls;
327   /* Points to the starting opcode. */
328   sljit_sw start;
329 } recurse_entry;
330 
331 typedef struct recurse_backtrack {
332   backtrack_common common;
333   /* Return to the matching path. */
334   struct sljit_label *matchingpath;
335   /* Recursive pattern. */
336   recurse_entry *entry;
337   /* Pattern is inlined. */
338   BOOL inlined_pattern;
339 } recurse_backtrack;
340 
341 #define OP_THEN_TRAP OP_TABLE_LENGTH
342 
343 typedef struct then_trap_backtrack {
344   backtrack_common common;
345   /* If then_trap is not NULL, this structure contains the real
346   then_trap for the backtracking path. */
347   struct then_trap_backtrack *then_trap;
348   /* Points to the starting opcode. */
349   sljit_sw start;
350   /* Exit point for the then opcodes of this alternative. */
351   jump_list *quit;
352   /* Frame size of the current alternative. */
353   int framesize;
354 } then_trap_backtrack;
355 
356 #define MAX_N_CHARS 12
357 #define MAX_DIFF_CHARS 5
358 
359 typedef struct fast_forward_char_data {
360   /* Number of characters in the chars array, 255 for any character. */
361   sljit_u8 count;
362   /* Number of last UTF-8 characters in the chars array. */
363   sljit_u8 last_count;
364   /* Available characters in the current position. */
365   PCRE2_UCHAR chars[MAX_DIFF_CHARS];
366 } fast_forward_char_data;
367 
/* Size limits used when handling character classes. */
#define MAX_CLASS_RANGE_SIZE 4
#define MAX_CLASS_CHARS_SIZE 3
370 
371 typedef struct compiler_common {
372   /* The sljit ceneric compiler. */
373   struct sljit_compiler *compiler;
374   /* Compiled regular expression. */
375   pcre2_real_code *re;
376   /* First byte code. */
377   PCRE2_SPTR start;
378   /* Maps private data offset to each opcode. */
379   sljit_s32 *private_data_ptrs;
380   /* Chain list of read-only data ptrs. */
381   void *read_only_data_head;
382   /* Tells whether the capturing bracket is optimized. */
383   sljit_u8 *optimized_cbracket;
384   /* Tells whether the starting offset is a target of then. */
385   sljit_u8 *then_offsets;
386   /* Current position where a THEN must jump. */
387   then_trap_backtrack *then_trap;
388   /* Starting offset of private data for capturing brackets. */
389   sljit_s32 cbra_ptr;
390   /* Output vector starting point. Must be divisible by 2. */
391   sljit_s32 ovector_start;
392   /* Points to the starting character of the current match. */
393   sljit_s32 start_ptr;
394   /* Last known position of the requested byte. */
395   sljit_s32 req_char_ptr;
396   /* Head of the last recursion. */
397   sljit_s32 recursive_head_ptr;
398   /* First inspected character for partial matching.
399      (Needed for avoiding zero length partial matches.) */
400   sljit_s32 start_used_ptr;
401   /* Starting pointer for partial soft matches. */
402   sljit_s32 hit_start;
403   /* Pointer of the match end position. */
404   sljit_s32 match_end_ptr;
405   /* Points to the marked string. */
406   sljit_s32 mark_ptr;
407   /* Recursive control verb management chain. */
408   sljit_s32 control_head_ptr;
409   /* Points to the last matched capture block index. */
410   sljit_s32 capture_last_ptr;
411   /* Fast forward skipping byte code pointer. */
412   PCRE2_SPTR fast_forward_bc_ptr;
413   /* Locals used by fast fail optimization. */
414   sljit_s32 early_fail_start_ptr;
415   sljit_s32 early_fail_end_ptr;
416 
417   /* Flipped and lower case tables. */
418   const sljit_u8 *fcc;
419   sljit_sw lcc;
420   /* Mode can be PCRE2_JIT_COMPLETE and others. */
421   int mode;
422   /* TRUE, when empty match is accepted for partial matching. */
423   BOOL allow_empty_partial;
424   /* TRUE, when minlength is greater than 0. */
425   BOOL might_be_empty;
426   /* \K is found in the pattern. */
427   BOOL has_set_som;
428   /* (*SKIP:arg) is found in the pattern. */
429   BOOL has_skip_arg;
430   /* (*THEN) is found in the pattern. */
431   BOOL has_then;
432   /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
433   BOOL has_skip_in_assert_back;
434   /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
435   BOOL local_quit_available;
436   /* Currently in a positive assertion. */
437   BOOL in_positive_assertion;
438   /* Newline control. */
439   int nltype;
440   sljit_u32 nlmax;
441   sljit_u32 nlmin;
442   int newline;
443   int bsr_nltype;
444   sljit_u32 bsr_nlmax;
445   sljit_u32 bsr_nlmin;
446   /* Dollar endonly. */
447   int endonly;
448   /* Tables. */
449   sljit_sw ctypes;
450   /* Named capturing brackets. */
451   PCRE2_SPTR name_table;
452   sljit_sw name_count;
453   sljit_sw name_entry_size;
454 
455   /* Labels and jump lists. */
456   struct sljit_label *partialmatchlabel;
457   struct sljit_label *quit_label;
458   struct sljit_label *abort_label;
459   struct sljit_label *accept_label;
460   struct sljit_label *ff_newline_shortcut;
461   stub_list *stubs;
462   recurse_entry *entries;
463   recurse_entry *currententry;
464   jump_list *partialmatch;
465   jump_list *quit;
466   jump_list *positive_assertion_quit;
467   jump_list *abort;
468   jump_list *failed_match;
469   jump_list *accept;
470   jump_list *calllimit;
471   jump_list *stackalloc;
472   jump_list *revertframes;
473   jump_list *wordboundary;
474   jump_list *anynewline;
475   jump_list *hspace;
476   jump_list *vspace;
477   jump_list *casefulcmp;
478   jump_list *caselesscmp;
479   jump_list *reset_match;
480   BOOL unset_backref;
481   BOOL alt_circumflex;
482 #ifdef SUPPORT_UNICODE
483   BOOL utf;
484   BOOL invalid_utf;
485   BOOL ucp;
486   /* Points to saving area for iref. */
487   sljit_s32 iref_ptr;
488   jump_list *getucd;
489   jump_list *getucdtype;
490 #if PCRE2_CODE_UNIT_WIDTH == 8
491   jump_list *utfreadchar;
492   jump_list *utfreadtype8;
493   jump_list *utfpeakcharback;
494 #endif
495 #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
496   jump_list *utfreadchar_invalid;
497   jump_list *utfreadnewline_invalid;
498   jump_list *utfmoveback_invalid;
499   jump_list *utfpeakcharback_invalid;
500 #endif
501 #endif /* SUPPORT_UNICODE */
502 } compiler_common;
503 
/* For byte_sequence_compare. */

/* The ucharptr, c and oc members exist only when SLJIT_UNALIGNED is set.
Both unions overlay the same data as a 32 bit value, a 16 bit value and
(in 8 bit mode) a byte, besides an array of code units whose element
type depends on PCRE2_CODE_UNIT_WIDTH. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } c;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
537 
/* Undefine sljit macros. CMP is defined again by this file among the
shortcut macros. */
#undef CMP

/* Used for accessing the elements of the stack: converts an index
to a byte offset (index times the size of a machine word). */
#define STACK(i)      ((i) * (int)sizeof(sljit_sw))

/* Only R2 and R3 are accepted as the preferred shift register; SHIFT_REG_IS_R3
is defined in the latter case and affects the TMP2 / TMP3 assignment. */
#ifdef SLJIT_PREF_SHIFT_REG
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
/* Nothing. */
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
#define SHIFT_REG_IS_R3
#else
#error "Unsupported shift register"
#endif
#endif
553 
/* Register assignment. TMP2 and TMP3 are swapped when the preferred
shift register is R3. */
#define TMP1          SLJIT_R0
#ifdef SHIFT_REG_IS_R3
#define TMP2          SLJIT_R3
#define TMP3          SLJIT_R2
#else
#define TMP2          SLJIT_R2
#define TMP3          SLJIT_R3
#endif
#define STR_PTR       SLJIT_R1
#define STR_END       SLJIT_S0
#define STACK_TOP     SLJIT_S1
#define STACK_LIMIT   SLJIT_S2
#define COUNT_MATCH   SLJIT_S3
#define ARGUMENTS     SLJIT_S4
#define RETURN_ADDR   SLJIT_R4

/* Set to 1 only for the 32 bit x86 configuration. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#define HAS_VIRTUAL_REGISTERS 1
#else
#define HAS_VIRTUAL_REGISTERS 0
#endif
575 
/* Local space layout. Every offset is a multiple of the machine word size. */
/* These two locals can be used by the current opcode. */
#define LOCALS0          (0 * sizeof(sljit_sw))
#define LOCALS1          (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_sw))
#define POSSESSIVE1      (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH      (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet been
reached. The macros below expect a variable named "common" in scope. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data offset of the opcode at cc (indexed by its distance from
the first byte code). */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
593 
594 #if PCRE2_CODE_UNIT_WIDTH == 8
595 #define MOV_UCHAR  SLJIT_MOV_U8
596 #define IN_UCHARS(x) (x)
597 #elif PCRE2_CODE_UNIT_WIDTH == 16
598 #define MOV_UCHAR  SLJIT_MOV_U16
599 #define UCHAR_SHIFT (1)
600 #define IN_UCHARS(x) ((x) * 2)
601 #elif PCRE2_CODE_UNIT_WIDTH == 32
602 #define MOV_UCHAR  SLJIT_MOV_U32
603 #define UCHAR_SHIFT (2)
604 #define IN_UCHARS(x) ((x) * 4)
605 #else
606 #error Unsupported compiling mode
607 #endif
608 
/* Shortcuts. DEFINE_COMPILER declares the local variable "compiler" (taken
from common->compiler); all the other macros pass that variable as the
first argument of the corresponding sljit function. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define OP_SRC(op, src, srcw) \
  sljit_emit_op_src(compiler, (op), (src), (srcw))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Binds an earlier jump to a label emitted at the current position. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare and binds it to an existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define CMOV(type, dst_reg, src, srcw) \
  sljit_emit_cmov(compiler, (type), (dst_reg), (src), (srcw))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
638 
#define READ_CHAR_MAX 0x7fffffff

/* The negative value is parenthesized so that the macro stays a single
operand wherever it is expanded. */
#define INVALID_UTF_CHAR (-1)
#define UNASSIGNED_UTF_CHAR 888
643 
644 #if defined SUPPORT_UNICODE
645 #if PCRE2_CODE_UNIT_WIDTH == 8
646 
/* Reads one character forward from a UTF-8 string that may be invalid.

  c               receives the code point
  ptr             current position (an lvalue); the caller ensures ptr < end
  end             end of the readable data; no byte at or after it is read
  invalid_action  statement executed when no valid character is found

On success ptr is moved past the character. When invalid_action runs, c may
hold a partial value, and ptr has already been advanced if the sequence was
structurally complete but decoded to an overlong form, a surrogate or a value
outside 0x10000..0x10ffff. The body is a plain brace block, so a break or
continue passed as invalid_action applies to the loop around the macro. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] <= 0x7f) \
    c = *ptr++; \
  else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \
    { \
    c = ptr[1] - 0x80; \
    \
    if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \
      { \
      c |= (ptr[0] - 0xc0) << 6; \
      ptr += 2; \
      } \
    else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] < 0xc0) \
      { \
      c = c << 6 | (ptr[2] - 0x80); \
      \
      if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \
        { \
        c |= (ptr[0] - 0xe0) << 12; \
        ptr += 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \
        { \
        c = c << 6 | (ptr[3] - 0x80); \
        \
        if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \
          { \
          c |= (ptr[0] - 0xf0) << 18; \
          ptr += 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
708 
/* Reads one character backward from a UTF-8 string that may be invalid.

  c               receives the code point
  ptr             position just after the character (an lvalue); the caller
                  ensures ptr > start
  start           start of the readable data; no byte before it is read
  invalid_action  statement executed when no valid character is found

On success ptr is moved to the first byte of the character. The bytes are
visited from the last one to the first, so every earlier byte carries more
significant bits than what has been collected so far: it is or-ed in at its
own position (shift by 6, 12, 18) and the collected value is never shifted.
When invalid_action runs, c may hold a partial value, and ptr has already
been moved if the sequence was structurally complete but decoded to an
overlong form, a surrogate or a value outside 0x10000..0x10ffff. The body is
a plain brace block, so a break or continue passed as invalid_action applies
to the loop around the macro. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c <= 0x7f) \
    ptr--; \
  else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \
    { \
    c -= 0x80; \
    \
    if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \
      { \
      c |= (ptr[-2] - 0xc0) << 6; \
      ptr -= 2; \
      } \
    else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \
      { \
      c |= (ptr[-2] - 0x80) << 6; \
      \
      if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \
        { \
        c |= (ptr[-3] - 0xe0) << 12; \
        ptr -= 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \
        { \
        c |= (ptr[-3] - 0x80) << 12; \
        \
        if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \
          { \
          c |= (ptr[-4] - 0xf0) << 18; \
          ptr -= 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
771 
772 #elif PCRE2_CODE_UNIT_WIDTH == 16
773 
/* Reads one character forward from a UTF-16 string that may be invalid.

  c               receives the code point
  ptr             current position (an lvalue); the caller ensures ptr < end
  end             end of the readable data; no unit at or after it is read
  invalid_action  statement executed when no valid character is found

A unit outside 0xd800..0xdfff is a character on its own. A high surrogate
(0xd800..0xdbff) must be followed by a low surrogate (0xdc00..0xdfff) before
end; the pair is combined and ptr moves by two. In any other case
invalid_action is executed with ptr and c unchanged. The body is a plain
brace block, so a break or continue passed as invalid_action applies to the
loop around the macro. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] < 0xd800 || ptr[0] >= 0xe000) \
    c = *ptr++; \
  else if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \
    { \
    c = (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)) + 0x10000; \
    ptr += 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
788 
/* Reads one character backward from a UTF-16 string that may be invalid.

  c               receives the code point
  ptr             position just after the character (an lvalue); the caller
                  ensures ptr > start
  start           start of the readable data; no unit before it is read
  invalid_action  statement executed when no valid character is found

A unit outside 0xd800..0xdfff is a character on its own. A low surrogate
(0xdc00..0xdfff) must be preceded by a high surrogate (0xd800..0xdbff) that
is not before start; the pair is combined and ptr moves back by two. In any
other case invalid_action is executed with ptr unchanged and c holding the
unit at ptr[-1]. The body is a plain brace block, so a break or continue
passed as invalid_action applies to the loop around the macro. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c < 0xd800 || c >= 0xe000) \
    ptr--; \
  else if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \
    { \
    c = (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)) + 0x10000; \
    ptr -= 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
804 
805 
806 #elif PCRE2_CODE_UNIT_WIDTH == 32
807 
/* Reads one character forward from a UTF-32 string that may be invalid.

  c               receives the code point
  ptr             current position (an lvalue)
  end             not used in 32 bit mode
  invalid_action  statement executed when the unit is a surrogate
                  (0xd800..0xdfff) or is not below 0x110000

On success ptr moves by one; otherwise ptr and c are left unchanged. The
body is a plain brace block, so a break or continue passed as invalid_action
applies to the loop around the macro. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] < 0xd800 || (ptr[0] >= 0xe000 && ptr[0] < 0x110000)) \
    c = *ptr++; \
  else \
    { \
    invalid_action; \
    } \
  }
817 
/* Reads one character backward from a UTF-32 string that may be invalid.

  c               receives the unit at ptr[-1] (also when it is invalid)
  ptr             position just after the character (an lvalue)
  start           not used in 32 bit mode
  invalid_action  statement executed when the unit is a surrogate
                  (0xd800..0xdfff) or is not below 0x110000

On success ptr moves back by one; otherwise ptr is left unchanged. The body
is a plain brace block, so a break or continue passed as invalid_action
applies to the loop around the macro. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (ptr[-1] < 0xd800 || (ptr[-1] >= 0xe000 && ptr[-1] < 0x110000)) \
    ptr--; \
  else \
    { \
    invalid_action; \
    } \
  }
828 
829 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
830 #endif /* SUPPORT_UNICODE */
831 
/* Finds the end of a bracket.

Argument:
  cc   points to the opening opcode, which must be an assertion
       (OP_ASSERT .. OP_ASSERTBACK_NA) or a bracket (OP_ONCE .. OP_SCOND)

The link stored at offset 1 is followed from the opening opcode and then
from each OP_ALT until a closing opcode (OP_KET .. OP_KETRPOS) is reached.

Returns a pointer just after the closing opcode and its link field. */
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

do
  cc += GET(cc, 1);
while (*cc == OP_ALT);

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return cc + 1 + LINK_SIZE;
}
840 
/* Counts the alternatives of a bracket.

Argument:
  cc   points to the opening opcode, which must be an assertion
       (OP_ASSERT .. OP_ASSERTBACK_NA) or a bracket (OP_ONCE .. OP_SCOND)

Returns the number of links followed before the closing opcode
(OP_KET .. OP_KETRPOS) is reached: one for the opening opcode plus one for
each OP_ALT, so the result is at least 1. */
static int no_alternatives(PCRE2_SPTR cc)
{
int alternatives = 0;

SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

do
  {
  alternatives++;
  cc += GET(cc, 1);
  }
while (*cc == OP_ALT);

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return alternatives;
}
854 
/* Functions which might need modification for all new supported opcodes:
856  next_opcode
857  check_opcode_types
858  set_private_data_ptrs
859  get_framesize
860  init_frame
861  get_recurse_data_length
862  copy_recurse_data
863  compile_matchingpath
864  compile_backtrackingpath
865 */
866 
/* Steps over one opcode.

Arguments:
  common   compiler state; only its utf flag is read, and only when
           SUPPORT_UNICODE is defined
  cc       points to the opcode

Returns a pointer to the code that follows the opcode, or NULL when the
opcode cannot be stepped over: OP_ANYBYTE in UTF mode, or any opcode that
is not listed here (which also triggers SLJIT_UNREACHABLE). */
static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
SLJIT_UNUSED_ARG(common);
switch(*cc)
  {
  /* Opcodes whose length is given by the OP_lengths table. */
  case OP_SOD:
  case OP_SOM:
  case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  case OP_ANY:
  case OP_ALLANY:
  case OP_NOTPROP:
  case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE:
  case OP_HSPACE:
  case OP_NOT_VSPACE:
  case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN:
  case OP_EOD:
  case OP_CIRC:
  case OP_CIRCM:
  case OP_DOLL:
  case OP_DOLLM:
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  case OP_CRPOSSTAR:
  case OP_CRPOSPLUS:
  case OP_CRPOSQUERY:
  case OP_CRPOSRANGE:
  case OP_CLASS:
  case OP_NCLASS:
  case OP_REF:
  case OP_REFI:
  case OP_DNREF:
  case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET:
  case OP_KETRMAX:
  case OP_KETRMIN:
  case OP_KETRPOS:
  case OP_REVERSE:
  case OP_ASSERT:
  case OP_ASSERT_NOT:
  case OP_ASSERTBACK:
  case OP_ASSERTBACK_NOT:
  case OP_ASSERT_NA:
  case OP_ASSERTBACK_NA:
  case OP_ONCE:
  case OP_SCRIPT_RUN:
  case OP_BRA:
  case OP_BRAPOS:
  case OP_CBRA:
  case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA:
  case OP_SBRAPOS:
  case OP_SCBRA:
  case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF:
  case OP_DNCREF:
  case OP_RREF:
  case OP_DNRREF:
  case OP_FALSE:
  case OP_TRUE:
  case OP_BRAZERO:
  case OP_BRAMINZERO:
  case OP_BRAPOSZERO:
  case OP_PRUNE:
  case OP_SKIP:
  case OP_THEN:
  case OP_COMMIT:
  case OP_FAIL:
  case OP_ACCEPT:
  case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  return cc + PRIV(OP_lengths)[*cc];

  /* Opcodes followed by a character: the table length applies, and in UTF
  mode the extra code units of the character are skipped as well. */
  case OP_CHAR:
  case OP_CHARI:
  case OP_NOT:
  case OP_NOTI:
  case OP_STAR:
  case OP_MINSTAR:
  case OP_PLUS:
  case OP_MINPLUS:
  case OP_QUERY:
  case OP_MINQUERY:
  case OP_UPTO:
  case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSPLUS:
  case OP_POSQUERY:
  case OP_POSUPTO:
  case OP_STARI:
  case OP_MINSTARI:
  case OP_PLUSI:
  case OP_MINPLUSI:
  case OP_QUERYI:
  case OP_MINQUERYI:
  case OP_UPTOI:
  case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI:
  case OP_POSPLUSI:
  case OP_POSQUERYI:
  case OP_POSUPTOI:
  case OP_NOTSTAR:
  case OP_NOTMINSTAR:
  case OP_NOTPLUS:
  case OP_NOTMINPLUS:
  case OP_NOTQUERY:
  case OP_NOTMINQUERY:
  case OP_NOTUPTO:
  case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR:
  case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY:
  case OP_NOTPOSUPTO:
  case OP_NOTSTARI:
  case OP_NOTMINSTARI:
  case OP_NOTPLUSI:
  case OP_NOTMINPLUSI:
  case OP_NOTQUERYI:
  case OP_NOTMINQUERYI:
  case OP_NOTUPTOI:
  case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI:
  case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI:
  case OP_NOTPOSUPTOI:
  cc += PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
  /* cc[-1] is the last code unit counted by the table length. */
  if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
  return cc;

  /* Special cases. */

  /* Type repeats: the result is one code unit short of the table length
  (presumably so that the type opcode at the end is visited as an opcode
  of its own - confirm against the callers). */
  case OP_TYPESTAR:
  case OP_TYPEMINSTAR:
  case OP_TYPEPLUS:
  case OP_TYPEMINPLUS:
  case OP_TYPEQUERY:
  case OP_TYPEMINQUERY:
  case OP_TYPEUPTO:
  case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR:
  case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY:
  case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[*cc] - 1;

  /* Rejected (NULL) in UTF mode, otherwise a single code unit. */
  case OP_ANYBYTE:
#ifdef SUPPORT_UNICODE
  if (common->utf) return NULL;
#endif
  return cc + 1;

  /* The total length is stored in the opcode, after two link sized fields. */
  case OP_CALLOUT_STR:
  return cc + GET(cc, 1 + 2*LINK_SIZE);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  /* The total length is stored right after the opcode. */
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  /* Verbs with an argument: cc[1] code units are skipped in addition to
  three fixed ones. */
  case OP_MARK:
  case OP_COMMIT_ARG:
  case OP_PRUNE_ARG:
  case OP_SKIP_ARG:
  case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  default:
  SLJIT_UNREACHABLE();
  return NULL;
  }
}
1067 
check_opcode_types(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend)1068 static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
1069 {
1070 int count;
1071 PCRE2_SPTR slot;
1072 PCRE2_SPTR assert_back_end = cc - 1;
1073 PCRE2_SPTR assert_na_end = cc - 1;
1074 
1075 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
1076 while (cc < ccend)
1077   {
1078   switch(*cc)
1079     {
1080     case OP_SET_SOM:
1081     common->has_set_som = TRUE;
1082     common->might_be_empty = TRUE;
1083     cc += 1;
1084     break;
1085 
1086     case OP_REFI:
1087 #ifdef SUPPORT_UNICODE
1088     if (common->iref_ptr == 0)
1089       {
1090       common->iref_ptr = common->ovector_start;
1091       common->ovector_start += 3 * sizeof(sljit_sw);
1092       }
1093 #endif /* SUPPORT_UNICODE */
1094     /* Fall through. */
1095     case OP_REF:
1096     common->optimized_cbracket[GET2(cc, 1)] = 0;
1097     cc += 1 + IMM2_SIZE;
1098     break;
1099 
1100     case OP_ASSERT_NA:
1101     case OP_ASSERTBACK_NA:
1102     slot = bracketend(cc);
1103     if (slot > assert_na_end)
1104       assert_na_end = slot;
1105     cc += 1 + LINK_SIZE;
1106     break;
1107 
1108     case OP_CBRAPOS:
1109     case OP_SCBRAPOS:
1110     common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
1111     cc += 1 + LINK_SIZE + IMM2_SIZE;
1112     break;
1113 
1114     case OP_COND:
1115     case OP_SCOND:
1116     /* Only AUTO_CALLOUT can insert this opcode. We do
1117        not intend to support this case. */
1118     if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
1119       return FALSE;
1120     cc += 1 + LINK_SIZE;
1121     break;
1122 
1123     case OP_CREF:
1124     common->optimized_cbracket[GET2(cc, 1)] = 0;
1125     cc += 1 + IMM2_SIZE;
1126     break;
1127 
1128     case OP_DNREF:
1129     case OP_DNREFI:
1130     case OP_DNCREF:
1131     count = GET2(cc, 1 + IMM2_SIZE);
1132     slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
1133     while (count-- > 0)
1134       {
1135       common->optimized_cbracket[GET2(slot, 0)] = 0;
1136       slot += common->name_entry_size;
1137       }
1138     cc += 1 + 2 * IMM2_SIZE;
1139     break;
1140 
1141     case OP_RECURSE:
1142     /* Set its value only once. */
1143     if (common->recursive_head_ptr == 0)
1144       {
1145       common->recursive_head_ptr = common->ovector_start;
1146       common->ovector_start += sizeof(sljit_sw);
1147       }
1148     cc += 1 + LINK_SIZE;
1149     break;
1150 
1151     case OP_CALLOUT:
1152     case OP_CALLOUT_STR:
1153     if (common->capture_last_ptr == 0)
1154       {
1155       common->capture_last_ptr = common->ovector_start;
1156       common->ovector_start += sizeof(sljit_sw);
1157       }
1158     cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
1159     break;
1160 
1161     case OP_ASSERTBACK:
1162     slot = bracketend(cc);
1163     if (slot > assert_back_end)
1164       assert_back_end = slot;
1165     cc += 1 + LINK_SIZE;
1166     break;
1167 
1168     case OP_THEN_ARG:
1169     common->has_then = TRUE;
1170     common->control_head_ptr = 1;
1171     /* Fall through. */
1172 
1173     case OP_COMMIT_ARG:
1174     case OP_PRUNE_ARG:
1175     if (cc < assert_na_end)
1176       return FALSE;
1177     /* Fall through */
1178     case OP_MARK:
1179     if (common->mark_ptr == 0)
1180       {
1181       common->mark_ptr = common->ovector_start;
1182       common->ovector_start += sizeof(sljit_sw);
1183       }
1184     cc += 1 + 2 + cc[1];
1185     break;
1186 
1187     case OP_THEN:
1188     common->has_then = TRUE;
1189     common->control_head_ptr = 1;
1190     cc += 1;
1191     break;
1192 
1193     case OP_SKIP:
1194     if (cc < assert_back_end)
1195       common->has_skip_in_assert_back = TRUE;
1196     if (cc < assert_na_end)
1197       return FALSE;
1198     cc += 1;
1199     break;
1200 
1201     case OP_SKIP_ARG:
1202     common->control_head_ptr = 1;
1203     common->has_skip_arg = TRUE;
1204     if (cc < assert_back_end)
1205       common->has_skip_in_assert_back = TRUE;
1206     if (cc < assert_na_end)
1207       return FALSE;
1208     cc += 1 + 2 + cc[1];
1209     break;
1210 
1211     case OP_PRUNE:
1212     case OP_COMMIT:
1213     case OP_ASSERT_ACCEPT:
1214     if (cc < assert_na_end)
1215       return FALSE;
1216     cc++;
1217     break;
1218 
1219     default:
1220     cc = next_opcode(common, cc);
1221     if (cc == NULL)
1222       return FALSE;
1223     break;
1224     }
1225   }
1226 return TRUE;
1227 }
1228 
/* Upper limit of the counter used by detect_early_fail(): scanning of an
alternative stops once the counter reaches this value, and the function
returns it when the scan had to be abandoned. */
1229 #define EARLY_FAIL_ENHANCE_MAX (1 + 1)
1230
1231 /*
1232 start:
1233   0 - skip / early fail allowed
1234   1 - only early fail with range allowed
1235   >1 - (start - 1) early fail is processed
1236 
1237 return: current number of iterators enhanced with fast fail
1238 */
/* Scans every alternative of the bracket starting at cc and marks repeat
opcodes, reached through items the scan knows how to step over, for the
type_skip, type_fail or type_fail_range enhancement. A mark is stored in
common->private_data_ptrs at the index of the code unit following the repeat
opcode: the current *private_data_start value shifted left by three, or-ed
with the type. *private_data_start is advanced by one word for type_skip and
type_fail, and by two words for type_fail_range.

Arguments:
  common              compiler state (fields written: fast_forward_bc_ptr,
                      private_data_ptrs, early_fail_start_ptr,
                      early_fail_end_ptr)
  cc                  start of an OP_ONCE, OP_BRA or OP_CBRA bracket; for
                      OP_CBRA its optimized_cbracket entry must be non-zero
  private_data_start  in/out: next free private data offset
  depth               nesting level of this call; brackets with several
                      alternatives are not entered at depth 4 or above
  start               initial counter value, must be below
                      EARLY_FAIL_ENHANCE_MAX (meaning described above)

Returns EARLY_FAIL_ENHANCE_MAX if an alternative stopped on an opcode other
than OP_ALT / OP_KET or the private data offset grew beyond
SLJIT_MAX_LOCAL_SIZE; otherwise the largest counter value reached by any
alternative. */
detect_early_fail(compiler_common * common,PCRE2_SPTR cc,int * private_data_start,sljit_s32 depth,int start)1239 static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth, int start)
1240 {
1241 PCRE2_SPTR next_alt;
1242 PCRE2_SPTR end;
1243 PCRE2_SPTR accelerated_start;
1244 int result = 0;
1245 int count;
1246 BOOL fast_forward_allowed = TRUE;
1247
1248 SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
1249 SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
1250 SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);
1251
/* A bracket which has more than one alternative never gets type_skip. */
1252 next_alt = cc + GET(cc, 1);
1253 if (*next_alt == OP_ALT)
1254   fast_forward_allowed = FALSE;
1255
/* One iteration for each alternative; every alternative restarts from the
initial counter value. */
1256 do
1257   {
1258   count = start;
/* Step over the header of the bracket (first iteration) or of OP_ALT. */
1259   cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1260
/* Inside this loop "continue" proceeds with the next opcode, while leaving
the switch with "break" either records a candidate (accelerated_start is
set) or terminates the scan of the alternative. */
1261   while (TRUE)
1262     {
1263     accelerated_start = NULL;
1264
1265     switch(*cc)
1266       {
1267       case OP_SOD:
1268       case OP_SOM:
1269       case OP_SET_SOM:
1270       case OP_NOT_WORD_BOUNDARY:
1271       case OP_WORD_BOUNDARY:
1272       case OP_EODN:
1273       case OP_EOD:
1274       case OP_CIRC:
1275       case OP_CIRCM:
1276       case OP_DOLL:
1277       case OP_DOLLM:
1278       /* Zero width assertions. */
1279       cc++;
1280       continue;
1281
/* One code unit long opcodes: stepped over, but type_skip is not allowed
after them. */
1282       case OP_NOT_DIGIT:
1283       case OP_DIGIT:
1284       case OP_NOT_WHITESPACE:
1285       case OP_WHITESPACE:
1286       case OP_NOT_WORDCHAR:
1287       case OP_WORDCHAR:
1288       case OP_ANY:
1289       case OP_ALLANY:
1290       case OP_ANYBYTE:
1291       case OP_NOT_HSPACE:
1292       case OP_HSPACE:
1293       case OP_NOT_VSPACE:
1294       case OP_VSPACE:
1295       fast_forward_allowed = FALSE;
1296       cc++;
1297       continue;
1298
/* As above, and the counter is raised to at least one, which is the
"only early fail with range allowed" state. */
1299       case OP_ANYNL:
1300       case OP_EXTUNI:
1301       fast_forward_allowed = FALSE;
1302       if (count == 0)
1303         count = 1;
1304       cc++;
1305       continue;
1306
1307       case OP_NOTPROP:
1308       case OP_PROP:
1309       fast_forward_allowed = FALSE;
1310       cc += 1 + 2;
1311       continue;
1312
/* Opcode followed by a character, which may have extra code units in UTF
mode. */
1313       case OP_CHAR:
1314       case OP_CHARI:
1315       case OP_NOT:
1316       case OP_NOTI:
1317       fast_forward_allowed = FALSE;
1318       cc += 2;
1319 #ifdef SUPPORT_UNICODE
1320       if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1321 #endif
1322       continue;
1323
1324       case OP_TYPESTAR:
1325       case OP_TYPEMINSTAR:
1326       case OP_TYPEPLUS:
1327       case OP_TYPEMINPLUS:
1328       case OP_TYPEPOSSTAR:
1329       case OP_TYPEPOSPLUS:
1330       /* The type or prop opcode is skipped in the next iteration. */
1331       cc += 1;
1332
/* A candidate unless the repeated item is OP_ANYNL or OP_EXTUNI; cc - 1 is
the repeat opcode itself. */
1333       if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)
1334         {
1335         accelerated_start = cc - 1;
1336         break;
1337         }
1338
1339       if (count == 0)
1340         count = 1;
1341       fast_forward_allowed = FALSE;
1342       continue;
1343
/* These have an IMM2_SIZE operand before the type opcode. They are never
candidates, only stepped over. */
1344       case OP_TYPEUPTO:
1345       case OP_TYPEMINUPTO:
1346       case OP_TYPEEXACT:
1347       case OP_TYPEPOSUPTO:
1348       cc += IMM2_SIZE;
1349       /* Fall through */
1350
1351       case OP_TYPEQUERY:
1352       case OP_TYPEMINQUERY:
1353       case OP_TYPEPOSQUERY:
1354       /* The type or prop opcode is skipped in the next iteration. */
1355       fast_forward_allowed = FALSE;
1356       if (count == 0)
1357         count = 1;
1358       cc += 1;
1359       continue;
1360
/* Star and plus repeats followed by their character: always candidates. */
1361       case OP_STAR:
1362       case OP_MINSTAR:
1363       case OP_PLUS:
1364       case OP_MINPLUS:
1365       case OP_POSSTAR:
1366       case OP_POSPLUS:
1367
1368       case OP_STARI:
1369       case OP_MINSTARI:
1370       case OP_PLUSI:
1371       case OP_MINPLUSI:
1372       case OP_POSSTARI:
1373       case OP_POSPLUSI:
1374
1375       case OP_NOTSTAR:
1376       case OP_NOTMINSTAR:
1377       case OP_NOTPLUS:
1378       case OP_NOTMINPLUS:
1379       case OP_NOTPOSSTAR:
1380       case OP_NOTPOSPLUS:
1381
1382       case OP_NOTSTARI:
1383       case OP_NOTMINSTARI:
1384       case OP_NOTPLUSI:
1385       case OP_NOTMINPLUSI:
1386       case OP_NOTPOSSTARI:
1387       case OP_NOTPOSPLUSI:
1388       accelerated_start = cc;
1389       cc += 2;
1390 #ifdef SUPPORT_UNICODE
1391       if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1392 #endif
1393       break;
1394
/* Repeats with an IMM2_SIZE operand before the character, and optional
characters: never candidates, only stepped over. */
1395       case OP_UPTO:
1396       case OP_MINUPTO:
1397       case OP_EXACT:
1398       case OP_POSUPTO:
1399       case OP_UPTOI:
1400       case OP_MINUPTOI:
1401       case OP_EXACTI:
1402       case OP_POSUPTOI:
1403       case OP_NOTUPTO:
1404       case OP_NOTMINUPTO:
1405       case OP_NOTEXACT:
1406       case OP_NOTPOSUPTO:
1407       case OP_NOTUPTOI:
1408       case OP_NOTMINUPTOI:
1409       case OP_NOTEXACTI:
1410       case OP_NOTPOSUPTOI:
1411       cc += IMM2_SIZE;
1412       /* Fall through */
1413
1414       case OP_QUERY:
1415       case OP_MINQUERY:
1416       case OP_POSQUERY:
1417       case OP_QUERYI:
1418       case OP_MINQUERYI:
1419       case OP_POSQUERYI:
1420       case OP_NOTQUERY:
1421       case OP_NOTMINQUERY:
1422       case OP_NOTPOSQUERY:
1423       case OP_NOTQUERYI:
1424       case OP_NOTMINQUERYI:
1425       case OP_NOTPOSQUERYI:
1426       fast_forward_allowed = FALSE;
1427       if (count == 0)
1428         count = 1;
1429       cc += 2;
1430 #ifdef SUPPORT_UNICODE
1431       if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1432 #endif
1433       continue;
1434
/* Character classes: cc is moved past the class data, then the opcode
following the class decides what happens. */
1435       case OP_CLASS:
1436       case OP_NCLASS:
1437 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
1438       case OP_XCLASS:
1439       accelerated_start = cc;
1440       cc += ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR))));
1441 #else
1442       accelerated_start = cc;
1443       cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
1444 #endif
1445
/* Only a star or plus repeat keeps accelerated_start. Every other opcode
(including no repeat at all) clears it; the counter is raised only for the
range and query repeats. */
1446       switch (*cc)
1447         {
1448         case OP_CRSTAR:
1449         case OP_CRMINSTAR:
1450         case OP_CRPLUS:
1451         case OP_CRMINPLUS:
1452         case OP_CRPOSSTAR:
1453         case OP_CRPOSPLUS:
1454         cc++;
1455         break;
1456
1457         case OP_CRRANGE:
1458         case OP_CRMINRANGE:
1459         case OP_CRPOSRANGE:
1460         cc += 2 * IMM2_SIZE;
1461         /* Fall through */
1462         case OP_CRQUERY:
1463         case OP_CRMINQUERY:
1464         case OP_CRPOSQUERY:
1465         cc++;
1466         if (count == 0)
1467           count = 1;
1468         /* Fall through */
1469         default:
1470         accelerated_start = NULL;
1471         fast_forward_allowed = FALSE;
1472         continue;
1473         }
1474       break;
1475
1476       case OP_ONCE:
1477       case OP_BRA:
1478       case OP_CBRA:
1479       end = cc + GET(cc, 1);
1480
/* A bracket with a single alternative, closed by an OP_KET whose
PRIVATE_DATA() is zero: only its header is stepped over and the scan goes on
with its content. An OP_CBRA whose optimized_cbracket entry is zero ends the
scan instead. */
1481       if (*end == OP_KET && PRIVATE_DATA(end) == 0)
1482         {
1483         if (*cc == OP_CBRA)
1484           {
1485           if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1486             break;
1487           cc += IMM2_SIZE;
1488           }
1489
1490         cc += 1 + LINK_SIZE;
1491         continue;
1492         }
1493
/* Any other bracket is processed by a recursive call, unless the nesting
limit is reached, its closing opcode is not an OP_KET with zero
PRIVATE_DATA(), or it is an OP_CBRA whose optimized_cbracket entry is zero. */
1494       fast_forward_allowed = FALSE;
1495       if (depth >= 4)
1496         break;
1497
1498       end = bracketend(cc) - (1 + LINK_SIZE);
1499       if (*end != OP_KET || PRIVATE_DATA(end) != 0)
1500         break;
1501
1502       if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1503         break;
1504
/* The returned value becomes the new counter; scanning resumes after the
closing OP_KET only if the limit has not been reached. */
1505       count = detect_early_fail(common, cc, private_data_start, depth + 1, count);
1506       if (count < EARLY_FAIL_ENHANCE_MAX)
1507         {
1508         cc = end + (1 + LINK_SIZE);
1509         continue;
1510         }
1511       break;
1512
/* An OP_KET at or after next_alt ends the current alternative. An earlier
one is stepped over (presumably it closes a single alternative bracket whose
header was stepped over above). */
1513       case OP_KET:
1514       SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
1515       if (cc >= next_alt)
1516         break;
1517       cc += 1 + LINK_SIZE;
1518       continue;
1519       }
1520
/* Opcodes without a case above leave accelerated_start as NULL, so they
terminate the scan of the alternative. */
1521     if (accelerated_start != NULL)
1522       {
1523       if (count == 0)
1524         {
/* Together with the increment after this if/else, the first enhancement
moves the counter from 0 to 2. */
1525         count++;
1526
1527         if (fast_forward_allowed)
1528           {
1529           common->fast_forward_bc_ptr = accelerated_start;
1530           common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
1531           *private_data_start += sizeof(sljit_sw);
1532           }
1533         else
1534           {
1535           common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;
1536
/* early_fail_start_ptr is set by the first early fail item only, while
early_fail_end_ptr always follows the most recently reserved word. */
1537           if (common->early_fail_start_ptr == 0)
1538             common->early_fail_start_ptr = *private_data_start;
1539
1540           *private_data_start += sizeof(sljit_sw);
1541           common->early_fail_end_ptr = *private_data_start;
1542
1543           if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1544             return EARLY_FAIL_ENHANCE_MAX;
1545           }
1546         }
1547       else
1548         {
/* Non-zero counter: the range variant is used, which reserves two words. */
1549         common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;
1550
1551         if (common->early_fail_start_ptr == 0)
1552           common->early_fail_start_ptr = *private_data_start;
1553
1554         *private_data_start += 2 * sizeof(sljit_sw);
1555         common->early_fail_end_ptr = *private_data_start;
1556
1557         if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1558           return EARLY_FAIL_ENHANCE_MAX;
1559         }
1560
1561       count++;
1562
1563       if (count < EARLY_FAIL_ENHANCE_MAX)
1564         continue;
1565       }
1566
1567     break;
1568     }
1569
/* Stopping anywhere but at the end of the alternative forces the maximum
result; otherwise the result is the largest counter of the alternatives. */
1570   if (*cc != OP_ALT && *cc != OP_KET)
1571     result = EARLY_FAIL_ENHANCE_MAX;
1572   else if (result < count)
1573     result = count;
1574
/* Continue with the next alternative, if there is one. */
1575   cc = next_alt;
1576   next_alt = cc + GET(cc, 1);
1577   }
1578 while (*cc == OP_ALT);
1579
1580 return result;
1581 }
1582 
/* Inspects the repeat opcode at cc, which is the opcode following a character
class, and returns 0, 1 or 2. The caller, set_private_data_ptrs(), reserves
that many words of private data for the class. Zero is returned for every
opcode which is not listed below. */
static int get_class_iterator_size(PCRE2_SPTR cc)
{
sljit_u32 lower;
sljit_u32 upper;
sljit_u32 span;

switch(*cc)
  {
  case OP_CRMINSTAR:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  return 1;

  case OP_CRSTAR:
  case OP_CRPLUS:
  return 2;

  case OP_CRRANGE:
  case OP_CRMINRANGE:
  lower = GET2(cc, 1);
  upper = GET2(cc, 1 + IMM2_SIZE);

  /* A zero upper limit is handled like the star / minstar repeats. */
  if (upper == 0)
    return (*cc == OP_CRMINRANGE) ? 1 : 2;

  /* Otherwise the difference of the two limits, but at most two. */
  span = upper - lower;
  return (span > 2) ? 2 : span;

  default:
  return 0;
  }
}
1614 
detect_repeat(compiler_common * common,PCRE2_SPTR begin)1615 static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
1616 {
1617 PCRE2_SPTR end = bracketend(begin);
1618 PCRE2_SPTR next;
1619 PCRE2_SPTR next_end;
1620 PCRE2_SPTR max_end;
1621 PCRE2_UCHAR type;
1622 sljit_sw length = end - begin;
1623 sljit_s32 min, max, i;
1624 
1625 /* Detect fixed iterations first. */
1626 if (end[-(1 + LINK_SIZE)] != OP_KET)
1627   return FALSE;
1628 
1629 /* Already detected repeat. */
1630 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
1631   return TRUE;
1632 
1633 next = end;
1634 min = 1;
1635 while (1)
1636   {
1637   if (*next != *begin)
1638     break;
1639   next_end = bracketend(next);
1640   if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1641     break;
1642   next = next_end;
1643   min++;
1644   }
1645 
1646 if (min == 2)
1647   return FALSE;
1648 
1649 max = 0;
1650 max_end = next;
1651 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1652   {
1653   type = *next;
1654   while (1)
1655     {
1656     if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1657       break;
1658     next_end = bracketend(next + 2 + LINK_SIZE);
1659     if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1660       break;
1661     next = next_end;
1662     max++;
1663     }
1664 
1665   if (next[0] == type && next[1] == *begin && max >= 1)
1666     {
1667     next_end = bracketend(next + 1);
1668     if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1669       {
1670       for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1671         if (*next_end != OP_KET)
1672           break;
1673 
1674       if (i == max)
1675         {
1676         common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1677         common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1678         /* +2 the original and the last. */
1679         common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1680         if (min == 1)
1681           return TRUE;
1682         min--;
1683         max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1684         }
1685       }
1686     }
1687   }
1688 
1689 if (min >= 3)
1690   {
1691   common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1692   common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1693   common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1694   return TRUE;
1695   }
1696 
1697 return FALSE;
1698 }
1699 
/* The macros below expand to lists of case labels, so that a group of repeat
opcodes can be handled by a single branch of a switch statement. */

/* set_private_data_ptrs() advances by two code units (plus any extra UTF
code units) for these opcodes and reserves one private data word. */
1700 #define CASE_ITERATOR_PRIVATE_DATA_1 \
1701     case OP_MINSTAR: \
1702     case OP_MINPLUS: \
1703     case OP_QUERY: \
1704     case OP_MINQUERY: \
1705     case OP_MINSTARI: \
1706     case OP_MINPLUSI: \
1707     case OP_QUERYI: \
1708     case OP_MINQUERYI: \
1709     case OP_NOTMINSTAR: \
1710     case OP_NOTMINPLUS: \
1711     case OP_NOTQUERY: \
1712     case OP_NOTMINQUERY: \
1713     case OP_NOTMINSTARI: \
1714     case OP_NOTMINPLUSI: \
1715     case OP_NOTQUERYI: \
1716     case OP_NOTMINQUERYI:
1717
/* Same length as the previous group, but set_private_data_ptrs() reserves
two private data words. */
1718 #define CASE_ITERATOR_PRIVATE_DATA_2A \
1719     case OP_STAR: \
1720     case OP_PLUS: \
1721     case OP_STARI: \
1722     case OP_PLUSI: \
1723     case OP_NOTSTAR: \
1724     case OP_NOTPLUS: \
1725     case OP_NOTSTARI: \
1726     case OP_NOTPLUSI:
1727
/* Opcodes with an additional IMM2_SIZE operand; set_private_data_ptrs()
reserves two private data words for them. */
1728 #define CASE_ITERATOR_PRIVATE_DATA_2B \
1729     case OP_UPTO: \
1730     case OP_MINUPTO: \
1731     case OP_UPTOI: \
1732     case OP_MINUPTOI: \
1733     case OP_NOTUPTO: \
1734     case OP_NOTMINUPTO: \
1735     case OP_NOTUPTOI: \
1736     case OP_NOTMINUPTOI:
1737
/* Type repeats: set_private_data_ptrs() advances by one code unit and
reserves one private data word. */
1738 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1739     case OP_TYPEMINSTAR: \
1740     case OP_TYPEMINPLUS: \
1741     case OP_TYPEQUERY: \
1742     case OP_TYPEMINQUERY:
1743
/* Type repeats for which set_private_data_ptrs() reserves two private data
words, unless the repeated item is OP_ANYNL or OP_EXTUNI. */
1744 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1745     case OP_TYPESTAR: \
1746     case OP_TYPEPLUS:
1747
/* set_private_data_ptrs() does not use this group: it has separate cases for
OP_TYPEUPTO and OP_TYPEMINUPTO. NOTE(review): the users of this macro are
outside the reviewed section - confirm before changing it. */
1748 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1749     case OP_TYPEUPTO: \
1750     case OP_TYPEMINUPTO:
1751 
/* Assigns private data offsets to the opcodes between common->start and
ccend. The offsets are stored in common->private_data_ptrs, indexed by the
position of the opcode. Allocation starts at *private_data_start, which
receives the next free offset when the function returns. The scan stops
early once the next free offset is above SLJIT_MAX_LOCAL_SIZE. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
PCRE2_SPTR cc = common->start;
PCRE2_SPTR first_alt_end;
/* Character iterators below this address do not get a private slot. */
PCRE2_SPTR no_slot_end = NULL;
int next_ptr = *private_data_start;
int words;      /* Number of words to reserve for the current opcode. */
int step;       /* Length of the opcode; negative: a character follows. */
int header_len; /* Non-zero if the opcode opens a bracket. */
BOOL try_repeat = TRUE;

while (cc < ccend && next_ptr <= SLJIT_MAX_LOCAL_SIZE)
  {
  words = 0;
  step = 0;
  header_len = 0;

  /* Brackets which are converted to repeats must not contain
  single character repeats with a global slot. */
  if (try_repeat
      && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
      && detect_repeat(common, cc)
      && cc >= no_slot_end)
    no_slot_end = bracketend(cc);

  try_repeat = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry after the OP_KET was stored by detect_repeat():
    the OP_KET gets a slot and the repeated copies are jumped over. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = next_ptr;
      next_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_BRA:
    header_len = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    header_len = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    first_alt_end = cc + GET(cc, 1);
    if (*first_alt_end == OP_KETRMAX || *first_alt_end == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = next_ptr;
      next_ptr += sizeof(sljit_sw);
      }
    header_len = 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    /* These brackets always get one word. */
    common->private_data_ptrs[cc - common->start] = next_ptr;
    next_ptr += sizeof(sljit_sw);
    header_len = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = next_ptr;
    next_ptr += sizeof(sljit_sw);
    header_len = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    /* No repeat detection for the bracket which follows. */
    try_repeat = FALSE;
    step = 1;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    words = 1;
    step = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    words = 2;
    step = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    words = 2;
    step = -(2 + IMM2_SIZE);
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    words = 1;
    step = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    step = 1;
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      words = 2;
    break;

    case OP_TYPEUPTO:
    step = 1 + IMM2_SIZE;
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      words = 2;
    break;

    case OP_TYPEMINUPTO:
    words = 2;
    step = 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    step = 1 + 32 / sizeof(PCRE2_UCHAR);
    words = get_class_iterator_size(cc + step);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    step = GET(cc, 1);
    words = get_class_iterator_size(cc + step);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators which are not inside a repeated bracket
     get a private slot instead of allocating it on the stack. */
  if (words > 0 && cc >= no_slot_end)
    {
    common->private_data_ptrs[cc - common->start] = next_ptr;
    next_ptr += sizeof(sljit_sw) * words;
    }

  if (step > 0)
    cc += step;
  else if (step < 0)
    {
    /* The opcode ends with a character, which may be longer in UTF mode. */
    cc -= step;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    }

  if (header_len > 0)
    {
    /* Outside of a protected region: a bracket which is not closed by a
    plain OP_KET starts a new one, otherwise the protection is dropped. */
    if (cc >= no_slot_end)
      {
      no_slot_end = bracketend(cc);
      if (no_slot_end[-1 - LINK_SIZE] == OP_KET)
        no_slot_end = NULL;
      }
    cc += header_len;
    }
  }

*private_data_start = next_ptr;
}
1934 
1935 /* Returns with a frame_types (always < 0) if no need for frame. */
get_framesize(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,BOOL recursive,BOOL * needs_control_head)1936 static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
1937 {
1938 int length = 0;
1939 int possessive = 0;
1940 BOOL stack_restore = FALSE;
1941 BOOL setsom_found = recursive;
1942 BOOL setmark_found = recursive;
1943 /* The last capture is a local variable even for recursions. */
1944 BOOL capture_last_found = FALSE;
1945 
1946 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1947 SLJIT_ASSERT(common->control_head_ptr != 0);
1948 *needs_control_head = TRUE;
1949 #else
1950 *needs_control_head = FALSE;
1951 #endif
1952 
1953 if (ccend == NULL)
1954   {
1955   ccend = bracketend(cc) - (1 + LINK_SIZE);
1956   if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1957     {
1958     possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1959     /* This is correct regardless of common->capture_last_ptr. */
1960     capture_last_found = TRUE;
1961     }
1962   cc = next_opcode(common, cc);
1963   }
1964 
1965 SLJIT_ASSERT(cc != NULL);
1966 while (cc < ccend)
1967   switch(*cc)
1968     {
1969     case OP_SET_SOM:
1970     SLJIT_ASSERT(common->has_set_som);
1971     stack_restore = TRUE;
1972     if (!setsom_found)
1973       {
1974       length += 2;
1975       setsom_found = TRUE;
1976       }
1977     cc += 1;
1978     break;
1979 
1980     case OP_MARK:
1981     case OP_COMMIT_ARG:
1982     case OP_PRUNE_ARG:
1983     case OP_THEN_ARG:
1984     SLJIT_ASSERT(common->mark_ptr != 0);
1985     stack_restore = TRUE;
1986     if (!setmark_found)
1987       {
1988       length += 2;
1989       setmark_found = TRUE;
1990       }
1991     if (common->control_head_ptr != 0)
1992       *needs_control_head = TRUE;
1993     cc += 1 + 2 + cc[1];
1994     break;
1995 
1996     case OP_RECURSE:
1997     stack_restore = TRUE;
1998     if (common->has_set_som && !setsom_found)
1999       {
2000       length += 2;
2001       setsom_found = TRUE;
2002       }
2003     if (common->mark_ptr != 0 && !setmark_found)
2004       {
2005       length += 2;
2006       setmark_found = TRUE;
2007       }
2008     if (common->capture_last_ptr != 0 && !capture_last_found)
2009       {
2010       length += 2;
2011       capture_last_found = TRUE;
2012       }
2013     cc += 1 + LINK_SIZE;
2014     break;
2015 
2016     case OP_CBRA:
2017     case OP_CBRAPOS:
2018     case OP_SCBRA:
2019     case OP_SCBRAPOS:
2020     stack_restore = TRUE;
2021     if (common->capture_last_ptr != 0 && !capture_last_found)
2022       {
2023       length += 2;
2024       capture_last_found = TRUE;
2025       }
2026     length += 3;
2027     cc += 1 + LINK_SIZE + IMM2_SIZE;
2028     break;
2029 
2030     case OP_THEN:
2031     stack_restore = TRUE;
2032     if (common->control_head_ptr != 0)
2033       *needs_control_head = TRUE;
2034     cc ++;
2035     break;
2036 
2037     default:
2038     stack_restore = TRUE;
2039     /* Fall through. */
2040 
2041     case OP_NOT_WORD_BOUNDARY:
2042     case OP_WORD_BOUNDARY:
2043     case OP_NOT_DIGIT:
2044     case OP_DIGIT:
2045     case OP_NOT_WHITESPACE:
2046     case OP_WHITESPACE:
2047     case OP_NOT_WORDCHAR:
2048     case OP_WORDCHAR:
2049     case OP_ANY:
2050     case OP_ALLANY:
2051     case OP_ANYBYTE:
2052     case OP_NOTPROP:
2053     case OP_PROP:
2054     case OP_ANYNL:
2055     case OP_NOT_HSPACE:
2056     case OP_HSPACE:
2057     case OP_NOT_VSPACE:
2058     case OP_VSPACE:
2059     case OP_EXTUNI:
2060     case OP_EODN:
2061     case OP_EOD:
2062     case OP_CIRC:
2063     case OP_CIRCM:
2064     case OP_DOLL:
2065     case OP_DOLLM:
2066     case OP_CHAR:
2067     case OP_CHARI:
2068     case OP_NOT:
2069     case OP_NOTI:
2070 
2071     case OP_EXACT:
2072     case OP_POSSTAR:
2073     case OP_POSPLUS:
2074     case OP_POSQUERY:
2075     case OP_POSUPTO:
2076 
2077     case OP_EXACTI:
2078     case OP_POSSTARI:
2079     case OP_POSPLUSI:
2080     case OP_POSQUERYI:
2081     case OP_POSUPTOI:
2082 
2083     case OP_NOTEXACT:
2084     case OP_NOTPOSSTAR:
2085     case OP_NOTPOSPLUS:
2086     case OP_NOTPOSQUERY:
2087     case OP_NOTPOSUPTO:
2088 
2089     case OP_NOTEXACTI:
2090     case OP_NOTPOSSTARI:
2091     case OP_NOTPOSPLUSI:
2092     case OP_NOTPOSQUERYI:
2093     case OP_NOTPOSUPTOI:
2094 
2095     case OP_TYPEEXACT:
2096     case OP_TYPEPOSSTAR:
2097     case OP_TYPEPOSPLUS:
2098     case OP_TYPEPOSQUERY:
2099     case OP_TYPEPOSUPTO:
2100 
2101     case OP_CLASS:
2102     case OP_NCLASS:
2103     case OP_XCLASS:
2104 
2105     case OP_CALLOUT:
2106     case OP_CALLOUT_STR:
2107 
2108     cc = next_opcode(common, cc);
2109     SLJIT_ASSERT(cc != NULL);
2110     break;
2111     }
2112 
2113 /* Possessive quantifiers can use a special case. */
2114 if (SLJIT_UNLIKELY(possessive == length))
2115   return stack_restore ? no_frame : no_stack;
2116 
2117 if (length > 0)
2118   return length + 1;
2119 return stack_restore ? no_frame : no_stack;
2120 }
2121 
init_frame(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,int stackpos,int stacktop)2122 static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
2123 {
2124 DEFINE_COMPILER;
2125 BOOL setsom_found = FALSE;
2126 BOOL setmark_found = FALSE;
2127 /* The last capture is a local variable even for recursions. */
2128 BOOL capture_last_found = FALSE;
2129 int offset;
2130 
2131 /* >= 1 + shortest item size (2) */
2132 SLJIT_UNUSED_ARG(stacktop);
2133 SLJIT_ASSERT(stackpos >= stacktop + 2);
2134 
2135 stackpos = STACK(stackpos);
2136 if (ccend == NULL)
2137   {
2138   ccend = bracketend(cc) - (1 + LINK_SIZE);
2139   if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
2140     cc = next_opcode(common, cc);
2141   }
2142 
2143 SLJIT_ASSERT(cc != NULL);
2144 while (cc < ccend)
2145   switch(*cc)
2146     {
2147     case OP_SET_SOM:
2148     SLJIT_ASSERT(common->has_set_som);
2149     if (!setsom_found)
2150       {
2151       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2152       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2153       stackpos -= (int)sizeof(sljit_sw);
2154       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2155       stackpos -= (int)sizeof(sljit_sw);
2156       setsom_found = TRUE;
2157       }
2158     cc += 1;
2159     break;
2160 
2161     case OP_MARK:
2162     case OP_COMMIT_ARG:
2163     case OP_PRUNE_ARG:
2164     case OP_THEN_ARG:
2165     SLJIT_ASSERT(common->mark_ptr != 0);
2166     if (!setmark_found)
2167       {
2168       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2169       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2170       stackpos -= (int)sizeof(sljit_sw);
2171       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2172       stackpos -= (int)sizeof(sljit_sw);
2173       setmark_found = TRUE;
2174       }
2175     cc += 1 + 2 + cc[1];
2176     break;
2177 
2178     case OP_RECURSE:
2179     if (common->has_set_som && !setsom_found)
2180       {
2181       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2182       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2183       stackpos -= (int)sizeof(sljit_sw);
2184       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2185       stackpos -= (int)sizeof(sljit_sw);
2186       setsom_found = TRUE;
2187       }
2188     if (common->mark_ptr != 0 && !setmark_found)
2189       {
2190       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2191       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2192       stackpos -= (int)sizeof(sljit_sw);
2193       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2194       stackpos -= (int)sizeof(sljit_sw);
2195       setmark_found = TRUE;
2196       }
2197     if (common->capture_last_ptr != 0 && !capture_last_found)
2198       {
2199       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2200       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2201       stackpos -= (int)sizeof(sljit_sw);
2202       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2203       stackpos -= (int)sizeof(sljit_sw);
2204       capture_last_found = TRUE;
2205       }
2206     cc += 1 + LINK_SIZE;
2207     break;
2208 
2209     case OP_CBRA:
2210     case OP_CBRAPOS:
2211     case OP_SCBRA:
2212     case OP_SCBRAPOS:
2213     if (common->capture_last_ptr != 0 && !capture_last_found)
2214       {
2215       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2216       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2217       stackpos -= (int)sizeof(sljit_sw);
2218       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2219       stackpos -= (int)sizeof(sljit_sw);
2220       capture_last_found = TRUE;
2221       }
2222     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2223     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
2224     stackpos -= (int)sizeof(sljit_sw);
2225     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
2226     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
2227     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2228     stackpos -= (int)sizeof(sljit_sw);
2229     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
2230     stackpos -= (int)sizeof(sljit_sw);
2231 
2232     cc += 1 + LINK_SIZE + IMM2_SIZE;
2233     break;
2234 
2235     default:
2236     cc = next_opcode(common, cc);
2237     SLJIT_ASSERT(cc != NULL);
2238     break;
2239     }
2240 
2241 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
2242 SLJIT_ASSERT(stackpos == STACK(stacktop));
2243 }
2244 
/* Number of temporary registers the delayed memory copy helpers rotate
through. */
#define RECURSE_TMP_REG_COUNT 3

/* State of a sequence of delayed memory-to-memory word copies. Each copy
loads a word into one of the temporary registers; the matching store is
emitted only when that register is needed again, or when the sequence is
finished. */
typedef struct delayed_mem_copy_status {
  /* Compiler used for emitting the moves. */
  struct sljit_compiler *compiler;
  /* Base register of the pending store of each slot, or -1 when the slot
  has no pending store. */
  int store_bases[RECURSE_TMP_REG_COUNT];
  /* Offset of the pending store of each slot. */
  int store_offsets[RECURSE_TMP_REG_COUNT];
  /* Register which carries the copied word of each slot. */
  int tmp_regs[RECURSE_TMP_REG_COUNT];
  /* When sljit_get_register_index() is negative for this entry, the
  original content of tmp_regs[i] is preserved here while the slot is in
  use; otherwise the entry equals tmp_regs[i]. */
  int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Index of the slot used by the next copy. */
  int next_tmp_reg;
} delayed_mem_copy_status;
2255 
/* Prepares a delayed copy sequence: no slot has a pending store and the
first slot is used next. The caller must fill tmp_regs[] and
saved_tmp_regs[] before calling this function (they are only checked
here). */
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
int slot = 0;

status->compiler = common->compiler;
status->next_tmp_reg = 0;

while (slot < RECURSE_TMP_REG_COUNT)
  {
  SLJIT_ASSERT(status->tmp_regs[slot] >= 0);
  /* A save location which differs from the temporary register is expected
  only when sljit reports a negative register index for it. */
  SLJIT_ASSERT(sljit_get_register_index(status->saved_tmp_regs[slot]) < 0 || status->tmp_regs[slot] == status->saved_tmp_regs[slot]);

  /* -1 marks "no pending store". */
  status->store_bases[slot] = -1;
  slot++;
  }
}
2270 
/* Queues the copy of one word from [load_base + load_offset] to
[store_base + store_offset]. The load is emitted immediately into the next
temporary register, while the store is postponed until the same slot is
reused or delayed_mem_copy_finish() is called. */
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_sw store_offset)
{
struct sljit_compiler *compiler = status->compiler;
const int slot = status->next_tmp_reg;
const int reg = status->tmp_regs[slot];
const int pending_base = status->store_bases[slot];

SLJIT_ASSERT(load_base > 0 && store_base > 0);

if (pending_base != -1)
  {
  /* The slot still holds an earlier word: write it out first. */
  OP1(SLJIT_MOV, SLJIT_MEM1(pending_base), status->store_offsets[slot], reg, 0);
  }
else if (sljit_get_register_index(status->saved_tmp_regs[slot]) < 0)
  {
  /* First use of the slot: preserve virtual registers. */
  OP1(SLJIT_MOV, status->saved_tmp_regs[slot], 0, reg, 0);
  }

OP1(SLJIT_MOV, reg, 0, SLJIT_MEM1(load_base), load_offset);
status->store_offsets[slot] = store_offset;
status->store_bases[slot] = store_base;

/* Advance to the next slot in round-robin order. */
status->next_tmp_reg = (slot + 1) % RECURSE_TMP_REG_COUNT;
}
2295 
/* Emits every store which is still pending, starting with the slot that
would be used next (the oldest pending one), and restores the temporary
registers which were preserved by delayed_mem_copy_move(). */
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int remaining;

for (remaining = RECURSE_TMP_REG_COUNT; remaining > 0; remaining--)
  {
  if (status->store_bases[slot] != -1)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], status->tmp_regs[slot], 0);

    /* Restore virtual registers. */
    if (sljit_get_register_index(status->saved_tmp_regs[slot]) < 0)
      OP1(SLJIT_MOV, status->tmp_regs[slot], 0, status->saved_tmp_regs[slot], 0);
    }

  slot = (slot + 1) % RECURSE_TMP_REG_COUNT;
  }
}

#undef RECURSE_TMP_REG_COUNT
2321 
/* Scans the byte code range [cc, ccend) and returns the number of machine
words needed for saving its data around a recursive call. The count starts
at one; copy_recurse_data() uses that first word for recursive_head_ptr.

Output arguments:
  needs_control_head   TRUE when the control head must be saved as well
  has_quit             TRUE when a verb which can quit the recursion
                       (PRUNE, SKIP, COMMIT, THEN, with or without
                       argument) was found
  has_accept           TRUE when OP_ACCEPT or OP_ASSERT_ACCEPT was found

The start-of-match and mark values are counted only when has_quit is set. */
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
  BOOL *needs_control_head, BOOL *has_quit, BOOL *has_accept)
{
int words = 1;
int class_len;
PCRE2_SPTR alt;
BOOL saw_quit = FALSE;
BOOL saw_accept = FALSE;
BOOL saw_setsom = FALSE;
BOOL saw_setmark = FALSE;
BOOL saw_capture_last = FALSE;
BOOL saw_control_head = FALSE;

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
saw_control_head = TRUE;
#endif

/* Calculate the sum of the private machine words. */
while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    saw_setsom = TRUE;
    cc += 1;
    break;

    case OP_RECURSE:
    /* A nested recursion may modify all of these. */
    if (common->has_set_som)
      saw_setsom = TRUE;
    if (common->mark_ptr != 0)
      saw_setmark = TRUE;
    if (common->capture_last_ptr != 0)
      saw_capture_last = TRUE;
    cc += 1 + LINK_SIZE;
    break;

    case OP_KET:
    if (PRIVATE_DATA(cc) != 0)
      {
      words += 1;
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      /* The private data of the next code unit holds the extra skip. */
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    words += 1;
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    /* Two ovector words, plus one private word for brackets which are not
    optimized. */
    words += 2;
    if (common->capture_last_ptr != 0)
      saw_capture_last = TRUE;
    if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
      words += 1;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    /* Two ovector words and two private words. */
    words += 2 + 2;
    if (common->capture_last_ptr != 0)
      saw_capture_last = TRUE;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alt = cc + GET(cc, 1);
    if (*alt == OP_KETRMAX || *alt == OP_KETRMIN)
      words += 1;
    cc += 1 + LINK_SIZE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc) != 0)
      words += 1;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc) != 0)
      words += 1;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    class_len = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
    class_len = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif
    /* The iterator which follows the class decides the number of words. */
    if (PRIVATE_DATA(cc) != 0)
      words += get_class_iterator_size(cc + class_len);
    cc += class_len;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    saw_setmark = TRUE;
    if (common->control_head_ptr != 0)
      saw_control_head = TRUE;
    /* Every opcode of this group except OP_MARK can quit. */
    if (*cc != OP_MARK)
      saw_quit = TRUE;

    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_SKIP:
    case OP_COMMIT:
    saw_quit = TRUE;
    cc++;
    break;

    case OP_SKIP_ARG:
    saw_quit = TRUE;
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    saw_quit = TRUE;
    saw_control_head = TRUE;
    cc++;
    break;

    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    saw_accept = TRUE;
    cc++;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }
SLJIT_ASSERT(cc == ccend);

/* Values which are saved at most once. */
if (saw_control_head)
  words += 1;
if (saw_capture_last)
  words += 1;
if (saw_quit && saw_setsom)
  words += 1;
if (saw_quit && saw_setmark)
  words += 1;

*needs_control_head = saw_control_head;
*has_quit = saw_quit;
*has_accept = saw_accept;
return words;
}
2539 
/* Copy modes of copy_recurse_data(). "Global" refers to the locals which
are addressed relative to SLJIT_SP; the other side is the save area
addressed through a base register. */
enum copy_recurse_data_types {
  /* Locals -> save area: private, shared and kept shared data. */
  recurse_copy_from_global = 0,
  /* Save area -> locals: private data only. */
  recurse_copy_private_to_global = 1,
  /* Save area -> locals: shared and kept shared data. */
  recurse_copy_shared_to_global = 2,
  /* Save area -> locals: kept shared data only. */
  recurse_copy_kept_shared_to_global = 3,
  /* Exchanges private and shared data between the locals and the save
  area; the save area is addressed through TMP2 in this mode. */
  recurse_swap_global = 4
};
2547 
copy_recurse_data(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,int type,int stackptr,int stacktop,BOOL has_quit)2548 static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
2549   int type, int stackptr, int stacktop, BOOL has_quit)
2550 {
2551 delayed_mem_copy_status status;
2552 PCRE2_SPTR alternative;
2553 sljit_sw private_srcw[2];
2554 sljit_sw shared_srcw[3];
2555 sljit_sw kept_shared_srcw[2];
2556 int private_count, shared_count, kept_shared_count;
2557 int from_sp, base_reg, offset, i;
2558 BOOL setsom_found = FALSE;
2559 BOOL setmark_found = FALSE;
2560 BOOL capture_last_found = FALSE;
2561 BOOL control_head_found = FALSE;
2562 
2563 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2564 SLJIT_ASSERT(common->control_head_ptr != 0);
2565 control_head_found = TRUE;
2566 #endif
2567 
2568 switch (type)
2569   {
2570   case recurse_copy_from_global:
2571   from_sp = TRUE;
2572   base_reg = STACK_TOP;
2573   break;
2574 
2575   case recurse_copy_private_to_global:
2576   case recurse_copy_shared_to_global:
2577   case recurse_copy_kept_shared_to_global:
2578   from_sp = FALSE;
2579   base_reg = STACK_TOP;
2580   break;
2581 
2582   default:
2583   SLJIT_ASSERT(type == recurse_swap_global);
2584   from_sp = FALSE;
2585   base_reg = TMP2;
2586   break;
2587   }
2588 
2589 stackptr = STACK(stackptr);
2590 stacktop = STACK(stacktop);
2591 
2592 status.tmp_regs[0] = TMP1;
2593 status.saved_tmp_regs[0] = TMP1;
2594 
2595 if (base_reg != TMP2)
2596   {
2597   status.tmp_regs[1] = TMP2;
2598   status.saved_tmp_regs[1] = TMP2;
2599   }
2600 else
2601   {
2602   status.saved_tmp_regs[1] = RETURN_ADDR;
2603   if (HAS_VIRTUAL_REGISTERS)
2604     status.tmp_regs[1] = STR_PTR;
2605   else
2606     status.tmp_regs[1] = RETURN_ADDR;
2607   }
2608 
2609 status.saved_tmp_regs[2] = TMP3;
2610 if (HAS_VIRTUAL_REGISTERS)
2611   status.tmp_regs[2] = STR_END;
2612 else
2613   status.tmp_regs[2] = TMP3;
2614 
2615 delayed_mem_copy_init(&status, common);
2616 
2617 if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2618   {
2619   SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2620 
2621   if (!from_sp)
2622     delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);
2623 
2624   if (from_sp || type == recurse_swap_global)
2625     delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
2626   }
2627 
2628 stackptr += sizeof(sljit_sw);
2629 
2630 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2631 if (type != recurse_copy_shared_to_global)
2632   {
2633   if (!from_sp)
2634     delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);
2635 
2636   if (from_sp || type == recurse_swap_global)
2637     delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
2638   }
2639 
2640 stackptr += sizeof(sljit_sw);
2641 #endif
2642 
2643 while (cc < ccend)
2644   {
2645   private_count = 0;
2646   shared_count = 0;
2647   kept_shared_count = 0;
2648 
2649   switch(*cc)
2650     {
2651     case OP_SET_SOM:
2652     SLJIT_ASSERT(common->has_set_som);
2653     if (has_quit && !setsom_found)
2654       {
2655       kept_shared_srcw[0] = OVECTOR(0);
2656       kept_shared_count = 1;
2657       setsom_found = TRUE;
2658       }
2659     cc += 1;
2660     break;
2661 
2662     case OP_RECURSE:
2663     if (has_quit)
2664       {
2665       if (common->has_set_som && !setsom_found)
2666         {
2667         kept_shared_srcw[0] = OVECTOR(0);
2668         kept_shared_count = 1;
2669         setsom_found = TRUE;
2670         }
2671       if (common->mark_ptr != 0 && !setmark_found)
2672         {
2673         kept_shared_srcw[kept_shared_count] = common->mark_ptr;
2674         kept_shared_count++;
2675         setmark_found = TRUE;
2676         }
2677       }
2678     if (common->capture_last_ptr != 0 && !capture_last_found)
2679       {
2680       shared_srcw[0] = common->capture_last_ptr;
2681       shared_count = 1;
2682       capture_last_found = TRUE;
2683       }
2684     cc += 1 + LINK_SIZE;
2685     break;
2686 
2687     case OP_KET:
2688     if (PRIVATE_DATA(cc) != 0)
2689       {
2690       private_count = 1;
2691       private_srcw[0] = PRIVATE_DATA(cc);
2692       SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2693       cc += PRIVATE_DATA(cc + 1);
2694       }
2695     cc += 1 + LINK_SIZE;
2696     break;
2697 
2698     case OP_ASSERT:
2699     case OP_ASSERT_NOT:
2700     case OP_ASSERTBACK:
2701     case OP_ASSERTBACK_NOT:
2702     case OP_ASSERT_NA:
2703     case OP_ASSERTBACK_NA:
2704     case OP_ONCE:
2705     case OP_SCRIPT_RUN:
2706     case OP_BRAPOS:
2707     case OP_SBRA:
2708     case OP_SBRAPOS:
2709     case OP_SCOND:
2710     private_count = 1;
2711     private_srcw[0] = PRIVATE_DATA(cc);
2712     cc += 1 + LINK_SIZE;
2713     break;
2714 
2715     case OP_CBRA:
2716     case OP_SCBRA:
2717     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2718     shared_srcw[0] = OVECTOR(offset);
2719     shared_srcw[1] = OVECTOR(offset + 1);
2720     shared_count = 2;
2721 
2722     if (common->capture_last_ptr != 0 && !capture_last_found)
2723       {
2724       shared_srcw[2] = common->capture_last_ptr;
2725       shared_count = 3;
2726       capture_last_found = TRUE;
2727       }
2728 
2729     if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
2730       {
2731       private_count = 1;
2732       private_srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
2733       }
2734     cc += 1 + LINK_SIZE + IMM2_SIZE;
2735     break;
2736 
2737     case OP_CBRAPOS:
2738     case OP_SCBRAPOS:
2739     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2740     shared_srcw[0] = OVECTOR(offset);
2741     shared_srcw[1] = OVECTOR(offset + 1);
2742     shared_count = 2;
2743 
2744     if (common->capture_last_ptr != 0 && !capture_last_found)
2745       {
2746       shared_srcw[2] = common->capture_last_ptr;
2747       shared_count = 3;
2748       capture_last_found = TRUE;
2749       }
2750 
2751     private_count = 2;
2752     private_srcw[0] = PRIVATE_DATA(cc);
2753     private_srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
2754     cc += 1 + LINK_SIZE + IMM2_SIZE;
2755     break;
2756 
2757     case OP_COND:
2758     /* Might be a hidden SCOND. */
2759     alternative = cc + GET(cc, 1);
2760     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
2761       {
2762       private_count = 1;
2763       private_srcw[0] = PRIVATE_DATA(cc);
2764       }
2765     cc += 1 + LINK_SIZE;
2766     break;
2767 
2768     CASE_ITERATOR_PRIVATE_DATA_1
2769     if (PRIVATE_DATA(cc))
2770       {
2771       private_count = 1;
2772       private_srcw[0] = PRIVATE_DATA(cc);
2773       }
2774     cc += 2;
2775 #ifdef SUPPORT_UNICODE
2776     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2777 #endif
2778     break;
2779 
2780     CASE_ITERATOR_PRIVATE_DATA_2A
2781     if (PRIVATE_DATA(cc))
2782       {
2783       private_count = 2;
2784       private_srcw[0] = PRIVATE_DATA(cc);
2785       private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2786       }
2787     cc += 2;
2788 #ifdef SUPPORT_UNICODE
2789     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2790 #endif
2791     break;
2792 
2793     CASE_ITERATOR_PRIVATE_DATA_2B
2794     if (PRIVATE_DATA(cc))
2795       {
2796       private_count = 2;
2797       private_srcw[0] = PRIVATE_DATA(cc);
2798       private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2799       }
2800     cc += 2 + IMM2_SIZE;
2801 #ifdef SUPPORT_UNICODE
2802     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2803 #endif
2804     break;
2805 
2806     CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2807     if (PRIVATE_DATA(cc))
2808       {
2809       private_count = 1;
2810       private_srcw[0] = PRIVATE_DATA(cc);
2811       }
2812     cc += 1;
2813     break;
2814 
2815     CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2816     if (PRIVATE_DATA(cc))
2817       {
2818       private_count = 2;
2819       private_srcw[0] = PRIVATE_DATA(cc);
2820       private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2821       }
2822     cc += 1;
2823     break;
2824 
2825     CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2826     if (PRIVATE_DATA(cc))
2827       {
2828       private_count = 2;
2829       private_srcw[0] = PRIVATE_DATA(cc);
2830       private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2831       }
2832     cc += 1 + IMM2_SIZE;
2833     break;
2834 
2835     case OP_CLASS:
2836     case OP_NCLASS:
2837 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
2838     case OP_XCLASS:
2839     i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2840 #else
2841     i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2842 #endif
2843     if (PRIVATE_DATA(cc) != 0)
2844       switch(get_class_iterator_size(cc + i))
2845         {
2846         case 1:
2847         private_count = 1;
2848         private_srcw[0] = PRIVATE_DATA(cc);
2849         break;
2850 
2851         case 2:
2852         private_count = 2;
2853         private_srcw[0] = PRIVATE_DATA(cc);
2854         private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2855         break;
2856 
2857         default:
2858         SLJIT_UNREACHABLE();
2859         break;
2860         }
2861     cc += i;
2862     break;
2863 
2864     case OP_MARK:
2865     case OP_COMMIT_ARG:
2866     case OP_PRUNE_ARG:
2867     case OP_THEN_ARG:
2868     SLJIT_ASSERT(common->mark_ptr != 0);
2869     if (has_quit && !setmark_found)
2870       {
2871       kept_shared_srcw[0] = common->mark_ptr;
2872       kept_shared_count = 1;
2873       setmark_found = TRUE;
2874       }
2875     if (common->control_head_ptr != 0 && !control_head_found)
2876       {
2877       private_srcw[0] = common->control_head_ptr;
2878       private_count = 1;
2879       control_head_found = TRUE;
2880       }
2881     cc += 1 + 2 + cc[1];
2882     break;
2883 
2884     case OP_THEN:
2885     SLJIT_ASSERT(common->control_head_ptr != 0);
2886     if (!control_head_found)
2887       {
2888       private_srcw[0] = common->control_head_ptr;
2889       private_count = 1;
2890       control_head_found = TRUE;
2891       }
2892     cc++;
2893     break;
2894 
2895     default:
2896     cc = next_opcode(common, cc);
2897     SLJIT_ASSERT(cc != NULL);
2898     break;
2899     }
2900 
2901   if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2902     {
2903     SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2904 
2905     for (i = 0; i < private_count; i++)
2906       {
2907       SLJIT_ASSERT(private_srcw[i] != 0);
2908 
2909       if (!from_sp)
2910         delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);
2911 
2912       if (from_sp || type == recurse_swap_global)
2913         delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);
2914 
2915       stackptr += sizeof(sljit_sw);
2916       }
2917     }
2918   else
2919     stackptr += sizeof(sljit_sw) * private_count;
2920 
2921   if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
2922     {
2923     SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);
2924 
2925     for (i = 0; i < shared_count; i++)
2926       {
2927       SLJIT_ASSERT(shared_srcw[i] != 0);
2928 
2929       if (!from_sp)
2930         delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);
2931 
2932       if (from_sp || type == recurse_swap_global)
2933         delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);
2934 
2935       stackptr += sizeof(sljit_sw);
2936       }
2937     }
2938   else
2939     stackptr += sizeof(sljit_sw) * shared_count;
2940 
2941   if (type != recurse_copy_private_to_global && type != recurse_swap_global)
2942     {
2943     SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);
2944 
2945     for (i = 0; i < kept_shared_count; i++)
2946       {
2947       SLJIT_ASSERT(kept_shared_srcw[i] != 0);
2948 
2949       if (!from_sp)
2950         delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);
2951 
2952       if (from_sp || type == recurse_swap_global)
2953         delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);
2954 
2955       stackptr += sizeof(sljit_sw);
2956       }
2957     }
2958   else
2959     stackptr += sizeof(sljit_sw) * kept_shared_count;
2960   }
2961 
2962 SLJIT_ASSERT(cc == ccend && stackptr == stacktop);
2963 
2964 delayed_mem_copy_finish(&status);
2965 }
2966 
set_then_offsets(compiler_common * common,PCRE2_SPTR cc,sljit_u8 * current_offset)2967 static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
2968 {
2969 PCRE2_SPTR end = bracketend(cc);
2970 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2971 
2972 /* Assert captures then. */
2973 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA)
2974   current_offset = NULL;
2975 /* Conditional block does not. */
2976 if (*cc == OP_COND || *cc == OP_SCOND)
2977   has_alternatives = FALSE;
2978 
2979 cc = next_opcode(common, cc);
2980 if (has_alternatives)
2981   current_offset = common->then_offsets + (cc - common->start);
2982 
2983 while (cc < end)
2984   {
2985   if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2986     cc = set_then_offsets(common, cc, current_offset);
2987   else
2988     {
2989     if (*cc == OP_ALT && has_alternatives)
2990       current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2991     if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2992       *current_offset = 1;
2993     cc = next_opcode(common, cc);
2994     }
2995   }
2996 
2997 return end;
2998 }
2999 
3000 #undef CASE_ITERATOR_PRIVATE_DATA_1
3001 #undef CASE_ITERATOR_PRIVATE_DATA_2A
3002 #undef CASE_ITERATOR_PRIVATE_DATA_2B
3003 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
3004 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
3005 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3006 
is_powerof2(unsigned int value)3007 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
3008 {
3009 return (value & (value - 1)) == 0;
3010 }
3011 
set_jumps(jump_list * list,struct sljit_label * label)3012 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
3013 {
3014 while (list)
3015   {
3016   /* sljit_set_label is clever enough to do nothing
3017   if either the jump or the label is NULL. */
3018   SET_LABEL(list->jump, label);
3019   list = list->next;
3020   }
3021 }
3022 
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)3023 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
3024 {
3025 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
3026 if (list_item)
3027   {
3028   list_item->next = *list;
3029   list_item->jump = jump;
3030   *list = list_item;
3031   }
3032 }
3033 
/* Records a stub: start is the jump which enters the stub, and a label is
emitted at the current position, to which the stub returns (see
flush_stubs). Nothing is emitted or recorded when the list item cannot be
allocated. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (!stub)
  return;

stub->start = start;
stub->quit = LABEL();

/* Insert at the front of the stub list. */
stub->next = common->stubs;
common->stubs = stub;
}
3047 
/* Emits the code of all recorded stubs, then empties the stub list. Each
stub is the target of its start jump, performs a fast call which is added
to the stackalloc jump list, and jumps back to its quit label. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
3062 
count_match(compiler_common * common)3063 static SLJIT_INLINE void count_match(compiler_common *common)
3064 {
3065 DEFINE_COMPILER;
3066 
3067 OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
3068 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
3069 }
3070 
allocate_stack(compiler_common * common,int size)3071 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
3072 {
3073 /* May destroy all locals and registers except TMP2. */
3074 DEFINE_COMPILER;
3075 
3076 SLJIT_ASSERT(size > 0);
3077 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
3078 #ifdef DESTROY_REGISTERS
3079 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
3080 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3081 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3082 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
3083 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
3084 #endif
3085 add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
3086 }
3087 
free_stack(compiler_common * common,int size)3088 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
3089 {
3090 DEFINE_COMPILER;
3091 
3092 SLJIT_ASSERT(size > 0);
3093 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
3094 }
3095 
/* Allocates size bytes of read only data. The memory block is prefixed by
one machine word which links it into the common->read_only_data_head list;
the returned pointer refers to the area after that word.

Returns NULL when the compiler is already in an error state, or when the
allocation fails (a compiler memory error is set in the latter case). */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
DEFINE_COMPILER;
sljit_uw *block;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

block = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
if (block != NULL)
  {
  /* The first word stores the previous head of the list. */
  *(void**)block = common->read_only_data_head;
  common->read_only_data_head = (void *)block;
  return block + 1;
  }

sljit_set_compiler_memory_error(compiler);
return NULL;
}
3115 
reset_ovector(compiler_common * common,int length)3116 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
3117 {
3118 DEFINE_COMPILER;
3119 struct sljit_label *loop;
3120 sljit_s32 i;
3121 
3122 /* At this point we can freely use all temporary registers. */
3123 SLJIT_ASSERT(length > 1);
3124 /* TMP1 returns with begin - 1. */
3125 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
3126 if (length < 8)
3127   {
3128   for (i = 1; i < length; i++)
3129     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
3130   }
3131 else
3132   {
3133   if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3134     {
3135     GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
3136     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3137     loop = LABEL();
3138     sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
3139     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3140     JUMPTO(SLJIT_NOT_ZERO, loop);
3141     }
3142   else
3143     {
3144     GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
3145     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3146     loop = LABEL();
3147     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
3148     OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
3149     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3150     JUMPTO(SLJIT_NOT_ZERO, loop);
3151     }
3152   }
3153 }
3154 
reset_early_fail(compiler_common * common)3155 static SLJIT_INLINE void reset_early_fail(compiler_common *common)
3156 {
3157 DEFINE_COMPILER;
3158 sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
3159 sljit_u32 uncleared_size;
3160 sljit_s32 src = SLJIT_IMM;
3161 sljit_s32 i;
3162 struct sljit_label *loop;
3163 
3164 SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);
3165 
3166 if (size == sizeof(sljit_sw))
3167   {
3168   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
3169   return;
3170   }
3171 
3172 if (sljit_get_register_index(TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
3173   {
3174   OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
3175   src = TMP3;
3176   }
3177 
3178 if (size <= 6 * sizeof(sljit_sw))
3179   {
3180   for (i = common->early_fail_start_ptr; i < common->early_fail_end_ptr; i += sizeof(sljit_sw))
3181     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, src, 0);
3182   return;
3183   }
3184 
3185 GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);
3186 
3187 uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);
3188 
3189 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);
3190 
3191 loop = LABEL();
3192 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3193 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3194 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * (sljit_sw)sizeof(sljit_sw), src, 0);
3195 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * (sljit_sw)sizeof(sljit_sw), src, 0);
3196 CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);
3197 
3198 if (uncleared_size >= sizeof(sljit_sw))
3199   OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3200 
3201 if (uncleared_size >= 2 * sizeof(sljit_sw))
3202   OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0);
3203 }
3204 
do_reset_match(compiler_common * common,int length)3205 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
3206 {
3207 DEFINE_COMPILER;
3208 struct sljit_label *loop;
3209 int i;
3210 
3211 SLJIT_ASSERT(length > 1);
3212 /* OVECTOR(1) contains the "string begin - 1" constant. */
3213 if (length > 2)
3214   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3215 if (length < 8)
3216   {
3217   for (i = 2; i < length; i++)
3218     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
3219   }
3220 else
3221   {
3222   if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3223     {
3224     GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
3225     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3226     loop = LABEL();
3227     sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
3228     OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3229     JUMPTO(SLJIT_NOT_ZERO, loop);
3230     }
3231   else
3232     {
3233     GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
3234     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3235     loop = LABEL();
3236     OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
3237     OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
3238     OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3239     JUMPTO(SLJIT_NOT_ZERO, loop);
3240     }
3241   }
3242 
3243 if (!HAS_VIRTUAL_REGISTERS)
3244   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));
3245 else
3246   OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
3247 
3248 if (common->mark_ptr != 0)
3249   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
3250 if (common->control_head_ptr != 0)
3251   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
3252 if (HAS_VIRTUAL_REGISTERS)
3253   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
3254 
3255 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3256 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
3257 }
3258 
do_search_mark(sljit_sw * current,PCRE2_SPTR skip_arg)3259 static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
3260 {
3261 while (current != NULL)
3262   {
3263   switch (current[1])
3264     {
3265     case type_then_trap:
3266     break;
3267 
3268     case type_mark:
3269     if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
3270       return current[3];
3271     break;
3272 
3273     default:
3274     SLJIT_UNREACHABLE();
3275     break;
3276     }
3277   SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
3278   current = (sljit_sw*)current[0];
3279   }
3280 return 0;
3281 }
3282 
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
/* Emits code that:
  - stores the start_ptr local into jit_arguments.startchar_ptr and, when
    mark_ptr is in use, the mark_ptr local into jit_arguments.mark_ptr;
  - converts jit_arguments.oveccount local ovector words into offsets from
    jit_arguments.begin (in code units) and writes them to the ovector of
    jit_arguments.match_data;
  - leaves the return value in SLJIT_RETURN_REG: 1 when topbracket <= 1,
    otherwise the value produced by the backwards scan at the end. */
DEFINE_COMPILER;
struct sljit_label *loop;
BOOL has_pre;

/* At this point we can freely use all registers. */
/* Keep the previous OVECTOR(1) value in SLJIT_S2 (it is compared against in
the final scan), then overwrite OVECTOR(1) with STR_PTR. */
OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);

if (HAS_VIRTUAL_REGISTERS)
  {
  /* ARGUMENTS is copied into SLJIT_R0 and used as the base register.
  SLJIT_R2 briefly carries the mark pointer before it becomes the output
  pointer. */
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
  /* SLJIT_R2 = address one PCRE2_SIZE before the output ovector; the copy
  loop increments it before each store. */
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
    SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  }
else
  {
  /* ARGUMENTS is usable as a base register directly. SLJIT_R0 briefly
  carries the mark pointer; it is reloaded with 'begin' below. */
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
  OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
  /* SLJIT_R2 = address one PCRE2_SIZE before the output ovector. */
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  }

/* SLJIT_MEM_SUPP only asks whether a pre-update load is available. */
has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;

/* SLJIT_S0 = source pointer into the local ovector (one word early for the
pre-update form), SLJIT_R0 = subject begin. */
GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

/* Copy loop: executed SLJIT_R1 (oveccount) times, one local word each. */
loop = LABEL();

if (has_pre)
  sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
else
  {
  OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
  OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
  }

/* Advance the output pointer and turn the subject pointer into an offset
from 'begin'. */
OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
/* Copy the integer value to the output buffer */
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
/* Byte distance -> code unit distance. */
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif

SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);

OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);

/* Calculate the return value, which is the maximum ovector value. */
if (topbracket > 1)
  {
  /* Both variants walk backwards over every second local ovector word,
  the first one examined being word (topbracket - 1) * 2. SLJIT_R1 starts
  at topbracket + 1 and is decremented once per examined word; the scan
  stops at the first word that differs from the value saved in SLJIT_S2. */
  if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
    {
    /* Pre-decrement load: start two words above the first examined word. */
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

    /* OVECTOR(0) is never equal to SLJIT_S2. */
    loop = LABEL();
    sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
    }
  else
    {
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

    /* OVECTOR(0) is never equal to SLJIT_S2. */
    loop = LABEL();
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
    OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
    }
  }
else
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
}
3379 
static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
{
/* Emits the code that reports a partial match: SLJIT_RETURN_REG is set to
PCRE2_ERROR_PARTIAL, jit_arguments.startchar_ptr and the first ovector entry
receive the start position (the hit_start local in PCRE2_JIT_PARTIAL_SOFT
mode, the start_ptr local otherwise), the second ovector entry receives
STR_END, and control is transferred to 'quit'. Both ovector entries are
stored as code unit offsets from jit_arguments.begin. */
DEFINE_COMPILER;
sljit_s32 mov_opcode;
/* With virtual registers ARGUMENTS is first copied into SLJIT_R1. */
sljit_s32 arguments_reg = !HAS_VIRTUAL_REGISTERS ? ARGUMENTS : SLJIT_R1;

SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
  && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));

if (arguments_reg != ARGUMENTS)
  OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0);
/* SLJIT_R2 = start position of the partial match. */
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
  common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);

/* Store match begin and end. */
/* SLJIT_S1 = subject begin. The load of match_data into SLJIT_R1 is the last
use of arguments_reg, so overwriting SLJIT_R1 is fine even when SLJIT_R1 is
arguments_reg itself. */
OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data));

/* The store width follows the size of PCRE2_SIZE. */
mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;

/* ovector[0] = (start - begin), converted from bytes to code units. */
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);

/* ovector[1] = (STR_END - begin), converted from bytes to code units.
STR_END itself is overwritten by this computation. */
OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);

JUMPTO(SLJIT_JUMP, quit);
}
3417 
check_start_used_ptr(compiler_common * common)3418 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
3419 {
3420 /* May destroy TMP1. */
3421 DEFINE_COMPILER;
3422 struct sljit_jump *jump;
3423 
3424 if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3425   {
3426   /* The value of -1 must be kept for start_used_ptr! */
3427   OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
3428   /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
3429   is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
3430   jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
3431   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3432   JUMPHERE(jump);
3433   }
3434 else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
3435   {
3436   jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3437   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3438   JUMPHERE(jump);
3439   }
3440 }
3441 
char_has_othercase(compiler_common * common,PCRE2_SPTR cc)3442 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
3443 {
3444 /* Detects if the character has an othercase. */
3445 unsigned int c;
3446 
3447 #ifdef SUPPORT_UNICODE
3448 if (common->utf || common->ucp)
3449   {
3450   if (common->utf)
3451     {
3452     GETCHAR(c, cc);
3453     }
3454   else
3455     c = *cc;
3456 
3457   if (c > 127)
3458     return c != UCD_OTHERCASE(c);
3459 
3460   return common->fcc[c] != c;
3461   }
3462 else
3463 #endif
3464   c = *cc;
3465 return MAX_255(c) ? common->fcc[c] != c : FALSE;
3466 }
3467 
char_othercase(compiler_common * common,unsigned int c)3468 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
3469 {
3470 /* Returns with the othercase. */
3471 #ifdef SUPPORT_UNICODE
3472 if ((common->utf || common->ucp) && c > 127)
3473   return UCD_OTHERCASE(c);
3474 #endif
3475 return TABLE_GET(c, common->fcc, c);
3476 }
3477 
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
/* Detects if the character and its othercase has only 1 bit difference.

Returns 0 when the two values differ in more than one bit. Otherwise the
result is (position << 8) | mask, where mask is the differing bit reduced to
eight bits and position says where that mask applies:
  - 8 bit code units: the index of the code unit within the character;
  - 16 / 32 bit code units: 0..3, an even value when the bit lies in the low
    eight bits of the selected part and an odd value when it lies in the
    next eight bits (presumably a byte index consumed by the caller -
    confirm against the call sites).
The character must have an other case (asserted below). */
unsigned int c, oc, bit;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
int n;
#endif

#ifdef SUPPORT_UNICODE
if (common->utf || common->ucp)
  {
  if (common->utf)
    {
    GETCHAR(c, cc);
    }
  else
    c = *cc;

  /* ASCII characters use the case flipping table, the rest the Unicode
  database. */
  if (c <= 127)
    oc = common->fcc[c];
  else
    oc = UCD_OTHERCASE(c);
  }
else
  {
  c = *cc;
  oc = TABLE_GET(c, common->fcc, c);
  }
#else
c = *cc;
oc = TABLE_GET(c, common->fcc, c);
#endif

SLJIT_ASSERT(c != oc);

bit = c ^ oc;
/* Optimized for English alphabet. */
if (c <= 127 && bit == 0x20)
  return (0 << 8) | 0x20;

/* Since c != oc, they must have at least 1 bit difference. */
if (!is_powerof2(bit))
  return 0;

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
if (common->utf && c > 127)
  {
  /* n starts at the index of the last byte of the sequence. Every
  trailing byte carries six payload bits, so each step towards the first
  byte drops six bits from the mask. The loop terminates because bit is
  a non-zero power of two. */
  n = GET_EXTRALEN(*cc);
  while ((bit & 0x3f) == 0)
    {
    n--;
    bit >>= 6;
    }
  return (n << 8) | bit;
  }
#endif /* SUPPORT_UNICODE */
return (0 << 8) | bit;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#ifdef SUPPORT_UNICODE
if (common->utf && c > 65535)
  {
  /* A bit at position 10 or above is shifted down by ten and reported
  with position 0 / 1 by the final return; a lower bit is reported with
  position 2 / 3. This matches the ten bit halves of a UTF-16 surrogate
  pair.
  NOTE(review): this branch is also compiled for 32 bit code units, where
  a character above 0xffff is a single code unit - confirm that callers
  never rely on the result in that configuration. */
  if (bit >= (1u << 10))
    bit >>= 10;
  else
    return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
  }
#endif /* SUPPORT_UNICODE */
return (bit < 256) ? ((0u << 8) | bit) : ((1u << 8) | (bit >> 8));

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
3553 
static void check_partial(compiler_common *common, BOOL force)
{
/* Emits the check for a partial match at the current position. Nothing is
emitted in PCRE2_JIT_COMPLETE mode. Otherwise the action is skipped when
  - start_used_ptr >= STR_PTR, if neither 'force' nor allow_empty_partial
    is set;
  - start_used_ptr == -1, in soft partial mode with 'force' or
    allow_empty_partial set.
The action is storing zero into the hit_start local (soft mode) or jumping
to the partial match handler (hard mode). Does not modify registers. */
DEFINE_COMPILER;
const BOOL soft = (common->mode == PCRE2_JIT_PARTIAL_SOFT);
struct sljit_jump *skip = NULL;

SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

if (common->mode == PCRE2_JIT_COMPLETE)
  return;

if (force || common->allow_empty_partial)
  {
  if (soft)
    skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  }
else
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

if (soft)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

if (skip != NULL)
  JUMPHERE(skip);
}
3583 
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
/* Emits the end of subject test. When STR_PTR >= STR_END the emitted code
leaves through 'end_reached'; in the partial matching modes it first handles
the partial match (soft mode: zero is stored into the hit_start local, hard
mode: jump to the partial match handler) unless start_used_ptr >= STR_PTR.
Does not affect registers. Usually used in a tight spot. */
DEFINE_COMPILER;
struct sljit_jump *more_input;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

more_input = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

/* Both partial modes begin with the same test. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(more_input);
}
3613 
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
/* Emits the end of subject test used before reading a character. In
PCRE2_JIT_COMPLETE mode the emitted code simply backtracks when
STR_PTR >= STR_END. In the partial matching modes, reaching the end either
backtracks (see the two tests below) or handles the partial match: soft mode
stores zero into the hit_start local and then backtracks, hard mode jumps to
the partial match handler. */
DEFINE_COMPILER;
const BOOL soft = (common->mode == PCRE2_JIT_PARTIAL_SOFT);
struct sljit_jump *more_input;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
more_input = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

if (common->allow_empty_partial)
  {
  /* start_used_ptr == -1: plain backtrack (soft mode only). */
  if (soft)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));
  }
else
  {
  /* start_used_ptr >= STR_PTR: plain backtrack. */
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
  }

if (soft)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(more_input);
}
3646 
static void process_partial_match(compiler_common *common)
{
/* Emits the partial match handling without any end of subject test.
Soft mode: zero is stored into the hit_start local unless
start_used_ptr >= STR_PTR. Hard mode: jump to the partial match handler
when start_used_ptr < STR_PTR. Nothing is emitted in any other mode. */
DEFINE_COMPILER;
struct sljit_jump *skip;

if (common->mode == PCRE2_JIT_PARTIAL_HARD)
  {
  /* Bind to the handler label when it exists already, otherwise queue the
  jump on the partialmatch list. */
  if (common->partialmatchlabel == NULL)
    add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
  else
    CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
  return;
  }

if (common->mode != PCRE2_JIT_PARTIAL_SOFT)
  return;

skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
JUMPHERE(skip);
}
3667 
static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
{
/* Emits a jump to 'label' which is taken while STR_PTR < STR_END; the code
emitted after it (reached at the end of the subject) is the partial match
handling of process_partial_match(). */
DEFINE_COMPILER;
struct sljit_jump *more_input;

more_input = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
sljit_set_label(more_input, label);
process_partial_match(common);
}
3675 
static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
{
/* Reads the character into TMP1, keeps STR_PTR.
Does not check STR_END. TMP2, dst, RETURN_ADDR Destroyed.

'max' is the largest character value the caller is interested in: when no
multi-unit (or invalid) character can be <= max, only the first code unit is
loaded. 'dst' / 'dstw' name the operand used to save STR_PTR around the
helper calls. When 'backtracks' is not NULL and invalid UTF checking is on,
a backtrack is added for invalid characters. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

/* Not every build configuration uses every argument. */
SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  /* Single byte characters need no decoding. Otherwise STR_PTR is saved in
  dst, moved past the first byte for the helper, and restored afterwards. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, common->invalid_utf ? &common->utfreadchar_invalid : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
  if (backtracks && common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  /* TMP2 = offset of the code unit from the start of the surrogate range. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (common->invalid_utf)
    {
    /* Anything outside 0xd800 - 0xdfff is complete already; surrogates
    are passed to the validating helper. */
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
    /* common->invalid_utf is always true in this branch. */
    if (backtracks && common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    {
    /* TMP2 contains the high surrogate. */
    /* Combine it with the following code unit:
    TMP1 = 0x10000 + (high << 10) + (low - 0xdc00). */
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }

  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  if (max < 0xd800) return;

  /* Invalid values are those >= 0x110000 and the surrogate range
  0xd800 - 0xdfff. With a backtrack list they cause a backtrack, without
  one TMP1 is replaced by INVALID_UTF_CHAR. */
  if (backtracks != NULL)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
    }
  else
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
    CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3759 
static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
{
/* Reads one character back without moving STR_PTR. TMP2 must
contain the start of the subject buffer. Affects TMP1, TMP2, and RETURN_ADDR.

The character is returned in TMP1. 'max' is the largest character value the
caller is interested in: when no multi-unit character can be <= max, only
the preceding code unit is loaded. In the 8 and 16 bit variants a backtrack
for invalid characters is added only when 'backtracks' is not NULL. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

/* Not every build configuration uses every argument. */
SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  /* Single byte characters need no decoding; the rest is left to the
  backwards reading helpers. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  if (common->invalid_utf)
    {
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  if (common->invalid_utf)
    {
    /* Code units below 0xd800 are complete characters; everything else
    goes through the validating helper. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
    /* TMP2 contains the low surrogate. */
    /* Combine it with the code unit before it:
    TMP1 = ((high - 0xd800) << 10) + (low - 0xdc00) + 0x10000. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }
    JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  /* Backtrack on values >= 0x110000 and on the surrogate range
  0xd800 - 0xdfff.
  NOTE(review): unlike the 8 and 16 bit variants, 'backtracks' is used here
  without a NULL check, and unlike peek_char() there is no early exit for
  max < 0xd800 - confirm that every caller passes a list in this
  configuration. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3827 
/* Option bits for the 'options' argument of read_char(). */

/* STR_PTR is moved past the whole character even when its value is outside
the requested range. NOTE(review): derived from the UTF-8 paths of
read_char(); confirm for the other code unit widths. */
#define READ_CHAR_UPDATE_STR_PTR 0x1
/* With invalid UTF-8 checking enabled, read_char() calls the
utfreadnewline_invalid helper instead of utfreadchar_invalid. */
#define READ_CHAR_UTF8_NEWLINE 0x2
/* Combination of the two bits above. */
#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
/* read_char() skips the invalid UTF-8 helper path even when
common->invalid_utf is set (presumably because the caller knows the input
is valid at this point - confirm). */
#define READ_CHAR_VALID_UTF 0x4
3832 
read_char(compiler_common * common,sljit_u32 min,sljit_u32 max,jump_list ** backtracks,sljit_u32 options)3833 static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
3834   jump_list **backtracks, sljit_u32 options)
3835 {
3836 /* Reads the precise value of a character into TMP1, if the character is
3837 between min and max (c >= min && c <= max). Otherwise it returns with a value
3838 outside the range. Does not check STR_END. */
3839 DEFINE_COMPILER;
3840 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3841 struct sljit_jump *jump;
3842 #endif
3843 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
3844 struct sljit_jump *jump2;
3845 #endif
3846 
3847 SLJIT_UNUSED_ARG(min);
3848 SLJIT_UNUSED_ARG(max);
3849 SLJIT_UNUSED_ARG(backtracks);
3850 SLJIT_UNUSED_ARG(options);
3851 SLJIT_ASSERT(min <= max);
3852 
3853 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3854 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3855 
3856 #ifdef SUPPORT_UNICODE
3857 #if PCRE2_CODE_UNIT_WIDTH == 8
3858 if (common->utf)
3859   {
3860   if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
3861 
3862   if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
3863     {
3864     jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
3865 
3866     if (options & READ_CHAR_UTF8_NEWLINE)
3867       add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
3868     else
3869       add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
3870 
3871     if (backtracks != NULL)
3872       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3873     JUMPHERE(jump);
3874     return;
3875     }
3876 
3877   jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3878   if (min >= 0x10000)
3879     {
3880     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
3881     if (options & READ_CHAR_UPDATE_STR_PTR)
3882       OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3883     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3884     jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
3885     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3886     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3887     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3888     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3889     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3890     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3891     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3892     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
3893     if (!(options & READ_CHAR_UPDATE_STR_PTR))
3894       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
3895     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3896     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3897     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3898     JUMPHERE(jump2);
3899     if (options & READ_CHAR_UPDATE_STR_PTR)
3900       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3901     }
3902   else if (min >= 0x800 && max <= 0xffff)
3903     {
3904     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
3905     if (options & READ_CHAR_UPDATE_STR_PTR)
3906       OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3907     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3908     jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
3909     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3910     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3911     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3912     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3913     if (!(options & READ_CHAR_UPDATE_STR_PTR))
3914       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3915     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3916     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3917     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3918     JUMPHERE(jump2);
3919     if (options & READ_CHAR_UPDATE_STR_PTR)
3920       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3921     }
3922   else if (max >= 0x800)
3923     {
3924     add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
3925     }
3926   else if (max < 128)
3927     {
3928     OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3929     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3930     }
3931   else
3932     {
3933     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3934     if (!(options & READ_CHAR_UPDATE_STR_PTR))
3935       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3936     else
3937       OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3938     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3939     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3940     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3941     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3942     if (options & READ_CHAR_UPDATE_STR_PTR)
3943       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3944     }
3945   JUMPHERE(jump);
3946   }
3947 #elif PCRE2_CODE_UNIT_WIDTH == 16
3948 if (common->utf)
3949   {
3950   if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
3951 
3952   if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
3953     {
3954     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3955     jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
3956 
3957     if (options & READ_CHAR_UTF8_NEWLINE)
3958       add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
3959     else
3960       add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
3961 
3962     if (backtracks != NULL)
3963       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3964     JUMPHERE(jump);
3965     return;
3966     }
3967 
3968   if (max >= 0x10000)
3969     {
3970     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3971     jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
3972     /* TMP2 contains the high surrogate. */
3973     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3974     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
3975     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3976     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
3977     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3978     JUMPHERE(jump);
3979     return;
3980     }
3981 
3982   /* Skip low surrogate if necessary. */
3983   OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3984 
3985   if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
3986     {
3987     if (options & READ_CHAR_UPDATE_STR_PTR)
3988       OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3989     OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
3990     if (options & READ_CHAR_UPDATE_STR_PTR)
3991       CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
3992     if (max >= 0xd800)
3993       CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000);
3994     }
3995   else
3996     {
3997     jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
3998     if (options & READ_CHAR_UPDATE_STR_PTR)
3999       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4000     if (max >= 0xd800)
4001       OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
4002     JUMPHERE(jump);
4003     }
4004   }
4005 #elif PCRE2_CODE_UNIT_WIDTH == 32
4006 if (common->invalid_utf)
4007   {
4008   if (backtracks != NULL)
4009     {
4010     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4011     add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
4012     add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
4013     }
4014   else
4015     {
4016     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4017     OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
4018     CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4019     OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4020     CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4021     }
4022   }
4023 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
4024 #endif /* SUPPORT_UNICODE */
4025 }
4026 
4027 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4028 
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Tells whether the character codes below 128 are enough to determine a
match. This is the case when bytes 16..31 of the bitset are uniform: all
0xff when nclass is set, all zero otherwise. The first 16 bytes are not
inspected. */
const sljit_u8 expected = nclass ? 0xff : 0;
int i;

for (i = 16; i < 32; i++)
  {
  if (bitset[i] != expected)
    return FALSE;
  }

return TRUE;
}
4045 
static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the precise character type of a character into TMP1, if the character
is less than 128. Otherwise it returns with zero. Does not check STR_END for
the first code unit. TMP2 is destroyed.

When negated is FALSE only the first code unit is consumed. When negated is
TRUE and the first code unit is >= 0x80, the rest of the character is consumed
as well: in invalid UTF mode the utfreadchar_invalid helper decodes it and an
invalid sequence jumps to backtracks, otherwise STR_PTR is advanced by the
utf8_table4 entry of the first byte. */
DEFINE_COMPILER;
struct sljit_jump *jump;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* All values > 127 are zero in ctypes. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (negated)
  {
  /* Single byte characters are complete, their type is already in TMP1. */
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (common->invalid_utf)
    {
    /* The helper expects the first byte of the character in TMP1, but TMP1
    holds the ctypes entry at this point. Without this move the helper would
    reject every multi-byte character. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);

    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    /* The decoded value is not needed, the type of the character is zero. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    }
  else
    {
    /* Skip the remaining code units of the character. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  JUMPHERE(jump);
  }
}
4080 
4081 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4082 
static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END
for the first code unit. TMP2 is destroyed.

The ctypes array only has entries for values below 256, so the type of any
other character is zero. The backtracks list is only used in invalid UTF mode,
where it receives the jumps taken for malformed or truncated sequences. The
negated argument selects how much of a multi-unit character is consumed: see
the comments of the individual code unit widths below. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(negated);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The result of this read may be unused, but saves an "else" part. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (!negated)
    {
    /* Only a two byte sequence is decoded here. Every other value ends up
    above 255 and gets a zero type, so the rest of a longer character is not
    consumed. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
    /* The first byte must be in the 0xc2-0xdf range. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));

    /* If the second byte is in the 0x80-0xbf range, adding it unmasked
    also adds back the (0x2 << 6) removed by the subtraction above. */
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
    /* The second byte must be in the 0x80-0xbf range. The check must use
    TMP1 (second byte - 0x80): TMP2 holds the assembled value, which is
    never below 0x80 for a well formed sequence. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else if (common->invalid_utf)
    {
    /* The helper expects the first byte of the character in TMP1, but TMP1
    holds the ctypes entry at this point. Without this move the helper would
    reject every multi-byte character. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);

    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
/* Values above the Unicode range are rejected when a negated type is read. */
if (common->invalid_utf && negated)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */

#if PCRE2_CODE_UNIT_WIDTH != 8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && negated)
  {
  /* Skip low surrogate if necessary. */
  if (!common->invalid_utf)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);

    /* STR_PTR is advanced by one more code unit when the unit just read
    is in the 0xd800-0xdbff range. */
    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
      {
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
      CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
      }
    else
      {
      jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      JUMPHERE(jump);
      }
    return;
    }

  /* Invalid UTF mode: units outside the 0xd800-0xdfff range are complete.
  Otherwise the unit just read must be in the 0xd800-0xdbff range and it must
  be followed by a unit in the 0xdc00-0xdfff range before STR_END. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
4201 
static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
{
/* Goes one character back. Affects STR_PTR and TMP1. If must_be_valid is TRUE,
TMP2 is not used. Otherwise TMP2 must contain the start of the subject buffer,
and it is destroyed. Does not modify STR_PTR for invalid character sequences.

The validating code paths are only generated when must_be_valid is FALSE and
common->invalid_utf is set. When backtracks is not NULL, these paths add a
jump to it which is taken for an invalid sequence. Outside UTF mode STR_PTR
is simply decreased by one code unit. */
DEFINE_COMPILER;

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(must_be_valid);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_label *label;

if (common->utf)
  {
  if (!must_be_valid && common->invalid_utf)
    {
    /* Load the previous code unit and step back over it. Values below 0x80
    are complete characters, everything else is passed to the
    utfmoveback_invalid helper. A zero TMP1 after the call is treated as an
    invalid sequence. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(jump);
    return;
    }

  /* Valid UTF-8: keep stepping back while the code unit just passed has the
  10xxxxxx bit pattern. */
  label = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  /* Load the previous code unit and step back over it. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (!must_be_valid && common->invalid_utf)
    {
    /* Code units outside the 0xd800-0xdfff range are complete characters,
    the rest is passed to the utfmoveback_invalid helper. A zero TMP1 after
    the call is treated as an invalid sequence. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(jump);
    return;
    }

  /* Skip low surrogate if necessary. */
  /* TMP1 becomes 1 when the code unit is in the 0xdc00-0xdfff range and 0
  otherwise. It is then scaled to the size of a code unit and subtracted
  from STR_PTR, which avoids a conditional jump. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && !must_be_valid)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  if (backtracks != NULL)
    {
    /* The jump is emitted before STR_PTR is changed, so STR_PTR is left
    untouched when the previous code unit is 0x110000 or above. */
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    return;
    }

  /* No backtrack list: TMP1 becomes 1 when the previous code unit is below
  0x110000 and 0 otherwise, so STR_PTR only moves for the former. */
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
  OP2(SLJIT_SHL,  TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
/* Every character is a single code unit on the remaining paths. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4287 
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* The character to be tested is expected in TMP1. A jump is added to the
backtracks list which is taken when the character is a newline (jumpifmatch
is TRUE) or when it is not a newline (jumpifmatch is FALSE). TMP2 may be
destroyed. */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The anynewline helper reports its result through the zero flag. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  if (jumpifmatch)
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  else
    add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  /* Fixed, single code unit newline. */
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  if (jumpifmatch)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF: both CR and NL are newlines. */
if (!jumpifmatch)
  {
  /* CR skips the test, anything else must be NL to continue. */
  is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  JUMPHERE(is_cr);
  return;
  }

add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
}
4320 
4321 #ifdef SUPPORT_UNICODE
4322 
4323 #if PCRE2_CODE_UNIT_WIDTH == 8
static void do_utfreadchar(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1.

This is the body of a fast-called helper: it starts with a fast enter and
every exit is a fast return through RETURN_ADDR. STR_PTR must point to the
code unit after the first byte, and it is advanced past the character. No
validation is done and STR_END is not checked. TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Append the low six bits of the second byte. The marker bits of the first
byte are kept in TMP1 and are moved upwards by each shift. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. */
/* 0x800 is the 0x20 bit of the first byte after the shift. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
/* Clears the shifted 0xc0 marker bits of the first byte. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Append the low six bits of the third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* 0x10000 is the 0x10 bit of the first byte after two shifts. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence. */
/* Clears the shifted 0xe0 marker bits of the first byte. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four byte sequence. */
JUMPHERE(jump);
/* The shifted 0xf0 marker bits are cleared before the last six bits are
appended. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4368 
static void do_utfreadtype8(compiler_common *common)
{
/* Fast decoding a UTF-8 character type. TMP2 contains the first byte
of the character (>= 0xc0). Return value in TMP1.

This is the body of a fast-called helper: it starts with a fast enter and
every exit is a fast return through RETURN_ADDR. STR_PTR must point to the
code unit after the first byte, and it is advanced past the character. The
returned type is the ctypes entry for values below 256 and zero for everything
else. No validation is done and STR_END is not checked. TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *compare;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* The 0x20 bit of the first byte is clear only for two byte sequences. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point. */
/* A value above 3 means the character is 256 or greater. */
compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two byte sequence of a character which has no ctypes entry. */
JUMPHERE(compare);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* We only have types for characters less than 256. */
JUMPHERE(jump);
/* Longer sequence: STR_PTR is advanced by the utf8_table4 entry of the first
byte (presumably the number of additional bytes - confirm against the table
definition). */
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4404 
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Slow decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1. STR_PTR is
undefined for invalid characters.

This is the body of a fast-called helper: it starts with a fast enter and
every exit is a fast return through RETURN_ADDR. STR_PTR must point to the
code unit after the first byte. INVALID_UTF_CHAR is returned in TMP1 when the
sequence is malformed or does not fit before STR_END. TMP2 is destroyed.

Two code paths are generated: a fast one used when at least three more code
units are available, and a slower one (buffer_end_close) which checks STR_END
before each read. When the CPU supports conditional moves, several failure
jumps are replaced by a CMOV which loads TMP1 with a value that the later
checks turn into INVALID_UTF_CHAR. The matching exit_invalid[] slots are set
to NULL in that case. */
DEFINE_COMPILER;
sljit_s32 i;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *jump;
struct sljit_jump *buffer_end_close;
struct sljit_label *three_byte_entry;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[11];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* 0xc2 is the lowest accepted first byte, TMP1 is relative to it from now. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);

/* Usually more than 3 characters remained in the subject buffer. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));

/* Not a valid start of a multi-byte sequence, no more bytes read. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);

buffer_end_close = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Fast path: the following three code units are inside the buffer. STR_PTR
is already advanced by three, so the second byte is at offset -3. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* The 0x800 bit is set when the first byte was 0xe0 or above. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);

/* Two-byte sequence: only one of the three code units was consumed. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  /* 0x20000 is rejected by the range check after three_byte_entry. */
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000);
  exit_invalid[2] = NULL;
  }
else
  exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* The 0x10000 bit is set when the first byte was 0xf0 or above. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);

/* Also entered from the slow path below, where STR_PTR is set up so the
decrease by one is correct there as well. */
three_byte_entry = LABEL();

/* TMP1 is the character value plus 0x20000. The 0xd800-0xdfff range is
rejected first. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800);
  exit_invalid[3] = NULL;
  }
else
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Values below 0x800 must not be encoded in three bytes. */
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  exit_invalid[4] = NULL;
  }
else
  exit_invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);

/* Four-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  /* Zero is rejected by the range check below. */
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0);
  exit_invalid[5] = NULL;
  }
else
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* After the subtraction TMP1 is the character value minus 0x10000, which
must be below 0x100000. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  exit_invalid[6] = NULL;
  }
else
  exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Slow path: fewer than three code units are left after the first byte.
STR_PTR is set to one code unit after its value on entry. */
JUMPHERE(buffer_end_close);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
exit_invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three-byte sequence. */
JUMPHERE(jump);
exit_invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  /* INVALID_UTF_CHAR must fail the "less than 0x30000" test below. */
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  exit_invalid[10] = NULL;
  }
else
  exit_invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* One will be subtracted from STR_PTR later. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Four byte sequences are not possible. */
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_entry);

/* Common failure exit. Slots replaced by a CMOV above are NULL. */
exit_invalid_label = LABEL();
for (i = 0; i < 11; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4563 
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Slow decoding a UTF-8 character, specialized for newlines.
TMP1 contains the first byte of the character (>= 0xc0). Return
char value in TMP1.

This is the body of a fast-called helper: it starts with a fast enter and
every exit is a fast return through RETURN_ADDR. STR_PTR must point to the
code unit after the first byte. Only the sequences which may encode a
multi-byte newline are decoded: 0xc2 0x85 yields 0x85, and 0xe2 0x80 followed
by a byte in the 0x80-0xbf range yields 0x2000 plus the low six bits of that
byte. For everything else the following 10xxxxxx code units are skipped and
INVALID_UTF_CHAR is returned. TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_label *skip_start;
struct sljit_label *three_byte_exit;
struct sljit_jump *jump[5];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

if (common->nltype != NLTYPE_ANY)
  {
  SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);

  /* All newlines are ascii, just skip intermediate octets. */
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  loop = LABEL();
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  /* STR_PTR must be advanced in every iteration, otherwise the loop never
  terminates on a 10xxxxxx code unit. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
  /* Undo the step over the code unit which ended the loop. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  JUMPHERE(jump[0]);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  return;
  }

/* Read the second code unit, if there is one. */
jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Only these two first bytes can start a multi-byte newline. */
jump[1] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
jump[2] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);

/* Not a newline: TMP2 holds the last code unit read, which is consumed
only when it has the 10xxxxxx bit pattern. */
skip_start = LABEL();
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
jump[3] = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);

/* Skip intermediate octets. */
loop = LABEL();
jump[4] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);

/* Undo the step over the code unit which is not part of the sequence. */
JUMPHERE(jump[3]);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

three_byte_exit = LABEL();
JUMPHERE(jump[0]);
JUMPHERE(jump[4]);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two byte long newline: 0x85. */
JUMPHERE(jump[1]);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three byte long newlines: 0x2028 and 0x2029. */
JUMPHERE(jump[2]);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start);
CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The third byte must be in the 0x80-0xbf range. Any character in the
0x2000-0x203f range is returned, the caller decides whether it is a newline. */
OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start);

OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4647 
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Emits the fast-called helper that moves STR_PTR back by one UTF-8
character when the subject may contain invalid sequences.

On entry TMP1 is tested against 0xc0 and TMP2 is the lowest address that
may be read (presumably the code unit already fetched by the caller and the
start of the subject - confirm against the call sites).

On return TMP1 is 1 and STR_PTR addresses the lead byte if a two, three or
four byte sequence was recognised. Otherwise TMP1 is 0 and STR_PTR ends up
one code unit above its entry value. */
DEFINE_COMPILER;
struct sljit_jump *bad[7];
struct sljit_jump *near_start;
struct sljit_jump *not_lead;
struct sljit_label *ok_exit;
struct sljit_label *bad_exit;
sljit_s32 idx;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Step back three code units at once; every exit below compensates. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
bad[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);

/* Less than three code units above TMP2: take the bounded path. */
near_start = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

/* Lead byte of a two-byte sequence (0xc0-0xdf)? */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
not_lead = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Not a two-byte lead. With the 0xc0 bias a continuation byte lies in
-0x40..-1, so anything below -0x40 is not a continuation byte. */
JUMPHERE(not_lead);
bad[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);

/* Lead byte of a three-byte sequence (0xe0-0xef)? */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
not_lead = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four-byte sequence: the byte just examined must be a continuation byte
(re-biased to 0x80) and the one before it a lead byte in 0xf0-0xf4. */
JUMPHERE(not_lead);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
bad[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
bad[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);

/* Shared success exit: STR_PTR already addresses the lead byte. */
ok_exit = LABEL();
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Bounded path. Undo two of the three steps and retry with a range check
before every load. Two-byte sequence first. */
JUMPHERE(near_start);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

bad[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, ok_exit);

/* Three-byte sequence. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
bad[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
bad[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, ok_exit);

/* A four-byte sequence cannot fit on this path. Failure exit for
STR_PTR == entry - 2: adding 3 gives entry + 1. */
bad_exit = LABEL();
idx = 5;
while (idx < 7)
  {
  sljit_set_label(bad[idx], bad_exit);
  idx++;
  }
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* bad[4] arrives with STR_PTR == entry - 1; bring it to entry - 3 so that
the common failure exit below (which adds 4) also yields entry + 1. */
JUMPHERE(bad[4]);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Failure exit for STR_PTR == entry - 3. */
bad_exit = LABEL();
idx = 0;
while (idx < 4)
  {
  sljit_set_label(bad[idx], bad_exit);
  idx++;
  }
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4743 
static void do_utfpeakcharback(compiler_common *common)
{
/* Emits the fast-called helper that peeks at the multi-byte UTF-8 character
ending right before STR_PTR. STR_PTR itself is not modified. The decoded
value is returned in TMP1; TMP2 is used as scratch. No validity checks are
emitted here, so the subject is presumably known to be valid UTF-8. */
DEFINE_COMPILER;
struct sljit_jump *is_two_byte;
struct sljit_jump *is_three_byte;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Lead byte of a two-byte sequence (0xc0-0xdf) at offset -2? */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
is_two_byte = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);

/* Lead byte of a three-byte sequence (0xe0-0xef) at offset -3? */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
is_three_byte = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);

/* Four-byte sequence: TMP1 still holds the byte at -3 (a continuation byte
here); re-bias it to 0x80 and merge the payload of the lead byte at -4. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Append the six payload bits of the continuation byte at -2. */
JUMPHERE(is_three_byte);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Append the six payload bits of the last byte at -1. */
JUMPHERE(is_two_byte);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4780 
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Emits the fast-called helper that peeks at the UTF-8 character ending
right before STR_PTR when the subject may contain invalid sequences.
STR_PTR is not modified.

On entry TMP1 holds a code unit that must be below 0xc0 to be accepted
(presumably the last byte of the character, already fetched by the caller)
and TMP2 holds the lowest readable address (presumably the subject start).

TMP1 returns the decoded character, or INVALID_UTF_CHAR when the sequence is
malformed, overlong, a surrogate, or above 0x10ffff. TMP2 is clobbered. */
DEFINE_COMPILER;
sljit_s32 idx;
sljit_s32 use_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *short_buffer;
struct sljit_jump *not_lead;
struct sljit_jump *bad[8];
struct sljit_label *two_byte_entry;
struct sljit_label *three_byte_entry;
struct sljit_label *bad_exit;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* TMP2 + 3 >= STR_PTR means a four-byte sequence cannot fit. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
bad[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
short_buffer = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence: lead byte 0xc2-0xdf (0xc0 and 0xc1 are overlong). */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
not_lead = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);

two_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
/* TMP1 still carries the 0x80 bias of a continuation byte, which equals the
(0x2 << 6) that was removed from the lead byte above. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* The byte at -2 must then be a continuation byte; merge its payload. */
JUMPHERE(not_lead);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
bad[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence: lead byte 0xe0-0xef. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
not_lead = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);

three_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Reject surrogates (0xd800-0xdfff). With CMOV the value is replaced by
-0xd800 so that the ADD below yields 0, which the next test rejects. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (!use_cmov)
  bad[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800);
  bad[2] = NULL;
  }

/* Reject values below 0x800 (overlong three-byte encodings). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (!use_cmov)
  bad[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  bad[3] = NULL;
  }

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* The byte at -3 must then be a continuation byte; merge its payload. */
JUMPHERE(not_lead);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
bad[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Four-byte sequence. The value is kept biased by -0x10000 so that one
range test covers both overlong encodings and values above 0x10ffff. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
/* ADD rather than OR: the SUB 0x10000 above may have borrowed. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

if (!use_cmov)
  bad[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
else
  {
  /* The ADD 0x10000 below turns this into INVALID_UTF_CHAR. */
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  bad[5] = NULL;
  }

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* At most three code units precede STR_PTR. If exactly three are available
only two and three byte sequences are tried. */
JUMPHERE(short_buffer);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
short_buffer = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);

OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
bad[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* At most two code units precede STR_PTR; fewer than two is invalid. */
JUMPHERE(short_buffer);
bad[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);

/* Common failure exit. Entries left NULL by the CMOV variants are passed to
sljit_set_label unchanged, exactly as before. */
bad_exit = LABEL();
idx = 0;
while (idx < 8)
  {
  sljit_set_label(bad[idx], bad_exit);
  idx++;
  }

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4912 
4913 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
4914 
4915 #if PCRE2_CODE_UNIT_WIDTH == 16
4916 
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Emits the slow path for decoding a UTF-16 character from a subject that
may be invalid. TMP1 holds the first code unit (>= 0xd800). TMP2 is shifted
left by ten and merged as the upper bits of the result, so it presumably
holds the first unit minus 0xd800 - set up by the caller, confirm there.

TMP1 returns the character, or INVALID_UTF_CHAR. STR_PTR is undefined when
the character is invalid. */
DEFINE_COMPILER;
struct sljit_jump *not_high;
struct sljit_jump *at_end;
struct sljit_jump *not_low;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* The first unit must be below 0xdc00 and a second unit must be present. */
not_high = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The second unit must be in 0xdc00-0xdfff. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
not_low = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Shared failure exit. */
JUMPHERE(not_high);
JUMPHERE(at_end);
JUMPHERE(not_low);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4948 
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Emits the slow path for reading a UTF-16 character when only newline
detection matters. TMP1 holds the first code unit (>= 0xd800).

The character is not decoded: TMP1 returns the constant 0x10000 and STR_PTR
is moved past the next code unit only if that unit is in 0xdc00-0xdfff.
TMP1 returns INVALID_UTF_CHAR when the subject ends here or when the first
unit is 0xdc00 or above. TMP2 is clobbered. */

DEFINE_COMPILER;
struct sljit_jump *at_end;
struct sljit_jump *not_high;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
not_high = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);

/* TMP2 becomes 1 when the next unit is in 0xdc00-0xdfff, otherwise 0; it is
then scaled to a byte offset and added to STR_PTR. */
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Shared failure exit. */
JUMPHERE(at_end);
JUMPHERE(not_high);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4980 
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Emits the fast-called helper that moves STR_PTR back by one UTF-16
character when the subject may contain invalid sequences.

On entry TMP1 must be 0x400 or above to be accepted (presumably the code
unit at STR_PTR minus 0xd800, so that only 0xdc00-0xdfff units pass -
confirm against the call sites) and TMP2 is the lowest readable address.

On success STR_PTR is decremented by one code unit and TMP1 is 1. On failure
STR_PTR is incremented by one code unit and TMP1 is 0. */
DEFINE_COMPILER;
struct sljit_jump *not_low;
struct sljit_jump *at_start;
struct sljit_jump *not_high;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

not_low = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
at_start = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* The preceding unit must be in 0xd800-0xdbff. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
not_high = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Shared failure exit. */
JUMPHERE(not_low);
JUMPHERE(at_start);
JUMPHERE(not_high);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5008 
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Emits the fast-called helper that peeks at the UTF-16 character ending
right before STR_PTR when the subject may contain invalid sequences.
STR_PTR is not modified.

On entry TMP1 holds a code unit (presumably the last unit of the character,
already known to be 0xd800 or above - confirm against the call sites) and
TMP2 holds the lowest readable address.

TMP1 is returned unchanged when it is 0xe000 or above. Otherwise it returns
the character combined from the surrogate pair, or INVALID_UTF_CHAR. TMP2 is
clobbered. */
DEFINE_COMPILER;
struct sljit_jump *single_unit;
struct sljit_jump *not_low;
struct sljit_jump *at_start;
struct sljit_jump *not_high;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

single_unit = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
/* Two code units are needed, so the lower bound moves up by one. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
not_low = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
at_start = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* The unit at -2 must be in 0xd800-0xdbff; merge it as the upper bits. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
not_high = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

JUMPHERE(single_unit);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Shared failure exit. */
JUMPHERE(not_low);
JUMPHERE(at_start);
JUMPHERE(not_high);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5040 
5041 #endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
5042 
/* The UCD lookup code splits a character into a block number (upper bits)
and an offset within the block (low seven bits). This only works when
UCD_BLOCK_SIZE is 128, which the emitters assert before using these. */
#define UCD_BLOCK_SHIFT 7
#define UCD_BLOCK_MASK ((1 << UCD_BLOCK_SHIFT) - 1)
5046 
static void do_getucd(compiler_common *common)
{
/* Emits the fast-called helper that looks up the character in TMP1 through
the two-stage UCD tables.

On return TMP2 holds the 16 bit entry read from ucd_stage2 for the character
(presumably the index of its record in ucd_records). TMP1 is overwritten
with the stage 2 index; no record field is loaded here. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *in_range;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* The record used for out-of-range characters must be the dummy one. */
const ucd_record *dummy = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(dummy->script == ucp_Unknown && dummy->chartype == ucp_Cn && dummy->gbprop == ucp_gbOther);
SLJIT_ASSERT(dummy->caseset == 0 && dummy->other_case == 0);
#endif

SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Values above MAX_UTF_CODE_POINT are mapped to UNASSIGNED_UTF_CHAR. */
  in_range = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(in_range);
  }
#endif

/* Stage 1: the block number selects a 16 bit entry of ucd_stage1. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));

/* Stage 2: index is (stage 1 entry * block size) + offset within block. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5086 
static void do_getucdtype(compiler_common *common)
{
/* Emits the fast-called helper that looks up the character in TMP1 through
the two-stage UCD tables and loads the chartype field of its record.

On return TMP1 holds the chartype. TMP2 holds the record index multiplied
by 4 (a by-product of the address computation). */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *in_range;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* The record used for out-of-range characters must be the dummy one. */
const ucd_record *dummy = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(dummy->script == ucp_Unknown && dummy->chartype == ucp_Cn && dummy->gbprop == ucp_gbOther);
SLJIT_ASSERT(dummy->caseset == 0 && dummy->other_case == 0);
#endif

SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Values above MAX_UTF_CODE_POINT are mapped to UNASSIGNED_UTF_CHAR. */
  in_range = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(in_range);
  }
#endif

/* Stage 1: the block number selects a 16 bit entry of ucd_stage1. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));

/* Stage 2: index is (stage 1 entry * block size) + offset within block. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

/* The record size is 12, so the byte offset is index * 12. It is formed as
(index << 2) added to the base plus ((index << 2) << 1) applied by the
scaled two-register addressing mode of the load. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5133 
5134 #endif /* SUPPORT_UNICODE */
5135 
mainloop_entry(compiler_common * common)5136 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
5137 {
5138 DEFINE_COMPILER;
5139 struct sljit_label *mainloop;
5140 struct sljit_label *newlinelabel = NULL;
5141 struct sljit_jump *start;
5142 struct sljit_jump *end = NULL;
5143 struct sljit_jump *end2 = NULL;
5144 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5145 struct sljit_label *loop;
5146 struct sljit_jump *jump;
5147 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5148 jump_list *newline = NULL;
5149 sljit_u32 overall_options = common->re->overall_options;
5150 BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
5151 BOOL newlinecheck = FALSE;
5152 BOOL readuchar = FALSE;
5153 
5154 if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
5155     && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
5156   newlinecheck = TRUE;
5157 
5158 SLJIT_ASSERT(common->abort_label == NULL);
5159 
5160 if ((overall_options & PCRE2_FIRSTLINE) != 0)
5161   {
5162   /* Search for the end of the first line. */
5163   SLJIT_ASSERT(common->match_end_ptr != 0);
5164   OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5165 
5166   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5167     {
5168     mainloop = LABEL();
5169     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5170     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5171     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5172     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5173     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
5174     CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
5175     JUMPHERE(end);
5176     OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5177     }
5178   else
5179     {
5180     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5181     mainloop = LABEL();
5182     /* Continual stores does not cause data dependency. */
5183     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5184     read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
5185     check_newlinechar(common, common->nltype, &newline, TRUE);
5186     CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
5187     JUMPHERE(end);
5188     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5189     set_jumps(newline, LABEL());
5190     }
5191 
5192   OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5193   }
5194 else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
5195   {
5196   /* Check whether offset limit is set and valid. */
5197   SLJIT_ASSERT(common->match_end_ptr != 0);
5198 
5199   if (HAS_VIRTUAL_REGISTERS)
5200     {
5201     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5202     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5203     }
5204   else
5205     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5206 
5207   OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5208   end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
5209   if (HAS_VIRTUAL_REGISTERS)
5210     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5211   else
5212     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
5213 
5214 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5215   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5216 #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5217   if (HAS_VIRTUAL_REGISTERS)
5218     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5219 
5220   OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
5221   end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5222   OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5223   JUMPHERE(end2);
5224   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
5225   add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
5226   JUMPHERE(end);
5227   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
5228   }
5229 
5230 start = JUMP(SLJIT_JUMP);
5231 
5232 if (newlinecheck)
5233   {
5234   newlinelabel = LABEL();
5235   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5236   end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5237   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5238   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
5239   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
5240 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5241   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5242 #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5243   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5244   end2 = JUMP(SLJIT_JUMP);
5245   }
5246 
5247 mainloop = LABEL();
5248 
5249 /* Increasing the STR_PTR here requires one less jump in the most common case. */
5250 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5251 if (common->utf && !common->invalid_utf) readuchar = TRUE;
5252 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5253 if (newlinecheck) readuchar = TRUE;
5254 
5255 if (readuchar)
5256   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5257 
5258 if (newlinecheck)
5259   CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
5260 
5261 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5262 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5263 #if PCRE2_CODE_UNIT_WIDTH == 8
5264 if (common->invalid_utf)
5265   {
5266   /* Skip continuation code units. */
5267   loop = LABEL();
5268   jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5269   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5270   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5271   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
5272   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
5273   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5274   JUMPHERE(jump);
5275   }
5276 else if (common->utf)
5277   {
5278   jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5279   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5280   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5281   JUMPHERE(jump);
5282   }
5283 #elif PCRE2_CODE_UNIT_WIDTH == 16
5284 if (common->invalid_utf)
5285   {
5286   /* Skip continuation code units. */
5287   loop = LABEL();
5288   jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5289   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5290   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5291   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
5292   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
5293   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5294   JUMPHERE(jump);
5295   }
5296 else if (common->utf)
5297   {
5298   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5299 
5300   if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
5301     {
5302     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5303     OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
5304     CMOV(SLJIT_LESS, STR_PTR, TMP2, 0);
5305     }
5306   else
5307     {
5308     OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
5309     OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
5310     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5311     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5312     }
5313   }
5314 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
5315 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5316 JUMPHERE(start);
5317 
5318 if (newlinecheck)
5319   {
5320   JUMPHERE(end);
5321   JUMPHERE(end2);
5322   }
5323 
5324 return mainloop;
5325 }
5326 
5327 
add_prefix_char(PCRE2_UCHAR chr,fast_forward_char_data * chars,BOOL last)5328 static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
5329 {
5330 sljit_u32 i, count = chars->count;
5331 
5332 if (count == 255)
5333   return;
5334 
5335 if (count == 0)
5336   {
5337   chars->count = 1;
5338   chars->chars[0] = chr;
5339 
5340   if (last)
5341     chars->last_count = 1;
5342   return;
5343   }
5344 
5345 for (i = 0; i < count; i++)
5346   if (chars->chars[i] == chr)
5347     return;
5348 
5349 if (count >= MAX_DIFF_CHARS)
5350   {
5351   chars->count = 255;
5352   return;
5353   }
5354 
5355 chars->chars[count] = chr;
5356 chars->count = count + 1;
5357 
5358 if (last)
5359   chars->last_count++;
5360 }
5361 
scan_prefix(compiler_common * common,PCRE2_SPTR cc,fast_forward_char_data * chars,int max_chars,sljit_u32 * rec_count)5362 static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
5363 {
5364 /* Recursive function, which scans prefix literals. */
5365 BOOL last, any, class, caseless;
5366 int len, repeat, len_save, consumed = 0;
5367 sljit_u32 chr; /* Any unicode character. */
5368 sljit_u8 *bytes, *bytes_end, byte;
5369 PCRE2_SPTR alternative, cc_save, oc;
5370 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5371 PCRE2_UCHAR othercase[4];
5372 #elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
5373 PCRE2_UCHAR othercase[2];
5374 #else
5375 PCRE2_UCHAR othercase[1];
5376 #endif
5377 
5378 repeat = 1;
5379 while (TRUE)
5380   {
5381   if (*rec_count == 0)
5382     return 0;
5383   (*rec_count)--;
5384 
5385   last = TRUE;
5386   any = FALSE;
5387   class = FALSE;
5388   caseless = FALSE;
5389 
5390   switch (*cc)
5391     {
5392     case OP_CHARI:
5393     caseless = TRUE;
5394     /* Fall through */
5395     case OP_CHAR:
5396     last = FALSE;
5397     cc++;
5398     break;
5399 
5400     case OP_SOD:
5401     case OP_SOM:
5402     case OP_SET_SOM:
5403     case OP_NOT_WORD_BOUNDARY:
5404     case OP_WORD_BOUNDARY:
5405     case OP_EODN:
5406     case OP_EOD:
5407     case OP_CIRC:
5408     case OP_CIRCM:
5409     case OP_DOLL:
5410     case OP_DOLLM:
5411     /* Zero width assertions. */
5412     cc++;
5413     continue;
5414 
5415     case OP_ASSERT:
5416     case OP_ASSERT_NOT:
5417     case OP_ASSERTBACK:
5418     case OP_ASSERTBACK_NOT:
5419     case OP_ASSERT_NA:
5420     case OP_ASSERTBACK_NA:
5421     cc = bracketend(cc);
5422     continue;
5423 
5424     case OP_PLUSI:
5425     case OP_MINPLUSI:
5426     case OP_POSPLUSI:
5427     caseless = TRUE;
5428     /* Fall through */
5429     case OP_PLUS:
5430     case OP_MINPLUS:
5431     case OP_POSPLUS:
5432     cc++;
5433     break;
5434 
5435     case OP_EXACTI:
5436     caseless = TRUE;
5437     /* Fall through */
5438     case OP_EXACT:
5439     repeat = GET2(cc, 1);
5440     last = FALSE;
5441     cc += 1 + IMM2_SIZE;
5442     break;
5443 
5444     case OP_QUERYI:
5445     case OP_MINQUERYI:
5446     case OP_POSQUERYI:
5447     caseless = TRUE;
5448     /* Fall through */
5449     case OP_QUERY:
5450     case OP_MINQUERY:
5451     case OP_POSQUERY:
5452     len = 1;
5453     cc++;
5454 #ifdef SUPPORT_UNICODE
5455     if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
5456 #endif
5457     max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
5458     if (max_chars == 0)
5459       return consumed;
5460     last = FALSE;
5461     break;
5462 
5463     case OP_KET:
5464     cc += 1 + LINK_SIZE;
5465     continue;
5466 
5467     case OP_ALT:
5468     cc += GET(cc, 1);
5469     continue;
5470 
5471     case OP_ONCE:
5472     case OP_BRA:
5473     case OP_BRAPOS:
5474     case OP_CBRA:
5475     case OP_CBRAPOS:
5476     alternative = cc + GET(cc, 1);
5477     while (*alternative == OP_ALT)
5478       {
5479       max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
5480       if (max_chars == 0)
5481         return consumed;
5482       alternative += GET(alternative, 1);
5483       }
5484 
5485     if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
5486       cc += IMM2_SIZE;
5487     cc += 1 + LINK_SIZE;
5488     continue;
5489 
5490     case OP_CLASS:
5491 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5492     if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
5493       return consumed;
5494 #endif
5495     class = TRUE;
5496     break;
5497 
5498     case OP_NCLASS:
5499 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5500     if (common->utf) return consumed;
5501 #endif
5502     class = TRUE;
5503     break;
5504 
5505 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
5506     case OP_XCLASS:
5507 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5508     if (common->utf) return consumed;
5509 #endif
5510     any = TRUE;
5511     cc += GET(cc, 1);
5512     break;
5513 #endif
5514 
5515     case OP_DIGIT:
5516 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5517     if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
5518       return consumed;
5519 #endif
5520     any = TRUE;
5521     cc++;
5522     break;
5523 
5524     case OP_WHITESPACE:
5525 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5526     if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
5527       return consumed;
5528 #endif
5529     any = TRUE;
5530     cc++;
5531     break;
5532 
5533     case OP_WORDCHAR:
5534 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5535     if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
5536       return consumed;
5537 #endif
5538     any = TRUE;
5539     cc++;
5540     break;
5541 
5542     case OP_NOT:
5543     case OP_NOTI:
5544     cc++;
5545     /* Fall through. */
5546     case OP_NOT_DIGIT:
5547     case OP_NOT_WHITESPACE:
5548     case OP_NOT_WORDCHAR:
5549     case OP_ANY:
5550     case OP_ALLANY:
5551 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5552     if (common->utf) return consumed;
5553 #endif
5554     any = TRUE;
5555     cc++;
5556     break;
5557 
5558 #ifdef SUPPORT_UNICODE
5559     case OP_NOTPROP:
5560     case OP_PROP:
5561 #if PCRE2_CODE_UNIT_WIDTH != 32
5562     if (common->utf) return consumed;
5563 #endif
5564     any = TRUE;
5565     cc += 1 + 2;
5566     break;
5567 #endif
5568 
5569     case OP_TYPEEXACT:
5570     repeat = GET2(cc, 1);
5571     cc += 1 + IMM2_SIZE;
5572     continue;
5573 
5574     case OP_NOTEXACT:
5575     case OP_NOTEXACTI:
5576 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5577     if (common->utf) return consumed;
5578 #endif
5579     any = TRUE;
5580     repeat = GET2(cc, 1);
5581     cc += 1 + IMM2_SIZE + 1;
5582     break;
5583 
5584     default:
5585     return consumed;
5586     }
5587 
5588   if (any)
5589     {
5590     do
5591       {
5592       chars->count = 255;
5593 
5594       consumed++;
5595       if (--max_chars == 0)
5596         return consumed;
5597       chars++;
5598       }
5599     while (--repeat > 0);
5600 
5601     repeat = 1;
5602     continue;
5603     }
5604 
5605   if (class)
5606     {
5607     bytes = (sljit_u8*) (cc + 1);
5608     cc += 1 + 32 / sizeof(PCRE2_UCHAR);
5609 
5610     switch (*cc)
5611       {
5612       case OP_CRSTAR:
5613       case OP_CRMINSTAR:
5614       case OP_CRPOSSTAR:
5615       case OP_CRQUERY:
5616       case OP_CRMINQUERY:
5617       case OP_CRPOSQUERY:
5618       max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
5619       if (max_chars == 0)
5620         return consumed;
5621       break;
5622 
5623       default:
5624       case OP_CRPLUS:
5625       case OP_CRMINPLUS:
5626       case OP_CRPOSPLUS:
5627       break;
5628 
5629       case OP_CRRANGE:
5630       case OP_CRMINRANGE:
5631       case OP_CRPOSRANGE:
5632       repeat = GET2(cc, 1);
5633       if (repeat <= 0)
5634         return consumed;
5635       break;
5636       }
5637 
5638     do
5639       {
5640       if (bytes[31] & 0x80)
5641         chars->count = 255;
5642       else if (chars->count != 255)
5643         {
5644         bytes_end = bytes + 32;
5645         chr = 0;
5646         do
5647           {
5648           byte = *bytes++;
5649           SLJIT_ASSERT((chr & 0x7) == 0);
5650           if (byte == 0)
5651             chr += 8;
5652           else
5653             {
5654             do
5655               {
5656               if ((byte & 0x1) != 0)
5657                 add_prefix_char(chr, chars, TRUE);
5658               byte >>= 1;
5659               chr++;
5660               }
5661             while (byte != 0);
5662             chr = (chr + 7) & ~7;
5663             }
5664           }
5665         while (chars->count != 255 && bytes < bytes_end);
5666         bytes = bytes_end - 32;
5667         }
5668 
5669       consumed++;
5670       if (--max_chars == 0)
5671         return consumed;
5672       chars++;
5673       }
5674     while (--repeat > 0);
5675 
5676     switch (*cc)
5677       {
5678       case OP_CRSTAR:
5679       case OP_CRMINSTAR:
5680       case OP_CRPOSSTAR:
5681       return consumed;
5682 
5683       case OP_CRQUERY:
5684       case OP_CRMINQUERY:
5685       case OP_CRPOSQUERY:
5686       cc++;
5687       break;
5688 
5689       case OP_CRRANGE:
5690       case OP_CRMINRANGE:
5691       case OP_CRPOSRANGE:
5692       if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
5693         return consumed;
5694       cc += 1 + 2 * IMM2_SIZE;
5695       break;
5696       }
5697 
5698     repeat = 1;
5699     continue;
5700     }
5701 
5702   len = 1;
5703 #ifdef SUPPORT_UNICODE
5704   if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
5705 #endif
5706 
5707   if (caseless && char_has_othercase(common, cc))
5708     {
5709 #ifdef SUPPORT_UNICODE
5710     if (common->utf)
5711       {
5712       GETCHAR(chr, cc);
5713       if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
5714         return consumed;
5715       }
5716     else
5717 #endif
5718       {
5719       chr = *cc;
5720 #ifdef SUPPORT_UNICODE
5721       if (common->ucp && chr > 127)
5722         othercase[0] = UCD_OTHERCASE(chr);
5723       else
5724 #endif
5725         othercase[0] = TABLE_GET(chr, common->fcc, chr);
5726       }
5727     }
5728   else
5729     {
5730     caseless = FALSE;
5731     othercase[0] = 0; /* Stops compiler warning - PH */
5732     }
5733 
5734   len_save = len;
5735   cc_save = cc;
5736   while (TRUE)
5737     {
5738     oc = othercase;
5739     do
5740       {
5741       len--;
5742       consumed++;
5743 
5744       chr = *cc;
5745       add_prefix_char(*cc, chars, len == 0);
5746 
5747       if (caseless)
5748         add_prefix_char(*oc, chars, len == 0);
5749 
5750       if (--max_chars == 0)
5751         return consumed;
5752       chars++;
5753       cc++;
5754       oc++;
5755       }
5756     while (len > 0);
5757 
5758     if (--repeat == 0)
5759       break;
5760 
5761     len = len_save;
5762     cc = cc_save;
5763     }
5764 
5765   repeat = 1;
5766   if (last)
5767     return consumed;
5768   }
5769 }
5770 
5771 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
jumpto_if_not_utf_char_start(struct sljit_compiler * compiler,sljit_s32 reg,struct sljit_label * label)5772 static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
5773 {
5774 #if PCRE2_CODE_UNIT_WIDTH == 8
5775 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
5776 CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0x80, label);
5777 #elif PCRE2_CODE_UNIT_WIDTH == 16
5778 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
5779 CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label);
5780 #else
5781 #error "Unknown code width"
5782 #endif
5783 }
5784 #endif
5785 
5786 #include "pcre2_jit_simd_inc.h"
5787 
5788 #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
5789 
check_fast_forward_char_pair_simd(compiler_common * common,fast_forward_char_data * chars,int max)5790 static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)
5791 {
5792   sljit_s32 i, j, max_i = 0, max_j = 0;
5793   sljit_u32 max_pri = 0;
5794   PCRE2_UCHAR a1, a2, a_pri, b1, b2, b_pri;
5795 
5796   for (i = max - 1; i >= 1; i--)
5797     {
5798     if (chars[i].last_count > 2)
5799       {
5800       a1 = chars[i].chars[0];
5801       a2 = chars[i].chars[1];
5802       a_pri = chars[i].last_count;
5803 
5804       j = i - max_fast_forward_char_pair_offset();
5805       if (j < 0)
5806         j = 0;
5807 
5808       while (j < i)
5809         {
5810         b_pri = chars[j].last_count;
5811         if (b_pri > 2 && a_pri + b_pri >= max_pri)
5812           {
5813           b1 = chars[j].chars[0];
5814           b2 = chars[j].chars[1];
5815 
5816           if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)
5817             {
5818             max_pri = a_pri + b_pri;
5819             max_i = i;
5820             max_j = j;
5821             }
5822           }
5823         j++;
5824         }
5825       }
5826     }
5827 
5828 if (max_pri == 0)
5829   return FALSE;
5830 
5831 fast_forward_char_pair_simd(common, max_i, chars[max_i].chars[0], chars[max_i].chars[1], max_j, chars[max_j].chars[0], chars[max_j].chars[1]);
5832 return TRUE;
5833 }
5834 
5835 #endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
5836 
/* Emits a search which moves STR_PTR forward until the code unit located
"offset" units ahead of it equals char1 or char2. Callers that have a single
candidate pass the same value for both characters.

When common->match_end_ptr is set, STR_END is lowered for the duration of the
search (its original value is parked in TMP3 and restored before the emitted
code is left). If the SIMD variant is available it replaces the scalar loop.
A non-zero offset is only valid in PCRE2_JIT_COMPLETE mode (asserted). */
static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *retry;
struct sljit_jump *first_matched;
struct sljit_jump *input_end;
PCRE2_UCHAR diff_bits = char1 ^ char2;
BOOL clamp_end = (common->match_end_ptr != 0);
BOOL complete = (common->mode == PCRE2_JIT_COMPLETE);

SLJIT_ASSERT(complete || offset == 0);

if (clamp_end)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

/* From here on STR_PTR addresses the unit that is compared. */
if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (clamp_end)
  {
  /* STR_END = min(STR_END, value at match_end_ptr + offset + 1 units);
  the unmodified STR_END is kept in TMP3. */
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD

if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
  {
  fast_forward_char_simd(common, char1, char2, offset);

  /* Undo the initial displacement and the STR_END adjustment. */
  if (offset > 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

  if (clamp_end)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

retry = LABEL();

/* Running out of input fails the match in complete mode; in the other modes
the jump is bound to the end of this sequence instead. */
input_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (complete)
  add_jump(compiler, &common->failed_match, input_end);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (diff_bits == 0)
  {
  /* Single candidate. */
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, retry);
  }
else if (is_powerof2(diff_bits))
  {
  /* The candidates differ in one bit only: force that bit on and do a
  single comparison. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, diff_bits);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | diff_bits, retry);
  }
else
  {
  first_matched = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, retry);
  JUMPHERE(first_matched);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  /* STR_PTR is offset + 1 units past the would-be match start; that start
  has to be the first unit of a character. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
  jumpto_if_not_utf_char_start(compiler, TMP1, retry);
  }
#endif

/* Step back to the match start. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));

if (!complete)
  JUMPHERE(input_end);

if (clamp_end)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
5922 
/* Tries to emit a fast forward search that is driven by the first few
character positions of the pattern.

scan_prefix() fills "chars" with, for up to MAX_N_CHARS leading positions, the
set of code units that may occur there (count == 255 marks a position about
which nothing useful is known). Depending on the result the function

  - hands a pair of positions to the SIMD character-pair search, or
  - emits a table driven skip loop over the longest run of at least four
    consecutive known positions, optionally followed by a one or two
    character test at another position, or
  - falls back to fast_forward_first_char2() on the best single position.

Returns FALSE, without emitting anything, when the prefix provides no usable
information. Returns TRUE otherwise, including the case where the skip table
could not be allocated. */
static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *match;
fast_forward_char_data chars[MAX_N_CHARS];
sljit_s32 offset;
PCRE2_UCHAR mask;
PCRE2_UCHAR *char_set, *char_set_end;
int i, max, from;
int range_right = -1, range_len;
sljit_u8 *update_table = NULL;
BOOL in_range;
sljit_u32 rec_count;

/* Start with an empty candidate set for every prefix position. */
for (i = 0; i < MAX_N_CHARS; i++)
  {
  chars[i].count = 0;
  chars[i].last_count = 0;
  }

/* rec_count is handed to scan_prefix() by address; presumably it is a work
budget that bounds the scan - confirm against scan_prefix(). */
rec_count = 10000;
max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);

if (max < 1)
  return FALSE;

/* Convert last_count to priority. After this loop last_count no longer holds
a count but a rank (higher is better):
  7 / 5  one candidate,
  6 / 4  two candidates which differ in a single bit,
  3 / 2  two candidates which differ in several bits,
  1      more than two candidates,
  0      unknown position (count == 255).
The larger value of each pair is chosen when the old last_count equals count. */
for (i = 0; i < max; i++)
  {
  SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count);

  if (chars[i].count == 1)
    {
    chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
    /* Simplifies algorithms later. */
    chars[i].chars[1] = chars[i].chars[0];
    }
  else if (chars[i].count == 2)
    {
    SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);

    if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
      chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
    else
      chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
    }
  else
    chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
/* Prefer the SIMD pair search whenever a suitable pair exists. */
if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))
  return TRUE;
#endif

/* Find the longest run of consecutive known positions (count < 255). Since
range_len starts at 3, only runs of at least 4 positions are accepted. The
loop goes one step past the last position (i == max) so that a run which
reaches the end of the prefix is measured as well. range_right is the index
of the last position of the winning run. */
in_range = FALSE;
/* Prevent compiler "uninitialized" warning */
from = 0;
range_len = 4 /* minimum length */ - 1;
for (i = 0; i <= max; i++)
  {
  if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
    {
    range_len = i - from;
    range_right = i - 1;
    }

  if (i < max && chars[i].count < 255)
    {
    SLJIT_ASSERT(chars[i].count > 0);
    if (!in_range)
      {
      in_range = TRUE;
      from = i;
      }
    }
  else
    in_range = FALSE;
  }

if (range_right >= 0)
  {
  /* Build the 256 entry skip table, indexed by the low 8 bits of a code unit.
  Every entry starts as the full run length (in bytes, via IN_UCHARS). For
  each position of the run, i positions to the left of range_right, the
  entries of its candidates are lowered to i units, so an entry ends up
  holding the smallest such distance. An entry of 0 therefore belongs to a
  candidate of the rightmost position. */
  update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
  /* NOTE(review): TRUE is returned although no search was emitted;
  presumably allocate_read_only_data() has recorded the failure - confirm. */
  if (update_table == NULL)
    return TRUE;
  memset(update_table, IN_UCHARS(range_len), 256);

  for (i = 0; i < range_len; i++)
    {
    SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);

    char_set = chars[range_right - i].chars;
    char_set_end = char_set + chars[range_right - i].count;
    do
      {
      if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
        update_table[(*char_set) & 0xff] = IN_UCHARS(i);
      char_set++;
      }
    while (char_set < char_set_end);
    }
  }

/* Select the position used for the explicit character test: the first
position (other than range_right) with priority 2 or more, replaced by any
later position whose priority is strictly higher. offset stays -1 when there
is no such position. */
offset = -1;
/* Scan forward. */
for (i = 0; i < max; i++)
  {
  if (range_right == i)
    continue;

  if (offset == -1)
    {
    if (chars[i].last_count >= 2)
      offset = i;
    }
  else if (chars[offset].last_count < chars[i].last_count)
    offset = i;
  }

SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));

if (range_right < 0)
  {
  /* No run was found: use the single position search, if there is one. */
  if (offset < 0)
    return FALSE;
  /* Works regardless the value is 1 or 2. */
  fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
  return TRUE;
  }

SLJIT_ASSERT(range_right != offset);

/* Lower STR_END by max code units; the SLJIT_LESS condition of that
subtraction jumps to failed_match. When match_end_ptr is in use, the original
STR_END is saved in TMP3 and the lowered value is additionally capped by the
value stored at match_end_ptr. */
if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  }
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
  }

SLJIT_ASSERT(range_right >= 0);

/* Without virtual registers the table address is kept in RETURN_ADDR for the
whole loop; otherwise it is used as an immediate displacement below. */
if (!HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);

start = LABEL();
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

/* Load one byte of the code unit at position range_right. For code units
wider than 8 bits on a target not known to be little endian, the byte at the
highest address of the unit is read instead of the one at the lowest. */
#if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
#else
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
#endif

/* Replace the byte by its skip distance. */
if (!HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
else
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);

/* Advance by the skip distance and repeat until the distance is zero. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);

if (offset >= 0)
  {
  /* Additional test of the selected position. STR_PTR is advanced by one
  unit first, so a failed test restarts the loop from the next position. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (chars[offset].count == 1)
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
  else
    {
    mask = chars[offset].chars[0] ^ chars[offset].chars[1];
    if (is_powerof2(mask))
      {
      /* Candidates differ in one bit: set it and compare once. */
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
      }
    else
      {
      match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
      JUMPHERE(match);
      }
    }
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
/* In UTF mode the candidate start must be the first unit of a character.
This is skipped when offset == 0, i.e. when the start unit itself has just
been compared with the candidates of position 0. If no character test was
emitted (offset < 0), STR_PTR is advanced temporarily so that a rejected
position restarts the loop one unit further. */
if (common->utf && offset != 0)
  {
  if (offset < 0)
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

  jumpto_if_not_utf_char_start(compiler, TMP1, start);

  if (offset < 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }
#endif

/* Undo the one unit advance made for the character test. */
if (offset >= 0)
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Restore STR_END. */
if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
else
  OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
return TRUE;
}
6144 
fast_forward_first_char(compiler_common * common)6145 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
6146 {
6147 PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
6148 PCRE2_UCHAR oc;
6149 
6150 oc = first_char;
6151 if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
6152   {
6153   oc = TABLE_GET(first_char, common->fcc, first_char);
6154 #if defined SUPPORT_UNICODE
6155   if (first_char > 127 && (common->utf || common->ucp))
6156     oc = UCD_OTHERCASE(first_char);
6157 #endif
6158   }
6159 
6160 fast_forward_first_char2(common, first_char, oc, 0);
6161 }
6162 
/* Emits code that moves STR_PTR forward to the position which follows the
next newline sequence. Nothing is skipped when STR_PTR is not beyond the
"str" field of the JIT arguments, and the scan stops when STR_END is reached.

Two code shapes are produced:
  - a dedicated loop for a fixed newline whose value is above 255 (its two
    code units are taken from the high and low bytes of common->newline),
  - a generic loop built from read_char() and check_newlinechar() for all
    other newline types.

When common->match_end_ptr is set, STR_END is replaced by the value stored
there during the scan and restored from TMP3 afterwards. */
static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *lastchar;
struct sljit_jump *firstchar;
struct sljit_jump *quit;
struct sljit_jump *foundcr = NULL;
struct sljit_jump *notfoundnl;
jump_list *newline = NULL;

if (common->match_end_ptr != 0)
  {
  /* Save STR_END in TMP3 and scan only up to the stored match end. */
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  }

if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  {
  /* Two code unit newline. */
  lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  /* TMP2 = arguments->str, TMP1 = arguments->begin. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    }
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
    }
  firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

  /* Step STR_PTR back by one code unit when at least two code units lie
  between "begin" and STR_PTR: TMP2 becomes 1 or 0 (scaled to the code unit
  size) and is subtracted from STR_PTR. */
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

  /* Advance one code unit at a time until the two units just before STR_PTR
  form the newline, or STR_END is reached. */
  loop = LABEL();
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);

  JUMPHERE(quit);
  JUMPHERE(firstchar);
  JUMPHERE(lastchar);

  if (common->match_end_ptr != 0)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

/* Generic case. TMP2 = arguments->str. */
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));

/* Example: match /^/ to \r\n from offset 1. */
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
/* Presumably steps back by one character so that the character before the
current position is examined first - confirm against move_back(). */
move_back(common, NULL, FALSE);

loop = LABEL();
/* The loop head is also recorded in "common" for use outside this function. */
common->ff_newline_shortcut = loop;

read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* CR gets special treatment because it may be followed by NL. */
if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
  foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* The jumps collected in "newline" are bound to the loop head; presumably
they are taken when the character is not a newline (last argument FALSE) -
confirm against check_newlinechar(). */
check_newlinechar(common, common->nltype, &newline, FALSE);
set_jumps(newline, loop);

if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
  {
  quit = JUMP(SLJIT_JUMP);
  JUMPHERE(foundcr);
  /* After a CR: if input remains and the next code unit is NL, skip it as
  well. TMP1 becomes 1 or 0 (scaled to the code unit size) and is added to
  STR_PTR. */
  notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(notfoundnl);
  JUMPHERE(quit);
  }
JUMPHERE(lastchar);
JUMPHERE(firstchar);

/* Restore the original STR_END. */
if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
6264 
6265 static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6266 
/* Emits a loop which moves STR_PTR forward to the next code unit that is a
member of the pattern's start bitmap (re->start_bitmap, 256 bits).

The membership test is produced by optimize_class() when it can handle the
bitmap; otherwise an explicit byte-and-bit lookup into the bitmap is emitted.
On exit STR_PTR addresses the accepted code unit. When STR_END is reached the
emitted code jumps to failed_match in PCRE2_JIT_COMPLETE mode and simply
leaves the loop in the other modes. If common->match_end_ptr is set, STR_END
is capped during the search and restored from RETURN_ADDR afterwards. */
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
{
DEFINE_COMPILER;
const sljit_u8 *start_bits = common->re->start_bitmap;
struct sljit_label *start;
struct sljit_jump *partial_quit;
#if PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *found = NULL;
#endif
jump_list *matches = NULL;

if (common->match_end_ptr != 0)
  {
  /* STR_END = min(STR_END, value at match_end_ptr + 1 code unit); the
  original STR_END is saved in RETURN_ADDR. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  }

start = LABEL();

partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit);

/* Load the next code unit into TMP1 and step over it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The third argument tells whether bit 255 of the bitmap is set. */
if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* Code units of 255 and above are not looked up in the bitmap: they are
  all accepted when bit 255 is set and all rejected otherwise. */
  if ((start_bits[31] & 0x80) != 0)
    found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
  else
    CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
#elif defined SUPPORT_UNICODE
  /* 8-bit UTF mode: reject code units above 127 without a lookup when
  is_char7_bitset() accepts the bitmap. */
  if (common->utf && is_char7_bitset(start_bits, FALSE))
    CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
#endif
  /* TMP2 = bit index within the byte, TMP1 = bitmap byte of the code unit. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
  /* Test bit (1 << TMP2) of the bitmap byte. The two variants differ only in
  the register that receives the shifted mask. */
  if (!HAS_VIRTUAL_REGISTERS)
    {
    OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP3, 0);
    }
  else
    {
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
    }
  /* Bit clear: not a possible start, try the next code unit. */
  JUMPTO(SLJIT_ZERO, start);
  }
else
  /* The jumps collected by optimize_class() are bound to the loop head;
  presumably they are taken when the code unit is not in the set (the invert
  argument is FALSE) - confirm against optimize_class(). */
  set_jumps(matches, start);

#if PCRE2_CODE_UNIT_WIDTH != 8
if (found != NULL)
  JUMPHERE(found);
#endif

/* Undo the advance so that STR_PTR addresses the accepted code unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(partial_quit);

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
}
6338 
search_requested_char(compiler_common * common,PCRE2_UCHAR req_char,BOOL caseless,BOOL has_firstchar)6339 static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
6340 {
6341 DEFINE_COMPILER;
6342 struct sljit_label *loop;
6343 struct sljit_jump *toolong;
6344 struct sljit_jump *already_found;
6345 struct sljit_jump *found;
6346 struct sljit_jump *found_oc = NULL;
6347 jump_list *not_found = NULL;
6348 sljit_u32 oc, bit;
6349 
6350 SLJIT_ASSERT(common->req_char_ptr != 0);
6351 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
6352 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
6353 toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
6354 already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);
6355 
6356 if (has_firstchar)
6357   OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6358 else
6359   OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
6360 
6361 oc = req_char;
6362 if (caseless)
6363   {
6364   oc = TABLE_GET(req_char, common->fcc, req_char);
6365 #if defined SUPPORT_UNICODE
6366   if (req_char > 127 && (common->utf || common->ucp))
6367     oc = UCD_OTHERCASE(req_char);
6368 #endif
6369   }
6370 
6371 #ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
6372 if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
6373   {
6374   not_found = fast_requested_char_simd(common, req_char, oc);
6375   }
6376 else
6377 #endif
6378   {
6379   loop = LABEL();
6380   add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
6381 
6382   OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
6383 
6384   if (req_char == oc)
6385     found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6386   else
6387     {
6388     bit = req_char ^ oc;
6389     if (is_powerof2(bit))
6390       {
6391        OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
6392       found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
6393       }
6394     else
6395       {
6396       found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6397       found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
6398       }
6399     }
6400   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6401   JUMPTO(SLJIT_JUMP, loop);
6402 
6403   JUMPHERE(found);
6404   if (found_oc)
6405     JUMPHERE(found_oc);
6406   }
6407 
6408 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
6409 
6410 JUMPHERE(already_found);
6411 JUMPHERE(toolong);
6412 return not_found;
6413 }
6414 
/* Emits the fast-call helper which unwinds the records stored below
STACK_TOP and writes the saved values back into the local area.

The word just below STACK_TOP selects the action:
  > 0   it is an offset from the local base; the next two stack words are
        copied to that location and to the word after it, and three words
        are popped,
  == 0  end of the records: the helper returns,
  < 0   its negated value is an offset from the local base; the next stack
        word is copied there and two words are popped.

TMP1 holds the local base (from GET_LOCAL_BASE) at the head of the loop. */
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *mainloop;
/* Word size as a signed value. Writing -sizeof(...) negates an unsigned
size_t, which yields the intended negative displacement only when size_t and
sljit_sw have the same width, and triggers "unary minus applied to unsigned
type" diagnostics on some compilers. */
const sljit_sw word_size = (sljit_sw)sizeof(sljit_sw);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
GET_LOCAL_BASE(TMP1, 0, 0);

/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -word_size);
jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);

/* Positive tag: restore two words. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * word_size));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), word_size, SLJIT_MEM1(STACK_TOP), -(3 * word_size));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * word_size);
  }
else
  {
  /* TMP1 is used as a scratch register here, so the local base is reloaded
  before the next iteration. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * word_size));
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * word_size));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * word_size);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
  GET_LOCAL_BASE(TMP1, 0, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), word_size, TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);

JUMPHERE(jump);
/* The tag is known to be <= 0 here, so "not zero" means negative. */
jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
/* End of reverting values. */
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Negative tag: restore a single word. */
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * word_size));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * word_size);
  }
else
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * word_size));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * word_size);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);
}
6468 
check_wordboundary(compiler_common * common)6469 static void check_wordboundary(compiler_common *common)
6470 {
6471 DEFINE_COMPILER;
6472 struct sljit_jump *skipread;
6473 jump_list *skipread_list = NULL;
6474 #ifdef SUPPORT_UNICODE
6475 struct sljit_label *valid_utf;
6476 jump_list *invalid_utf1 = NULL;
6477 #endif /* SUPPORT_UNICODE */
6478 jump_list *invalid_utf2 = NULL;
6479 #if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
6480 struct sljit_jump *jump;
6481 #endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */
6482 
6483 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
6484 
6485 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6486 /* Get type of the previous char, and put it to TMP3. */
6487 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6488 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6489 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
6490 skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6491 
6492 #ifdef SUPPORT_UNICODE
6493 if (common->invalid_utf)
6494   {
6495   peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);
6496 
6497   if (common->mode != PCRE2_JIT_COMPLETE)
6498     {
6499     OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
6500     move_back(common, NULL, TRUE);
6501     check_start_used_ptr(common);
6502     OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
6503     }
6504   }
6505 else
6506 #endif /* SUPPORT_UNICODE */
6507   {
6508   if (common->mode == PCRE2_JIT_COMPLETE)
6509     peek_char_back(common, READ_CHAR_MAX, NULL);
6510   else
6511     {
6512     move_back(common, NULL, TRUE);
6513     check_start_used_ptr(common);
6514     read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
6515     }
6516   }
6517 
6518 /* Testing char type. */
6519 #ifdef SUPPORT_UNICODE
6520 if (common->ucp)
6521   {
6522   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
6523   jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
6524   add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
6525   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
6526   OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
6527   OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6528   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
6529   OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
6530   OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
6531   JUMPHERE(jump);
6532   OP1(SLJIT_MOV, TMP3, 0, TMP2, 0);
6533   }
6534 else
6535 #endif /* SUPPORT_UNICODE */
6536   {
6537 #if PCRE2_CODE_UNIT_WIDTH != 8
6538   jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6539 #elif defined SUPPORT_UNICODE
6540   /* Here TMP3 has already been zeroed. */
6541   jump = NULL;
6542   if (common->utf)
6543     jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6544 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
6545   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
6546   OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
6547   OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
6548 #if PCRE2_CODE_UNIT_WIDTH != 8
6549   JUMPHERE(jump);
6550 #elif defined SUPPORT_UNICODE
6551   if (jump != NULL)
6552     JUMPHERE(jump);
6553 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
6554   }
6555 JUMPHERE(skipread);
6556 
6557 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
6558 check_str_end(common, &skipread_list);
6559 peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);
6560 
6561 /* Testing char type. This is a code duplication. */
6562 #ifdef SUPPORT_UNICODE
6563 
6564 valid_utf = LABEL();
6565 
6566 if (common->ucp)
6567   {
6568   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
6569   jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
6570   add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
6571   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
6572   OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
6573   OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6574   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
6575   OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
6576   OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
6577   JUMPHERE(jump);
6578   }
6579 else
6580 #endif /* SUPPORT_UNICODE */
6581   {
6582 #if PCRE2_CODE_UNIT_WIDTH != 8
6583   /* TMP2 may be destroyed by peek_char. */
6584   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
6585   jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6586 #elif defined SUPPORT_UNICODE
6587   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
6588   jump = NULL;
6589   if (common->utf)
6590     jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6591 #endif
6592   OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
6593   OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
6594   OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
6595 #if PCRE2_CODE_UNIT_WIDTH != 8
6596   JUMPHERE(jump);
6597 #elif defined SUPPORT_UNICODE
6598   if (jump != NULL)
6599     JUMPHERE(jump);
6600 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
6601   }
6602 set_jumps(skipread_list, LABEL());
6603 
6604 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6605 OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
6606 OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
6607 
6608 #ifdef SUPPORT_UNICODE
6609 if (common->invalid_utf)
6610   {
6611   set_jumps(invalid_utf1, LABEL());
6612 
6613   peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL);
6614   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);
6615 
6616   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6617   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
6618   OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
6619 
6620   set_jumps(invalid_utf2, LABEL());
6621   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6622   OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
6623   OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
6624   }
6625 #endif /* SUPPORT_UNICODE */
6626 }
6627 
/* Tries to emit a short compare sequence for a class bitmap that can be
described by at most four boundaries (positions where the bit value changes).

Arguments:
  common      compiler state; the sljit compiler is obtained from it
  bits        class bitmap; bits[0] .. bits[31] are read (characters 0..255)
  nclass      together with the value of the last run, decides whether a
              closing boundary at 256 is appended
  invert      when set, the value of the first bitmap bit is flipped before
              the compare conditions are selected
  backtracks  jump list that receives every jump emitted here

Returns:      TRUE when the check has been emitted (or nothing was needed),
              FALSE when the bitmap needs too many boundaries
*/
static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
DEFINE_COMPILER;
int ranges[MAX_CLASS_RANGE_SIZE];
sljit_u8 bit, cbit, all;
int i, byte, length = 0;

bit = bits[0] & 0x1;
/* All bits will be zero or one (since bit is zero or one). */
all = -bit;

/* Collect the positions where the bit value changes. A whole byte that
consists only of the current bit value is skipped in one step. */
for (i = 0; i < 256; )
  {
  byte = i >> 3;
  if ((i & 0x7) == 0 && bits[byte] == all)
    i += 8;
  else
    {
    cbit = (bits[byte] >> (i & 0x7)) & 0x1;
    if (cbit != bit)
      {
      if (length >= MAX_CLASS_RANGE_SIZE)
        return FALSE;
      ranges[length] = i;
      length++;
      bit = cbit;
      all = -cbit;
      }
    i++;
    }
  }

/* Depending on the value of the last run and on nclass, the end of the
bitmap (256) is recorded as one more boundary. */
if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
  {
  if (length >= MAX_CLASS_RANGE_SIZE)
    return FALSE;
  ranges[length] = 256;
  length++;
  }

/* Only up to four boundaries are handled by the switch below. */
if (length < 0 || length > 4)
  return FALSE;

/* From here on "bit" is the value of the first run, flipped when invert is
set. It selects which side of each boundary leads to a backtrack. */
bit = bits[0] & 0x1;
if (invert) bit ^= 0x1;

/* No character is accepted. */
if (length == 0 && bit == 0)
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

switch(length)
  {
  case 0:
  /* When bit != 0, all characters are accepted. */
  return TRUE;

  case 1:
  /* A single boundary: one compare against ranges[0]. */
  add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 2:
  /* One interval [ranges[0], ranges[1]). When it holds a single value an
  equality test is enough, otherwise TMP1 is rebased to ranges[0] and
  compared with the interval length. */
  if (ranges[0] + 1 != ranges[1])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
    }
  else
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 3:
  if (bit != 0)
    {
    /* Backtrack at or above ranges[2] and inside [ranges[0], ranges[1]). */
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
    return TRUE;
    }

  /* bit == 0: backtrack below ranges[0] and inside [ranges[1], ranges[2]). */
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
  return TRUE;

  case 4:
  /* Special case: both intervals have the same length and their start
  values differ in exactly one bit (a power of two). OR-ing that bit into
  TMP1 maps the lower interval onto the upper one, so a single interval
  test is sufficient. */
  if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
      && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
      && (ranges[1] & (ranges[2] - ranges[0])) == 0
      && is_powerof2(ranges[2] - ranges[0]))
    {
    SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    return TRUE;
    }

  if (bit != 0)
    {
    /* Backtrack inside either interval. "i" remembers how much has already
    been subtracted from TMP1 by the first test, so that the second test
    can compensate for it. */
    i = 0;
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      i = ranges[0];
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));

    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
    return TRUE;
    }

  /* bit == 0: backtrack outside [ranges[0], ranges[3]) and inside the gap
  [ranges[1], ranges[2]). TMP1 is rebased to ranges[0] first. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
  return TRUE;

  default:
  SLJIT_UNREACHABLE();
  return FALSE;
  }
}
6779 
/* Tries to emit a class check as a short list of single character compares
whose results are accumulated in TMP2 with conditional moves.

Arguments:
  common      compiler state; the sljit compiler is obtained from it
  bits        class bitmap; bits[0] .. bits[31] are read
  nclass      when set, the complement of every bitmap byte is scanned
  invert      when set, the sense of the final test is flipped
  backtracks  jump list that receives the final jump

Returns:      TRUE when the check has been emitted, FALSE when conditional
              moves are unavailable, the list would be too long, or no
              character was found
*/
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
DEFINE_COMPILER;
uint16_t chars[MAX_CLASS_CHARS_SIZE];
uint8_t rest;
sljit_s32 cond;
int count = 0;
int merged = 0;
int idx, bitno, pos, ch;

if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  return FALSE;

/* Build the list of characters. A character with bit 0x20 set is merged
into an already stored entry that differs only in that bit; such an entry
is marked with 0x100 and gets the 0x20 bit as well. */
for (idx = 0; idx < 32; idx++)
  {
  rest = bits[idx];
  if (nclass)
    rest = ~rest;

  for (bitno = 0; rest != 0; bitno++, rest >>= 1)
    {
    if ((rest & 0x1) == 0)
      continue;

    ch = idx * 8 + bitno;
    pos = count;

    if ((ch & 0x20) != 0)
      {
      for (pos = 0; pos < count; pos++)
        if (chars[pos] == ch - 0x20)
          {
          chars[pos] |= 0x120;
          break;
          }
      }

    /* pos != count means the character was merged above. */
    if (pos != count)
      continue;

    if (count >= MAX_CLASS_CHARS_SIZE)
      return FALSE;

    chars[count++] = (uint16_t) ch;
    }
  }

if (count == 0) return FALSE;  /* Should never occur, but stops analyzers complaining. */

/* TMP2 starts as zero, or as the result of the test against character
zero when that is the first (unmerged) list entry. */
if (chars[0] == 0)
  {
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
  idx = 1;
  }
else
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  idx = 0;
  }

/* First pass: unmerged entries are compared with the unmodified TMP1. */
for (; idx < count; idx++)
  {
  if ((chars[idx] & 0x100) != 0)
    {
    merged++;
    continue;
    }

  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, chars[idx]);
  CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
  }

/* Second pass: merged entries are compared after 0x20 is OR-ed into TMP1. */
if (merged != 0)
  {
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);

  for (idx = 0; idx < count; idx++)
    {
    if ((chars[idx] & 0x100) == 0)
      continue;

    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, chars[idx] & 0xff);
    CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
    }
  }

if (invert)
  nclass = !nclass;

if (nclass)
  cond = SLJIT_NOT_EQUAL;
else
  cond = SLJIT_EQUAL;

add_jump(compiler, backtracks, CMP(cond, TMP2, 0, SLJIT_IMM, 0));
return TRUE;
}
6880 
/* Emits an optimized class check if one of the two strategies applies:
the range based one is tried first, the character list based one second.
Returns TRUE when code was emitted by either of them. */
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
BOOL emitted = optimize_class_ranges(common, bits, nclass, invert, backtracks);

if (!emitted)
  emitted = optimize_class_chars(common, bits, nclass, invert, backtracks);
return emitted;
}
6888 
/* Emits a fast-call routine that tests the character in TMP1 against the
"any newline" set: 0x0a..0x0d, 0x85 and, when wide characters are possible
(16/32 bit code units, or UTF mode in the 8 bit library), 0x2028 and 0x2029.
The result is accumulated in TMP2 and the last operation also sets the zero
flag from it. The emitted code modifies TMP1 as well. */
static void check_anynewline(compiler_common *common)
{
/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Rebase TMP1 to 0x0a; (TMP1 <= 3) then covers 0x0a..0x0d. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
/* 0x85; the result of this compare is merged by the next OP_FLAGS. */
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit makes one compare cover the pair 0x2028 and
  0x2029 (both already rebased by 0x0a). */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the last compare and set the zero flag from the final TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
6915 
/* Emits a fast-call routine that tests the character in TMP1 against the
horizontal whitespace set: 0x09, 0x20, 0xa0 and, when wide characters are
possible (16/32 bit code units, or UTF mode in the 8 bit library), 0x1680,
0x180e, 0x2000..0x200a, 0x202f, 0x205f and 0x3000. The result is accumulated
in TMP2 and the last operation also sets the zero flag from it. In the wide
character path the emitted code modifies TMP1 as well. */
static void check_hspace(compiler_common *common)
{
/* Check whether TMP1 contains a horizontal whitespace character. TMP2 destroyed. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Each compare below is merged into TMP2 by the OP_FLAGS that follows it. */
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Rebase TMP1 to 0x2000; the remaining constants are relative to it. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the last compare and set the zero flag from the final TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
6954 
/* Emits a fast-call routine that tests the character in TMP1 against the
vertical whitespace set: 0x0a..0x0d, 0x85 and, when wide characters are
possible (16/32 bit code units, or UTF mode in the 8 bit library), 0x2028
and 0x2029. The result is accumulated in TMP2 and the last operation also
sets the zero flag from it. The emitted code modifies TMP1 as well. */
static void check_vspace(compiler_common *common)
{
/* Check whether TMP1 contains a vertical whitespace character. TMP2 destroyed. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Rebase TMP1 to 0x0a; (TMP1 <= 3) then covers 0x0a..0x0d. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
/* 0x85; the result of this compare is merged by the next OP_FLAGS. */
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit makes one compare cover the pair 0x2028 and
  0x2029 (both already rebased by 0x0a). */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the last compare and set the zero flag from the final TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
6982 
/* Emits a fast-call routine that compares two code unit sequences exactly.

On entry of the emitted code TMP1 addresses the first sequence and TMP2
holds the remaining length, which is decreased by IN_UCHARS(1) per code
unit. STR_PTR is first moved back by TMP2 and then addresses the second
sequence. The loop ends on the first mismatch or when TMP2 reaches zero.
NOTE(review): how the caller reads the outcome (flags or TMP2) is not
visible here - confirm against the call sites.

The return address is kept in the LOCALS0 stack slot and reloaded into
TMP1 before returning. */
static void do_casefulcmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;
int char1_reg;
int char2_reg;

/* Select the two registers that hold the code units being compared. */
if (HAS_VIRTUAL_REGISTERS)
  {
  char1_reg = STR_END;
  char2_reg = STACK_TOP;
  }
else
  {
  char1_reg = TMP3;
  char2_reg = RETURN_ADDR;
  }

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* STR_END and STACK_TOP are borrowed in this configuration: keep their
values in TMP3 and RETURN_ADDR until the end of the routine. */
if (char1_reg == STR_END)
  {
  OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
  }

/* Three loop variants follow. The calls with SLJIT_MEM_SUPP are used as
support queries for post- and pre-update addressing (assumed not to emit
code, per the sljit API - TODO confirm). */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  {
  /* Post-update loads: both pointers advance as part of the load. */
  label = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_NOT_ZERO, label);

  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  }
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  {
  /* Pre-update loads: both pointers are moved one code unit back before
  the loop, and STR_PTR is moved forward again after it. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  label = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_NOT_ZERO, label);

  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }
else
  {
  /* Plain loads followed by explicit pointer increments. */
  label = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_NOT_ZERO, label);

  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  }

/* Give the borrowed registers their original values back. */
if (char1_reg == STR_END)
  {
  OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
  }

OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7062 
/* Emits a fast-call routine that compares two code unit sequences after
translating each code unit through the table at common->lcc. When code
units are wider than 8 bits, values above 255 skip the table lookup.

On entry of the emitted code TMP1 addresses the first sequence and TMP2
holds the remaining length, which is decreased by IN_UCHARS(1) per code
unit. STR_PTR is first moved back by TMP2 and then addresses the second
sequence. The loop ends on the first mismatch or when TMP2 reaches zero.
NOTE(review): how the caller reads the outcome (flags or TMP2) is not
visible here - confirm against the call sites.

The return address is kept in the LOCALS0 stack slot; STR_END is used as a
scratch register and its value is kept in the LOCALS1 slot meanwhile. */
static void do_caselesscmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;
int char1_reg = STR_END;
int char2_reg;
int lcc_table;
int opt_type = 0;

/* Select the register for the second code unit and for the table address. */
if (HAS_VIRTUAL_REGISTERS)
  {
  char2_reg = STACK_TOP;
  lcc_table = STACK_LIMIT;
  }
else
  {
  char2_reg = RETURN_ADDR;
  lcc_table = TMP3;
  }

/* opt_type: 1 = post-update loads, 2 = pre-update loads, 0 = plain loads.
The calls with SLJIT_MEM_SUPP are used as support queries (assumed not to
emit code, per the sljit API - TODO confirm). */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  opt_type = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  opt_type = 2;

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Keep the value of STR_END (char1_reg) in LOCALS1. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);

/* STACK_TOP and STACK_LIMIT are borrowed in this configuration: keep
their values in TMP3 and RETURN_ADDR until the end of the routine. */
if (char2_reg == STACK_TOP)
  {
  OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
  }

OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);

if (opt_type == 1)
  {
  label = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  }
else if (opt_type == 2)
  {
  /* Pre-update loads need both pointers one code unit before the data. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  label = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  }
else
  {
  /* Plain loads; only TMP1 is advanced here, STR_PTR is advanced after
  the table lookups below. */
  label = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  }

/* Translate both code units through the table. For wider code units the
lookup is jumped over when the value is above 255. */
#if PCRE2_CODE_UNIT_WIDTH != 8
jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif

if (opt_type == 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Leave the loop on a mismatch, otherwise continue while TMP2 is not zero. */
jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, label);

JUMPHERE(jump);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

/* Undo the initial one code unit step back of the pre-update variant. */
if (opt_type == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Give the borrowed registers their original values back. */
if (char2_reg == STACK_TOP)
  {
  OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
  }

OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7160 
/* Emits the compare code for one pattern character at cc (one code unit,
or several code units when UTF mode is active and the first code unit
announces extra ones).

Arguments:
  common      compiler state
  caseless    when TRUE and the character has an other case, the bit that
              differs between the two cases is OR-ed into the subject data
              before comparing
  cc          pattern position of the character
  context     state shared between consecutive calls: "sourcereg" (-1 on
              the first call, otherwise the register that receives the next
              load), "length" (used as the negative offset from STR_PTR of
              the data still to be loaded; decreased by IN_UCHARS(1) per
              code unit), and "ucharptr", "c", "oc" (code units and case
              bits collected for a combined compare)
  backtracks  jump list that receives the mismatch jumps

Returns:      cc advanced past the processed code units
*/
static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
    compare_context *context, jump_list **backtracks)
{
DEFINE_COMPILER;
unsigned int othercasebit = 0;
PCRE2_SPTR othercasechar = NULL;
#ifdef SUPPORT_UNICODE
int utflength;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  othercasebit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(othercasebit);
  /* Extracting bit difference info. */
  /* The upper part of the returned value selects the code unit (relative
  to cc) that carries the differing bit; the lower part is the bit mask. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  othercasechar = cc + (othercasebit >> 8);
  othercasebit &= 0xff;
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* Note that this code only handles characters in the BMP. If there
  ever are characters outside the BMP whose othercase differs in only one
  bit from itself (there currently are none), this code will need to be
  revised for PCRE2_CODE_UNIT_WIDTH == 32. */
  othercasechar = cc + (othercasebit >> 9);
  if ((othercasebit & 0x100) != 0)
    othercasebit = (othercasebit & 0xff) << 8;
  else
    othercasebit &= 0xff;
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  }

/* First call for this context: load the first chunk into TMP1. Later
loads go to context->sourcereg, which alternates between TMP2 and TMP1. */
if (context->sourcereg == -1)
  {
#if PCRE2_CODE_UNIT_WIDTH == 8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 32
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UNICODE
utflength = 1;
if (common->utf && HAS_EXTRALEN(*cc))
  utflength += GET_EXTRALEN(*cc);

do
  {
#endif

  context->length -= IN_UCHARS(1);
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)

  /* Unaligned read is supported. */
  /* Collect the expected code unit (with the case bit already set) and the
  case bit mask; the compare is emitted once a full chunk is collected. */
  if (othercasebit != 0 && othercasechar == cc)
    {
    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
    context->oc.asuchars[context->ucharptr] = othercasebit;
    }
  else
    {
    context->c.asuchars[context->ucharptr] = *cc;
    context->oc.asuchars[context->ucharptr] = 0;
    }
  context->ucharptr++;

#if PCRE2_CODE_UNIT_WIDTH == 8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Load the next chunk (if any data remains) into sourcereg, then switch
    to the other register, which holds the chunk loaded earlier, and compare
    that one with the collected code units. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if PCRE2_CODE_UNIT_WIDTH == 8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

    /* The number of collected code units selects the compare width. */
    switch(context->ucharptr)
      {
      case 4 / sizeof(PCRE2_UCHAR):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(PCRE2_UCHAR):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#if PCRE2_CODE_UNIT_WIDTH == 8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    context->ucharptr = 0;
    }

#else

  /* Unaligned read is unsupported or in 32 bit mode. */
  /* One code unit per step: load the next code unit (if any remains), then
  switch to the other register and compare the previously loaded one. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

  if (othercasebit != 0 && othercasechar == cc)
    {
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));

#endif

  cc++;
#ifdef SUPPORT_UNICODE
  utflength--;
  }
while (utflength > 0);
#endif

return cc;
}
7313 
7314 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
7315 
/* Changes the offset that is currently subtracted from the value in
"typereg" to (value): when (value) differs from "typeoffset", an ADD or SUB
of the difference is emitted on typereg, and typeoffset is then set to
(value). Relies on the variables "typereg" and "typeoffset" of the
surrounding function. (value) is evaluated more than once.
NOTE(review): the expansion is an if statement followed by a separate
assignment, so it must not be used as the unbraced body of an if/else or a
loop. */
#define SET_TYPE_OFFSET(value) \
  if ((value) != typeoffset) \
    { \
    if ((value) < typeoffset) \
      OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
    else \
      OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
    } \
  typeoffset = (value);
7325 
/* Changes the offset that is currently subtracted from the character in
TMP1 to (value): when (value) differs from "charoffset", an ADD or SUB of
the difference is emitted on TMP1, and charoffset is then set to (value).
Relies on the variable "charoffset" of the surrounding function. (value) is
evaluated more than once.
NOTE(review): the expansion is an if statement followed by a separate
assignment, so it must not be used as the unbraced body of an if/else or a
loop. */
#define SET_CHAR_OFFSET(value) \
  if ((value) != charoffset) \
    { \
    if ((value) < charoffset) \
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
    else \
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
    } \
  charoffset = (value);
7335 
7336 static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
7337 
/* Emits the JIT matching path for an extended character class (XCLASS).

Arguments:
  common      the JIT compiler state
  cc          points to the XCLASS flag code unit (XCL_NOT, XCL_MAP and
              XCL_HASPROP bits). When XCL_MAP is set a 32-byte bitmap
              follows. After that comes a list of XCL_SINGLE, XCL_RANGE,
              XCL_PROP and XCL_NOTPROP items terminated by XCL_END.
  backtracks  jump list which receives the jumps taken when the class
              does not match

The function works in three steps:
  1. the item list is scanned to compute the [min, max] range of characters
     which must be decoded and to find out whether the Unicode character
     type, the script, or the character itself is needed later;
  2. the character read, the optional bitmap test and the optional Unicode
     record lookup are emitted;
  3. one comparison sequence is emitted for each list item.

Returns nothing. */

static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
jump_list *found = NULL;
/* Target of "this item matched" jumps: the found label for a normal class,
the backtrack list for a negated (XCL_NOT) class. */
jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
struct sljit_jump *jump = NULL;
PCRE2_SPTR ccbegin;
int compares, invertcmp, numberofcmps;
#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
BOOL utf = common->utf;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */

#ifdef SUPPORT_UNICODE
BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
BOOL charsaved = FALSE;
int typereg = TMP1;
const sljit_u32 *other_cases;
sljit_uw typeoffset;
#endif /* SUPPORT_UNICODE */

/* Scanning the necessary info. */
cc++;
ccbegin = cc;
compares = 0;

if (cc[-1] & XCL_MAP)
  {
  /* The bitmap covers the low characters, so the range starts at zero.
  Skip the 32 bytes of the bitmap. */
  min = 0;
  cc += 32 / sizeof(PCRE2_UCHAR);
  }

while (*cc != XCL_END)
  {
  compares++;
  if (*cc == XCL_SINGLE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
    if (c < min) min = c;
#ifdef SUPPORT_UNICODE
    needschar = TRUE;
#endif /* SUPPORT_UNICODE */
    }
  else if (*cc == XCL_RANGE)
    {
    cc ++;
    /* The first character of the range can only lower min. */
    GETCHARINCTEST(c, cc);
    if (c < min) min = c;
    /* The second character of the range can only raise max. */
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
#ifdef SUPPORT_UNICODE
    needschar = TRUE;
#endif /* SUPPORT_UNICODE */
    }
#ifdef SUPPORT_UNICODE
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    cc++;
    if (*cc == PT_CLIST)
      {
      /* A caseless set is an explicit, NOTACHAR terminated list of
      characters, so the range can still be kept tight. */
      other_cases = PRIV(ucd_caseless_sets) + cc[1];
      while (*other_cases != NOTACHAR)
        {
        if (*other_cases > max) max = *other_cases;
        if (*other_cases < min) min = *other_cases;
        other_cases++;
        }
      }
    else
      {
      /* Any other property may match any character. */
      max = READ_CHAR_MAX;
      min = 0;
      }

    switch(*cc)
      {
      case PT_ANY:
      /* Any either accepts everything or is ignored. */
      if (cc[-1] == XCL_PROP)
        {
        compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
        /* For a negated class, matching any character is a failure. */
        if (list == backtracks)
          add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
        return;
        }
      break;

      case PT_LAMP:
      case PT_GC:
      case PT_PC:
      case PT_ALNUM:
      needstype = TRUE;
      break;

      case PT_SC:
      needsscript = TRUE;
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      case PT_WORD:
      case PT_PXGRAPH:
      case PT_PXPRINT:
      case PT_PXPUNCT:
      needstype = TRUE;
      needschar = TRUE;
      break;

      case PT_CLIST:
      case PT_UCNC:
      needschar = TRUE;
      break;

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    cc += 2;
    }
#endif /* SUPPORT_UNICODE */
  }
SLJIT_ASSERT(compares > 0);

/* We are not necessarily in utf mode even in 8 bit mode. */
cc = ccbegin;
if ((cc[-1] & XCL_NOT) != 0)
  read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
else
  {
#ifdef SUPPORT_UNICODE
  read_char(common, min, max, (needstype || needsscript) ? backtracks : NULL, 0);
#else /* !SUPPORT_UNICODE */
  read_char(common, min, max, NULL, 0);
#endif /* SUPPORT_UNICODE */
  }

/* The code below tests the character in TMP1. */
if ((cc[-1] & XCL_HASPROP) == 0)
  {
  if ((cc[-1] & XCL_MAP) != 0)
    {
    /* Characters above 255 are not covered by the bitmap: they skip it and
    continue with the item list. */
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
    if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
      {
      /* Generic bitmap test: TMP2 = bit index (c & 7), TMP1 = the bitmap
      byte at index (c >> 3), then test bit (1 << TMP2). */
      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
      OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
      add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
      }

    /* Without properties the bitmap is decisive for characters <= 255. */
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(jump);

    cc += 32 / sizeof(PCRE2_UCHAR);
    }
  else
    {
    /* Quick rejection: a character outside [min, max] cannot be equal to
    any item of the list. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
    add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
    }
  }
else if ((cc[-1] & XCL_MAP) != 0)
  {
  /* The bitmap test destroys TMP1, so the character is saved in RETURN_ADDR
  and restored afterwards. */
  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
#ifdef SUPPORT_UNICODE
  charsaved = TRUE;
#endif /* SUPPORT_UNICODE */
  if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
    {
#if PCRE2_CODE_UNIT_WIDTH == 8
    jump = NULL;
    if (common->utf)
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
      jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);

    /* Same bitmap test as above: byte (c >> 3), bit (c & 7). */
    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
    add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));

#if PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf)
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
      JUMPHERE(jump);
    }

  OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
  cc += 32 / sizeof(PCRE2_UCHAR);
  }

#ifdef SUPPORT_UNICODE
if (needstype || needsscript)
  {
  /* The record lookup overwrites TMP1. Keep a copy of the character in
  RETURN_ADDR when it is needed later and has not been saved yet. */
  if (needschar && !charsaved)
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
  if (!common->utf)
    {
    /* Values above MAX_UTF_CODE_POINT are looked up as
    UNASSIGNED_UTF_CHAR. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
    JUMPHERE(jump);
    }
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */

  /* Two stage table lookup: TMP2 = ucd_stage2[ucd_stage1[c >> shift] *
  block size + (c & mask)], both tables hold 16 bit entries. */
  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

  /* Before anything else, we deal with scripts. */
  if (needsscript)
    {
    /* Byte offset of the record: index * 8 + index * 4. */
    OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));

    ccbegin = cc;

    /* Emit all script comparisons now; the main loop below skips PT_SC. */
    while (*cc != XCL_END)
      {
      if (*cc == XCL_SINGLE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        }
      else if (*cc == XCL_RANGE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        GETCHARINCTEST(c, cc);
        }
      else
        {
        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
        cc++;
        if (*cc == PT_SC)
          {
          compares--;
          invertcmp = (compares == 0 && list != backtracks);
          if (cc[-1] == XCL_NOTPROP)
            invertcmp ^= 0x1;
          jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
          add_jump(compiler, compares > 0 ? list : backtracks, jump);
          }
        cc += 2;
        }
      }

    cc = ccbegin;

    if (needstype)
      {
      /* TMP2 has already been shifted by 2 */
      if (!needschar)
        {
        OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

        OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
        }
      else
        {
        OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
        OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

        /* The character goes back to TMP1 and the type is kept in
        RETURN_ADDR. */
        OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
        OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
        typereg = RETURN_ADDR;
        }
      }
    else if (needschar)
      OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
    }
  else if (needstype)
    {
    /* Byte offset of the record: index * 8 + index * 4. */
    OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);

    if (!needschar)
      {
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
      }
    else
      {
      OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

      /* The character goes back to TMP1 and the type is kept in
      RETURN_ADDR. */
      OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
      typereg = RETURN_ADDR;
      }
    }
  else if (needschar)
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
  }
#endif /* SUPPORT_UNICODE */

/* Generating code. */
charoffset = 0;
numberofcmps = 0;
#ifdef SUPPORT_UNICODE
typeoffset = 0;
#endif /* SUPPORT_UNICODE */

while (*cc != XCL_END)
  {
  compares--;
  /* The last item of a non-negated class jumps to backtracks on failure
  instead of jumping to found on success, hence the inverted condition. */
  invertcmp = (compares == 0 && list != backtracks);
  jump = NULL;

  if (*cc == XCL_SINGLE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);

    /* Up to four consecutive character / range tests are accumulated in
    TMP2 and checked with a single jump. */
    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
  else if (*cc == XCL_RANGE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);
    /* After rebasing to the start of the range a single unsigned compare
    with (end - start) checks the whole range. */
    SET_CHAR_OFFSET(c);
    GETCHARINCTEST(c, cc);

    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
#ifdef SUPPORT_UNICODE
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    if (*cc == XCL_NOTPROP)
      invertcmp ^= 0x1;
    cc++;
    switch(*cc)
      {
      case PT_ANY:
      if (!invertcmp)
        jump = JUMP(SLJIT_JUMP);
      break;

      case PT_LAMP:
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_GC:
      /* ucp_typerange holds a [first, last] pair of types per category. */
      c = PRIV(ucp_typerange)[(int)cc[1] * 2];
      SET_TYPE_OFFSET(c);
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
      break;

      case PT_PC:
      jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
      break;

      case PT_SC:
      /* Scripts were handled before this loop and already counted there,
      so undo the decrement done at the top of the loop. */
      compares++;
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      SET_CHAR_OFFSET(9);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      SET_TYPE_OFFSET(ucp_Zl);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_WORD:
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      /* Fall through. */

      case PT_ALNUM:
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
      /* For PT_WORD the underscore test result is already in TMP2. */
      OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      SET_TYPE_OFFSET(ucp_Nd);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_CLIST:
      other_cases = PRIV(ucd_caseless_sets) + cc[1];

      /* At least three characters are required.
         Otherwise this case would be handled by the normal code path. */
      SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
      SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);

      /* Optimizing character pairs, if their difference is power of 2.
         Two characters which differ in a single bit are checked together:
         the differing bit is forced to one and the result is compared
         with the greater character. */
      if (is_powerof2(other_cases[1] ^ other_cases[0]))
        {
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
        else
          {
          /* Undo the current character offset before the bit trick. */
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
          }
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
        other_cases += 2;
        }
      else if (is_powerof2(other_cases[2] ^ other_cases[1]))
        {
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
        else
          {
          /* Undo the current character offset before the bit trick. The
             mask must be the bit in which other_cases[1] and other_cases[2]
             differ, exactly as in the charoffset == 0 case, because the
             result is compared with other_cases[2] below. */
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
          }
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);

        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
        /* The zero flag is only needed after the last comparison. */
        OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);

        other_cases += 3;
        }
      else
        {
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
        }

      /* The remaining characters are compared one by one. */
      while (*other_cases != NOTACHAR)
        {
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
        }
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_UCNC:
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      /* The ranges 0xa0 - 0xd7ff and 0xe000 and above. */
      SET_CHAR_OFFSET(0xa0);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      SET_CHAR_OFFSET(0);
      OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_PXGRAPH:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);

      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      SET_CHAR_OFFSET(0x2066);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      JUMPHERE(jump);
      /* TMP2 is non-zero when the character is NOT graphic. */
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPRINT:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);

      /* Unlike PT_PXGRAPH, the ucp_Zs type is accepted. */
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
      OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);

      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      SET_CHAR_OFFSET(0x2066);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      JUMPHERE(jump);
      /* TMP2 is non-zero when the character is NOT printable. */
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPUNCT:
      /* Symbol types (ucp_Sc - ucp_So) are accepted only for characters
      which are not greater than 0x7f. */
      SET_TYPE_OFFSET(ucp_Sc);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      SET_CHAR_OFFSET(0);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
      OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);

      SET_TYPE_OFFSET(ucp_Pc);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    cc += 2;
    }
#endif /* SUPPORT_UNICODE */

  /* Every jump but the last goes to list; the (inverted) last one goes
  to backtracks. */
  if (jump != NULL)
    add_jump(compiler, compares > 0 ? list : backtracks, jump);
  }

if (found != NULL)
  set_jumps(found, LABEL());
}
7930 
7931 #undef SET_TYPE_OFFSET
7932 #undef SET_CHAR_OFFSET
7933 
7934 #endif
7935 
/* Emits the matching path of a simple (zero width) assertion opcode.

Arguments:
  common      the JIT compiler state
  type        the opcode: OP_SOD, OP_SOM, OP_NOT_WORD_BOUNDARY,
              OP_WORD_BOUNDARY, OP_EODN, OP_EOD, OP_DOLL, OP_DOLLM, OP_CIRC,
              OP_CIRCM or OP_REVERSE
  cc          the bytecode position after the opcode; only OP_REVERSE
              reads its argument from it
  backtracks  jump list which receives the jumps taken when the assertion
              fails

Returns the bytecode position where compilation continues: cc itself, or
cc + LINK_SIZE for OP_REVERSE. */

static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
/* Forward jumps; their labels are set later by JUMPHERE. */
struct sljit_jump *done, *branch, *overrun, *crlf;
/* Base register for accessing jit_arguments. It is replaced by a temporary
register when ARGUMENTS cannot be used directly as a base. */
int args_reg = ARGUMENTS;
/* The two code units of a two unit fixed newline (only meaningful when
common->newline > 255). */
int nl_hi = (int)((common->newline >> 8) & 0xff);
int nl_lo = (int)(common->newline & 0xff);
int length;
#ifdef SUPPORT_UNICODE
struct sljit_label *loop;
#endif /* SUPPORT_UNICODE */

switch(type)
  {
  case OP_SOD:
  case OP_SOM:
  /* Both compare STR_PTR with a field of jit_arguments: OP_SOD with
  'begin' and OP_SOM with 'str'. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    args_reg = TMP1;
    }
  if (type == OP_SOD)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
  else
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, str));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  /* The shared wordboundary routine does the work; its result is tested
  here. */
  add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
#ifdef SUPPORT_UNICODE
  if (common->invalid_utf)
    {
    /* In this mode the result in TMP2 is compared as a signed value. */
    if (type == OP_NOT_WORD_BOUNDARY)
      branch = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    else
      branch = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    add_jump(compiler, backtracks, branch);
    return cc;
    }
#endif /* SUPPORT_UNICODE */
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  if (type == OP_NOT_WORD_BOUNDARY)
    branch = JUMP(SLJIT_NOT_ZERO);
  else
    branch = JUMP(SLJIT_ZERO);
  add_jump(compiler, backtracks, branch);
  return cc;

  case OP_EODN:
  /* Succeeds at the end of the subject or before a newline which is the
  last thing in the subject. */
  done = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (common->nltype != NLTYPE_FIXED)
    {
    /* A CR needs special care because it may start a CR LF pair. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    branch = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
    /* Greater: the CR is the last code unit. */
    overrun = JUMP(SLJIT_GREATER);
    /* Less: more than two code units are left. */
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
    /* Equal: exactly two code units are left, the second must be NL. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    crlf = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

    /* The first code unit is not CR. */
    JUMPHERE(branch);
    if (common->nltype == NLTYPE_ANYCRLF)
      {
      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
      }
    else
      {
      /* STR_PTR is kept in TMP3 while one character is read; it is
      restored when the test succeeds. */
      OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
      read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
      sljit_set_current_flags(compiler, SLJIT_SET_Z);
      add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
      OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
      }
    JUMPHERE(overrun);
    JUMPHERE(crlf);
    }
  else if (common->newline > 255)
    {
    /* Fixed newline consisting of two code units. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode != PCRE2_JIT_COMPLETE)
      {
      branch = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
      /* TMP2 becomes non-zero if more than one code unit is left or the
      current one is not the first newline unit. */
      OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, nl_hi);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(branch);
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_hi));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_lo));
    }
  else
    {
    /* Fixed newline consisting of a single code unit. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
    }
  JUMPHERE(done);
  if (common->mode != PCRE2_JIT_COMPLETE)
    check_partial(common, TRUE);
  return cc;

  case OP_EOD:
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
  if (common->mode != PCRE2_JIT_COMPLETE)
    check_partial(common, TRUE);
  return cc;

  case OP_DOLL:
  /* Fails immediately when the PCRE2_NOTEOL option bit is set. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    args_reg = TMP2;
    }
  OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));

  if (common->endonly)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    check_partial(common, FALSE);
    }
  else
    compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
  return cc;

  case OP_DOLLM:
  /* At the end of the subject only PCRE2_NOTEOL matters, otherwise a
  newline must follow. */
  branch = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    args_reg = TMP2;
    }
  OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
  check_partial(common, FALSE);
  done = JUMP(SLJIT_JUMP);
  JUMPHERE(branch);

  if (common->nltype != NLTYPE_FIXED || common->newline <= 255)
    {
    peek_char(common, common->nlmax, TMP3, 0, NULL);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  else
    {
    /* Fixed newline consisting of two code units. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode != PCRE2_JIT_COMPLETE)
      {
      branch = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
      /* STR_PTR = STR_END - IN_UCHARS(1) */
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_hi));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(branch);
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_hi));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_lo));
    }
  JUMPHERE(done);
  return cc;

  case OP_CIRC:
  /* Fails when STR_PTR is after 'begin' or PCRE2_NOTBOL is set. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    args_reg = TMP2;
    }
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
  OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
  return cc;

  case OP_CIRCM:
  /* TMP2 might be used by peek_char_back, so TMP1 is the base register
  and TMP2 holds 'begin'. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    args_reg = TMP1;
    }
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
  branch = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
  OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
  done = JUMP(SLJIT_JUMP);
  JUMPHERE(branch);

  /* Not at 'begin': a newline must precede the current position. */
  if (!common->alt_circumflex)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  if (common->nltype != NLTYPE_FIXED || common->newline <= 255)
    {
    peek_char_back(common, common->nlmax, backtracks);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  else
    {
    /* Fixed newline consisting of two code units. */
    OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_hi));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_lo));
    }
  JUMPHERE(done);
  return cc;

  case OP_REVERSE:
  /* Moves STR_PTR back by 'length' characters and fails when this would
  pass 'begin'. */
  length = GET(cc, 0);
  if (length == 0)
    return cc + LINK_SIZE;
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    args_reg = TMP1;
    }
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
    /* Characters have variable length: step back one by one, TMP3 is the
    down counter. */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, length);
    loop = LABEL();
    add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
    move_back(common, backtracks, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);
    }
  else
#endif /* SUPPORT_UNICODE */
    {
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
    }
  check_start_used_ptr(common);
  return cc + LINK_SIZE;
  }
SLJIT_UNREACHABLE();
return cc;
}
8210 
8211 #ifdef SUPPORT_UNICODE
8212 
8213 #if PCRE2_CODE_UNIT_WIDTH != 32
8214 
do_extuni_utf(jit_arguments * args,PCRE2_SPTR cc)8215 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
8216 {
8217 PCRE2_SPTR start_subject = args->begin;
8218 PCRE2_SPTR end_subject = args->end;
8219 int lgb, rgb, ricount;
8220 PCRE2_SPTR prevcc, endcc, bptr;
8221 BOOL first = TRUE;
8222 uint32_t c;
8223 
8224 prevcc = cc;
8225 endcc = NULL;
8226 do
8227   {
8228   GETCHARINC(c, cc);
8229   rgb = UCD_GRAPHBREAK(c);
8230 
8231   if (first)
8232     {
8233     lgb = rgb;
8234     endcc = cc;
8235     first = FALSE;
8236     continue;
8237     }
8238 
8239   if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8240     break;
8241 
8242   /* Not breaking between Regional Indicators is allowed only if there
8243   are an even number of preceding RIs. */
8244 
8245   if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8246     {
8247     ricount = 0;
8248     bptr = prevcc;
8249 
8250     /* bptr is pointing to the left-hand character */
8251     while (bptr > start_subject)
8252       {
8253       bptr--;
8254       BACKCHAR(bptr);
8255       GETCHAR(c, bptr);
8256 
8257       if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
8258         break;
8259 
8260       ricount++;
8261       }
8262 
8263     if ((ricount & 1) != 0) break;  /* Grapheme break required */
8264     }
8265 
8266   /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8267   allows any number of them before a following Extended_Pictographic. */
8268 
8269   if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8270        lgb != ucp_gbExtended_Pictographic)
8271     lgb = rgb;
8272 
8273   prevcc = endcc;
8274   endcc = cc;
8275   }
8276 while (cc < end_subject);
8277 
8278 return endcc;
8279 }
8280 
8281 #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
8282 
do_extuni_utf_invalid(jit_arguments * args,PCRE2_SPTR cc)8283 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
8284 {
8285 PCRE2_SPTR start_subject = args->begin;
8286 PCRE2_SPTR end_subject = args->end;
8287 int lgb, rgb, ricount;
8288 PCRE2_SPTR prevcc, endcc, bptr;
8289 BOOL first = TRUE;
8290 uint32_t c;
8291 
8292 prevcc = cc;
8293 endcc = NULL;
8294 do
8295   {
8296   GETCHARINC_INVALID(c, cc, end_subject, break);
8297   rgb = UCD_GRAPHBREAK(c);
8298 
8299   if (first)
8300     {
8301     lgb = rgb;
8302     endcc = cc;
8303     first = FALSE;
8304     continue;
8305     }
8306 
8307   if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8308     break;
8309 
8310   /* Not breaking between Regional Indicators is allowed only if there
8311   are an even number of preceding RIs. */
8312 
8313   if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8314     {
8315     ricount = 0;
8316     bptr = prevcc;
8317 
8318     /* bptr is pointing to the left-hand character */
8319     while (bptr > start_subject)
8320       {
8321       GETCHARBACK_INVALID(c, bptr, start_subject, break);
8322 
8323       if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
8324         break;
8325 
8326       ricount++;
8327       }
8328 
8329     if ((ricount & 1) != 0)
8330       break;  /* Grapheme break required */
8331     }
8332 
8333   /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8334   allows any number of them before a following Extended_Pictographic. */
8335 
8336   if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8337        lgb != ucp_gbExtended_Pictographic)
8338     lgb = rgb;
8339 
8340   prevcc = endcc;
8341   endcc = cc;
8342   }
8343 while (cc < end_subject);
8344 
8345 return endcc;
8346 }
8347 
do_extuni_no_utf(jit_arguments * args,PCRE2_SPTR cc)8348 static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
8349 {
8350 PCRE2_SPTR start_subject = args->begin;
8351 PCRE2_SPTR end_subject = args->end;
8352 int lgb, rgb, ricount;
8353 PCRE2_SPTR bptr;
8354 uint32_t c;
8355 
8356 /* Patch by PH */
8357 /* GETCHARINC(c, cc); */
8358 c = *cc++;
8359 
8360 #if PCRE2_CODE_UNIT_WIDTH == 32
8361 if (c >= 0x110000)
8362   return NULL;
8363 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8364 lgb = UCD_GRAPHBREAK(c);
8365 
8366 while (cc < end_subject)
8367   {
8368   c = *cc;
8369 #if PCRE2_CODE_UNIT_WIDTH == 32
8370   if (c >= 0x110000)
8371     break;
8372 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8373   rgb = UCD_GRAPHBREAK(c);
8374 
8375   if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8376     break;
8377 
8378   /* Not breaking between Regional Indicators is allowed only if there
8379   are an even number of preceding RIs. */
8380 
8381   if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8382     {
8383     ricount = 0;
8384     bptr = cc - 1;
8385 
8386     /* bptr is pointing to the left-hand character */
8387     while (bptr > start_subject)
8388       {
8389       bptr--;
8390       c = *bptr;
8391 #if PCRE2_CODE_UNIT_WIDTH == 32
8392       if (c >= 0x110000)
8393         break;
8394 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8395 
8396       if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
8397 
8398       ricount++;
8399       }
8400 
8401     if ((ricount & 1) != 0)
8402       break;  /* Grapheme break required */
8403     }
8404 
8405   /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8406   allows any number of them before a following Extended_Pictographic. */
8407 
8408   if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8409        lgb != ucp_gbExtended_Pictographic)
8410     lgb = rgb;
8411 
8412   cc++;
8413   }
8414 
8415 return cc;
8416 }
8417 
8418 #endif /* SUPPORT_UNICODE */
8419 
/* Emits the matching-path code for one single-character opcode.

Arguments:
  common         the compiler state
  type           the opcode to compile (one of the cases below)
  cc             points at the opcode's operand data, i.e. just after the
                   opcode itself (callers in this file pass cc + 1)
  backtracks     list that receives every jump taken when the match fails
  check_str_ptr  when TRUE, the end-of-subject / partial-match check is
                   emitted (through detect_partial_match(), or a direct
                   STR_PTR/STR_END comparison in the OP_CHAR cases)

Returns:         pointer to the pattern code after the consumed operands

The order of the emitted instructions is significant; the register contents
left by the read_char*() helpers (the value in TMP1) are relied upon by the
instructions that follow them. */

static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
{
DEFINE_COMPILER;
int length;
unsigned int c, oc, bit;
compare_context context;
struct sljit_jump *jump[3];
jump_list *end_list;
#ifdef SUPPORT_UNICODE
PCRE2_UCHAR propdata[5];
#endif /* SUPPORT_UNICODE */

switch(type)
  {
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  /* Digits are usually 0-9, so it is worth to optimize them. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
    read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
  else
#endif
    read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
    /* Flip the starting bit in the negative case. */
    /* NOTE(review): the comment above does not describe any statement that
    follows it and looks stale - confirm before relying on it. */
  /* Test the ctype_digit bit of the value left in TMP1; OP_DIGIT fails when
  the bit is clear, OP_NOT_DIGIT fails when it is set. */
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
  add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  /* Same scheme as the digit case, using the ctype_space bit. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
    read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
  else
#endif
    read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
  add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  /* Same scheme as the digit case, using the ctype_word bit. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
    read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
  else
#endif
    read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
  add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_ANY:
  /* Any character except a newline. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed newline made of two code units: the match fails only when the
    character just read equals the high unit and the next code unit equals
    the low unit. At the end of the subject the second test is skipped. */
    jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
    end_list = NULL;
    if (common->mode != PCRE2_JIT_PARTIAL_HARD)
      add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
    else
      check_str_end(common, &end_list);

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
    set_jumps(end_list, LABEL());
    JUMPHERE(jump[0]);
    }
  else
    check_newlinechar(common, common->nltype, backtracks, TRUE);
  return cc;

  case OP_ALLANY:
  /* Any character at all: only STR_PTR has to be advanced. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
    if (common->invalid_utf)
      {
      /* With possibly invalid UTF the character is decoded by read_char(). */
      read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
      return cc;
      }

#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
    /* Valid UTF: step over the first code unit, then over the additional
    code units implied by its value. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if PCRE2_CODE_UNIT_WIDTH == 8
    /* Units below 0xc0 need no extra step; otherwise the extra length is
    looked up in utf8_table4, indexed by (unit - 0xc0). */
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif PCRE2_CODE_UNIT_WIDTH == 16
    /* Units below 0xd800 need no extra step; otherwise STR_PTR is advanced
    by (1 << 1) more when (unit & 0xfc00) == 0xd800, and by 0 if not. */
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
    JUMPHERE(jump[0]);
    return cc;
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
    }
#endif /* SUPPORT_UNICODE */
  /* Every remaining configuration advances by exactly one code unit. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

  case OP_ANYBYTE:
  /* Exactly one code unit, regardless of the UTF setting. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

#ifdef SUPPORT_UNICODE
  case OP_NOTPROP:
  case OP_PROP:
  /* Build a five-element extended-class description from the two operand
  code units (cc[0] and cc[1]) and let the XCLASS compiler emit the test. */
  propdata[0] = XCL_HASPROP;
  propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
  propdata[2] = cc[0];
  propdata[3] = cc[1];
  propdata[4] = XCL_END;
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  compile_xclass_matchingpath(common, propdata, backtracks);
  return cc + 2;
#endif

  case OP_ANYNL:
  /* A newline sequence: CR optionally followed by NL is handled here, any
  other character is tested by check_newlinechar() with the bsr_* settings. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
  jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  /* We don't need to handle soft partial matching case. */
  end_list = NULL;
  if (common->mode != PCRE2_JIT_PARTIAL_HARD)
    add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  else
    check_str_end(common, &end_list);
  /* After CR: consume the following code unit as well when it is NL. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  jump[2] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[0]);
  check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
  set_jumps(end_list, LABEL());
  JUMPHERE(jump[1]);
  JUMPHERE(jump[2]);
  return cc;

  case OP_NOT_HSPACE:
  case OP_HSPACE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  if (type == OP_NOT_HSPACE)
    read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
  else
    read_char(common, 0x9, 0x3000, NULL, 0);

  /* The test itself is a shared subroutine reached through common->hspace;
  its result is taken from the zero flag after the call returns. */
  add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_NOT_VSPACE:
  case OP_VSPACE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  if (type == OP_NOT_VSPACE)
    read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
  else
    read_char(common, 0xa, 0x2029, NULL, 0);

  /* The test itself is a shared subroutine reached through common->vspace;
  its result is taken from the zero flag after the call returns. */
  add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

#ifdef SUPPORT_UNICODE
  case OP_EXTUNI:
  /* An extended grapheme cluster is matched by calling one of the
  do_extuni_*() C helpers. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  /* The helper takes (arguments, current position): the first goes into R0
  here, and the assertion guarantees STR_PTR already is R1. */
  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);

#if PCRE2_CODE_UNIT_WIDTH != 32
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
    common->utf ? (common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_utf)) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
  /* Only the invalid-UTF helper can return NULL in this configuration. */
  if (common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
#else
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
    common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
  /* A zero (NULL) result is turned into a backtrack. */
  if (!common->utf || common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
#endif

  /* The helper returns the new subject position. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

  if (common->mode == PCRE2_JIT_PARTIAL_HARD)
    {
    jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
    /* Since we successfully read a char above, partial matching must occur. */
    check_partial(common, TRUE);
    JUMPHERE(jump[0]);
    }
  return cc;
#endif

  case OP_CHAR:
  case OP_CHARI:
  /* length is the number of code units of the pattern character. */
  length = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
#endif

  if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
    detect_partial_match(common, backtracks);

  /* Caseful characters, characters without another case, and characters
  whose cases can be told apart through char_get_othercase_bit() are
  compared as a fixed code unit sequence. */
  if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
    {
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

    context.length = IN_UCHARS(length);
    context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
    context.ucharptr = 0;
#endif
    return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
    }

  /* Remaining case: a caseless character whose two cases must both be
  compared against the character read from the subject. */
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
    GETCHAR(c, cc);
    }
  else
#endif
    c = *cc;

  SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));

  if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  oc = char_othercase(common, c);
  read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);

  SLJIT_ASSERT(!is_powerof2(c ^ oc));

  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
    {
    /* Branch-free form: replace TMP1 by c when it equals oc, then a single
    comparison with c decides. */
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
    CMOV(SLJIT_EQUAL, TMP1, SLJIT_IMM, c);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    }
  else
    {
    jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
    JUMPHERE(jump[0]);
    }
  return cc + length;

  case OP_NOT:
  case OP_NOTI:
  /* Any character other than the given one (and, for OP_NOTI, other than
  its other case). */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  length = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
#if PCRE2_CODE_UNIT_WIDTH == 8
    c = *cc;
    if (c < 128 && !common->invalid_utf)
      {
      /* The pattern character is a single code unit below 128, so testing
      the first code unit of the subject character is enough. */
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
      if (type == OP_NOT || !char_has_othercase(common, cc))
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      else
        {
        /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
        OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
        }
      /* Skip the variable-length character. */
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
      JUMPHERE(jump[0]);
      return cc + 1;
      }
    else
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
      {
      GETCHARLEN(c, cc, length);
      }
    }
  else
#endif /* SUPPORT_UNICODE */
    c = *cc;

  if (type == OP_NOT || !char_has_othercase(common, cc))
    {
    read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    }
  else
    {
    oc = char_othercase(common, c);
    read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
    bit = c ^ oc;
    if (is_powerof2(bit))
      {
      /* The two cases differ in a single bit: force that bit on and do one
      comparison. */
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
      }
    else
      {
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
      }
    }
  return cc + length;

  case OP_CLASS:
  case OP_NCLASS:
  /* Character class given as a 32-byte bitmap located at cc. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  /* bit is the largest character value handled through the bitmap. */
  bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
  if (type == OP_NCLASS)
    read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
  else
    read_char(common, 0, bit, NULL, 0);
#else
  if (type == OP_NCLASS)
    read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
  else
    read_char(common, 0, 255, NULL, 0);
#endif

  /* optimize_class() reports whether it emitted the complete test itself. */
  if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
    return cc + 32 / sizeof(PCRE2_UCHAR);

  /* Characters above the bitmap range never match OP_CLASS (backtrack) and
  always match OP_NCLASS (jump[0] skips the bitmap test). */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  jump[0] = NULL;
  if (common->utf)
    {
    jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
    if (type == OP_CLASS)
      {
      add_jump(compiler, backtracks, jump[0]);
      jump[0] = NULL;
      }
    }
#elif PCRE2_CODE_UNIT_WIDTH != 8
  jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  if (type == OP_CLASS)
    {
    add_jump(compiler, backtracks, jump[0]);
    jump[0] = NULL;
    }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

  /* Bitmap lookup: byte index is (char >> 3), bit mask is 1 << (char & 7);
  a clear bit means no match. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  if (jump[0] != NULL)
    JUMPHERE(jump[0]);
#endif
  return cc + 32 / sizeof(PCRE2_UCHAR);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  case OP_XCLASS:
  /* Extended class: the data after the LINK_SIZE length field is handed to
  the XCLASS compiler; GET(cc, 0) is used to step over the whole item. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
  return cc + GET(cc, 0) - 1;
#endif
  }
/* Every opcode this function is expected to receive returns above. */
SLJIT_UNREACHABLE();
return cc;
}
8824 
compile_charn_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,jump_list ** backtracks)8825 static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
8826 {
8827 /* This function consumes at least one input character. */
8828 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
8829 DEFINE_COMPILER;
8830 PCRE2_SPTR ccbegin = cc;
8831 compare_context context;
8832 int size;
8833 
8834 context.length = 0;
8835 do
8836   {
8837   if (cc >= ccend)
8838     break;
8839 
8840   if (*cc == OP_CHAR)
8841     {
8842     size = 1;
8843 #ifdef SUPPORT_UNICODE
8844     if (common->utf && HAS_EXTRALEN(cc[1]))
8845       size += GET_EXTRALEN(cc[1]);
8846 #endif
8847     }
8848   else if (*cc == OP_CHARI)
8849     {
8850     size = 1;
8851 #ifdef SUPPORT_UNICODE
8852     if (common->utf)
8853       {
8854       if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
8855         size = 0;
8856       else if (HAS_EXTRALEN(cc[1]))
8857         size += GET_EXTRALEN(cc[1]);
8858       }
8859     else
8860 #endif
8861     if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
8862       size = 0;
8863     }
8864   else
8865     size = 0;
8866 
8867   cc += 1 + size;
8868   context.length += IN_UCHARS(size);
8869   }
8870 while (size > 0 && context.length <= 128);
8871 
8872 cc = ccbegin;
8873 if (context.length > 0)
8874   {
8875   /* We have a fixed-length byte sequence. */
8876   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
8877   add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
8878 
8879   context.sourcereg = -1;
8880 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
8881   context.ucharptr = 0;
8882 #endif
8883   do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
8884   return cc;
8885   }
8886 
8887 /* A non-fixed length character will be checked if length == 0. */
8888 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
8889 }
8890 
8891 /* Forward definitions. */
8892 static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
8893 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
8894 
/* Allocates a zero-filled backtrack record of "size" bytes from the compiler's
memory, stores "ccstart" in its cc field and pushes it on top of the
parent's backtrack list (the previous top becomes its "prev"). The macro
expects the local names "compiler", "backtrack" and "parent" to be in scope.
If the compiler is in an error state after the allocation, the enclosing
function returns "error". */

#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)

/* Same as PUSH_BACKTRACK, for use in functions that return void. */

#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)

/* Views the current "backtrack" pointer as a pointer to the given type. */

#define BACKTRACK_AS(type) ((type *)backtrack)
8922 
/* Emits the code that selects, among the groups sharing a duplicated name,
the group a OP_DNREF / OP_DNREFI item refers to. The name-table entries are
tried in order and the first group whose OVECTOR start value differs from
OVECTOR(1) is chosen. On exit of the generated code TMP2 holds the address
of the chosen group's OVECTOR slot (the last entry's slot if none was
chosen). When backtracks is not NULL and common->unset_backref is not set,
the generated code backtracks if the last entry's start value also equals
OVECTOR(1). */

static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int entries = GET2(cc, 1 + IMM2_SIZE);
PCRE2_SPTR entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
unsigned int offset;
jump_list *found = NULL;
int i;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

/* TMP1 holds the value every candidate's start is compared against. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* All entries but the last: leave as soon as one has a differing start. */
for (i = 1; i < entries; i++)
  {
  offset = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
  add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
  entry += common->name_entry_size;
  }

/* The last entry is the fallback; it is the only one that may backtrack. */
offset = GET2(entry, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
if (backtracks != NULL && !common->unset_backref)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));

set_jumps(found, LABEL());
}
8952 
/* Emits the matching path of a back reference.

Arguments:
  common      the compiler state
  cc          points at the reference opcode. For OP_REF / OP_REFI the group
                number is read from the pattern; for any other opcode
                (OP_DNREF / OP_DNREFI) TMP2 must already hold the address of
                the group's OVECTOR slot, as left by compile_dnref_search()
  backtracks  list that receives every jump taken when the match fails
  withchecks  when TRUE, the checks for an unset group (skipped if
                common->unset_backref is set) and for an empty referenced
                string are emitted
  emptyfail   when TRUE (and withchecks is TRUE), an empty referenced string
                is a failure instead of an immediate success

The referenced text is compared with the subject at STR_PTR, which is
advanced past it on success. In UTF mode OP_REFI is compared character by
character using the Unicode other-case and caseless-set data; everything
else goes through the shared casefulcmp / caselesscmp subroutines. */

static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
/* OP_REF and OP_DNREF are the caseful references. Testing for OP_REF alone
(as was done previously) sent the caseful duplicate-name reference OP_DNREF
to the caseless comparison subroutine. */
BOOL caseful = (*cc == OP_REF || *cc == OP_DNREF);
int offset = 0;
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;
#if defined SUPPORT_UNICODE
struct sljit_label *loop;
struct sljit_label *caseless_loop;
jump_list *no_match = NULL;
int source_reg = COUNT_MATCH;
int source_end_reg = ARGUMENTS;
int char1_reg = STACK_LIMIT;
#endif /* SUPPORT_UNICODE */

/* TMP1 = start of the referenced string. */
if (ref)
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->unset_backref)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }
else
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);

#if defined SUPPORT_UNICODE
/* NOTE(review): OP_DNREFI does not take this Unicode-aware path and falls
through to the caselesscmp subroutine below. Extending the condition would
need common->iref_ptr to be reserved for OP_DNREFI as well, which cannot be
confirmed from this function - verify before changing. */
if (common->utf && *cc == OP_REFI)
  {
  SLJIT_ASSERT(common->iref_ptr != 0);

  /* TMP2 = end of the referenced string. */
  if (ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  if (withchecks && emptyfail)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));

  /* Three registers are borrowed for the loop; their values are saved in the
  three words at iref_ptr and restored on every exit path below. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0);

  OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
  OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);

  /* One iteration per character: leave through "jump" when the referenced
  string is exhausted (success) or through "partial" when the subject is. */
  loop = LABEL();
  jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
  partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

  /* Read original character. It must be a valid UTF character. */
  /* read_char() reads through STR_PTR, so STR_PTR is temporarily swapped
  with the source pointer (kept in TMP3 meanwhile). */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);

  read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);

  OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);

  /* Read second character. */
  read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);

  /* Identical characters match immediately. */
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);

  /* Look up the subject character's UCD record; the record index returned
  in TMP2 is scaled by 12 (index * 4 + index * 8) to address ucd_records. */
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));

  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));

  /* The other case is stored as an offset from the character itself. */
  OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

  /* No caseless set (caseset == 0) means no match. Otherwise search the
  set: continue while its members are smaller than the original character. */
  add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));

  caseless_loop = LABEL();
  OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
  OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, char1_reg, 0);
  JUMPTO(SLJIT_EQUAL, loop);
  JUMPTO(SLJIT_LESS, caseless_loop);

  /* Failure exit. Without partial matching, running out of subject is an
  ordinary failure as well. */
  set_jumps(no_match, LABEL());
  if (common->mode == PCRE2_JIT_COMPLETE)
    JUMPHERE(partial);

  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    /* Partial-match exit: the subject ended inside the reference. */
    JUMPHERE(partial);
    OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
    OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
    OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);

    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    }

  /* Success exit. */
  JUMPHERE(jump);
  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  return;
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* TMP2 = length of the referenced string (end - start); the zero flag is
  set when it is empty. */
  if (ref)
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  if (withchecks)
    jump = JUMP(SLJIT_ZERO);

  /* Advance STR_PTR by the whole length first, so that a single comparison
  with STR_END shows whether the subject is long enough. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, partial);

  /* The comparison subroutine leaves a non-zero TMP2 on a mismatch. */
  add_jump(compiler, caseful ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    nopartial = JUMP(SLJIT_JUMP);
    JUMPHERE(partial);
    /* TMP2 -= STR_END - STR_PTR */
    /* Only the part that fits before STR_END is compared; if it matches
    (or is empty) the result is a partial match. */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, caseful ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(partial);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }

/* Empty referenced string: either a failure or an immediate success. */
if (jump != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, jump);
  else
    JUMPHERE(jump);
  }
}
9117 
/* Emits the matching path for a back reference that is followed by a repeat
quantifier (OP_CRSTAR ... OP_CRMINRANGE).

cc points at the reference opcode. For OP_REF / OP_REFI the ovector offset of
the group is read directly from the pattern with GET2; for any other opcode
the code emitted by compile_dnref_search() is used and the value it leaves in
TMP2 is kept for the later iterations.

The quantifier is decoded into min / max. In the code below max == 0 means
that no upper limit is checked. Odd quantifier opcodes select the minimizing
variant (this relies on OP_CRSTAR being even, see the compile time assert).

Returns the pattern position just after the quantifier. PUSH_BACKTRACK is
invoked with NULL as its error value. */
static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
backtrack_common *backtrack;
PCRE2_UCHAR type;
int offset = 0;
struct sljit_label *label;
struct sljit_jump *zerolength;
struct sljit_jump *jump = NULL;
PCRE2_SPTR ccbegin = cc;
int min = 0, max = 0;
BOOL minimize;

PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);

/* After this step the quantifier opcode is always at cc[1 + IMM2_SIZE];
ccbegin still points at the reference opcode itself. */
if (ref)
  offset = GET2(cc, 1) << 1;
else
  cc += IMM2_SIZE;
type = cc[1 + IMM2_SIZE];

SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
minimize = (type & 0x1) != 0;
/* Decode the repeat limits and advance cc past the quantifier. */
switch(type)
  {
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  min = 0;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  min = 1;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  min = 0;
  max = 1;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  min = GET2(cc, 1 + IMM2_SIZE + 1);
  max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
  cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
  break;
  default:
  SLJIT_UNREACHABLE();
  break;
  }

/* Greedy repeat. */
if (!minimize)
  {
  if (min == 0)
    {
    allocate_stack(common, 2);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
    /* Temporary release of STR_PTR. */
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    /* Handles both invalid and empty cases. Since the minimum repeat
    is zero, the invalid case is basically the same as an empty case. */
    if (ref)
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    else
      {
      compile_dnref_search(common, ccbegin, NULL);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      /* Keep TMP2 in POSSESSIVE1; it is reloaded at the top of every iteration. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    /* Restore if not zero length. */
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    }
  else
    {
    allocate_stack(common, 1);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    if (ref)
      {
      /* At least one repeat is required here, so the first comparison goes to
      the backtrack list instead of being treated as an empty match. */
      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
      }
    else
      {
      compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      /* Keep TMP2 in POSSESSIVE1; it is reloaded at the top of every iteration. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    }

  /* An iteration counter (kept in POSSESSIVE0) is only emitted when a limit
  greater than one has to be checked. */
  if (min > 1 || max > 1)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);

  /* Start of one iteration. */
  label = LABEL();
  if (!ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);

  if (min > 1 || max > 1)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
    /* Below the minimum: repeat without saving STR_PTR. */
    if (min > 1)
      CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
    if (max > 1)
      {
      /* Stop once max is reached, otherwise push STR_PTR and repeat. */
      jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, label);
      JUMPHERE(jump);
      }
    }

  if (max == 0)
    {
    /* Includes min > 1 case as well. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, label);
    }

  JUMPHERE(zerolength);
  BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();

  count_match(common);
  return cc;
  }

/* Minimizing repeat. STACK(0) is initialised to 0 and later overwritten with
STR_PTR, STACK(1) is the iteration counter, and when the opcode is not
OP_REF / OP_REFI, STACK(2) keeps the TMP2 value left by compile_dnref_search(). */
allocate_stack(common, ref ? 2 : 3);
if (ref)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
/* For OP_CRMINSTAR (min == 0, max == 0) none of the counter accesses in this
function are emitted, so the counter slot is left uninitialised. */
if (type != OP_CRMINSTAR)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

if (min == 0)
  {
  /* Handles both invalid and empty cases. Since the minimum repeat
  is zero, the invalid case is basically the same as an empty case. */
  if (ref)
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    {
    compile_dnref_search(common, ccbegin, NULL);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  /* Length is non-zero, we can match real repeats. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  /* Skip the first iteration: the jump lands after the loop body below. */
  jump = JUMP(SLJIT_JUMP);
  }
else
  {
  if (ref)
    {
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    }
  else
    {
    compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  }

/* Entry point of one iteration; also the target of the min > 1 loop below. */
BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
if (max > 0)
  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));

if (!ref)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

if (min > 1)
  {
  /* Count the iteration and keep looping until the minimum is reached. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
  }
else if (max > 0)
  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);

if (jump != NULL)
  JUMPHERE(jump);
JUMPHERE(zerolength);

count_match(common);
return cc;
}
9324 
compile_recurse_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9325 static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9326 {
9327 DEFINE_COMPILER;
9328 backtrack_common *backtrack;
9329 recurse_entry *entry = common->entries;
9330 recurse_entry *prev = NULL;
9331 sljit_sw start = GET(cc, 1);
9332 PCRE2_SPTR start_cc;
9333 BOOL needs_control_head;
9334 
9335 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
9336 
9337 /* Inlining simple patterns. */
9338 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
9339   {
9340   start_cc = common->start + start;
9341   compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
9342   BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
9343   return cc + 1 + LINK_SIZE;
9344   }
9345 
9346 while (entry != NULL)
9347   {
9348   if (entry->start == start)
9349     break;
9350   prev = entry;
9351   entry = entry->next;
9352   }
9353 
9354 if (entry == NULL)
9355   {
9356   entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
9357   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9358     return NULL;
9359   entry->next = NULL;
9360   entry->entry_label = NULL;
9361   entry->backtrack_label = NULL;
9362   entry->entry_calls = NULL;
9363   entry->backtrack_calls = NULL;
9364   entry->start = start;
9365 
9366   if (prev != NULL)
9367     prev->next = entry;
9368   else
9369     common->entries = entry;
9370   }
9371 
9372 BACKTRACK_AS(recurse_backtrack)->entry = entry;
9373 
9374 if (entry->entry_label == NULL)
9375   add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));
9376 else
9377   JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
9378 /* Leave if the match is failed. */
9379 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
9380 BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
9381 return cc + 1 + LINK_SIZE;
9382 }
9383 
do_callout(struct jit_arguments * arguments,pcre2_callout_block * callout_block,PCRE2_SPTR * jit_ovector)9384 static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
9385 {
9386 PCRE2_SPTR begin;
9387 PCRE2_SIZE *ovector;
9388 sljit_u32 oveccount, capture_top;
9389 
9390 if (arguments->callout == NULL)
9391   return 0;
9392 
9393 SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);
9394 
9395 begin = arguments->begin;
9396 ovector = (PCRE2_SIZE*)(callout_block + 1);
9397 oveccount = callout_block->capture_top;
9398 
9399 SLJIT_ASSERT(oveccount >= 1);
9400 
9401 callout_block->version = 2;
9402 callout_block->callout_flags = 0;
9403 
9404 /* Offsets in subject. */
9405 callout_block->subject_length = arguments->end - arguments->begin;
9406 callout_block->start_match = jit_ovector[0] - begin;
9407 callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;
9408 callout_block->subject = begin;
9409 
9410 /* Convert and copy the JIT offset vector to the ovector array. */
9411 callout_block->capture_top = 1;
9412 callout_block->offset_vector = ovector;
9413 
9414 ovector[0] = PCRE2_UNSET;
9415 ovector[1] = PCRE2_UNSET;
9416 ovector += 2;
9417 jit_ovector += 2;
9418 capture_top = 1;
9419 
9420 /* Convert pointers to sizes. */
9421 while (--oveccount != 0)
9422   {
9423   capture_top++;
9424 
9425   ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin);
9426   ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin);
9427 
9428   if (ovector[0] != PCRE2_UNSET)
9429     callout_block->capture_top = capture_top;
9430 
9431   ovector += 2;
9432   jit_ovector += 2;
9433   }
9434 
9435 return (arguments->callout)(callout_block, arguments->callout_data);
9436 }
9437 
9438 #define CALLOUT_ARG_OFFSET(arg) \
9439     SLJIT_OFFSETOF(pcre2_callout_block, arg)
9440 
compile_callout_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9441 static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9442 {
9443 DEFINE_COMPILER;
9444 backtrack_common *backtrack;
9445 sljit_s32 mov_opcode;
9446 unsigned int callout_length = (*cc == OP_CALLOUT)
9447     ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
9448 sljit_sw value1;
9449 sljit_sw value2;
9450 sljit_sw value3;
9451 sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * sizeof(sljit_sw);
9452 
9453 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9454 
9455 callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);
9456 
9457 allocate_stack(common, callout_arg_size);
9458 
9459 SLJIT_ASSERT(common->capture_last_ptr != 0);
9460 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
9461 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9462 value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
9463 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
9464 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
9465 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);
9466 
9467 /* These pointer sized fields temporarly stores internal variables. */
9468 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
9469 
9470 if (common->mark_ptr != 0)
9471   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
9472 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
9473 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
9474 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
9475 
9476 if (*cc == OP_CALLOUT)
9477   {
9478   value1 = 0;
9479   value2 = 0;
9480   value3 = 0;
9481   }
9482 else
9483   {
9484   value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
9485   value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
9486   value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
9487   }
9488 
9489 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
9490 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
9491 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
9492 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
9493 
9494 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
9495 
9496 /* Needed to save important temporary registers. */
9497 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
9498 /* SLJIT_R0 = arguments */
9499 OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
9500 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
9501 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
9502 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
9503 free_stack(common, callout_arg_size);
9504 
9505 /* Check return value. */
9506 OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
9507 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER32));
9508 if (common->abort_label == NULL)
9509   add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL32) /* SIG_LESS */);
9510 else
9511   JUMPTO(SLJIT_NOT_EQUAL32 /* SIG_LESS */, common->abort_label);
9512 return cc + callout_length;
9513 }
9514 
9515 #undef CALLOUT_ARG_SIZE
9516 #undef CALLOUT_ARG_OFFSET
9517 
assert_needs_str_ptr_saving(PCRE2_SPTR cc)9518 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
9519 {
9520 while (TRUE)
9521   {
9522   switch (*cc)
9523     {
9524     case OP_CALLOUT_STR:
9525     cc += GET(cc, 1 + 2*LINK_SIZE);
9526     break;
9527 
9528     case OP_NOT_WORD_BOUNDARY:
9529     case OP_WORD_BOUNDARY:
9530     case OP_CIRC:
9531     case OP_CIRCM:
9532     case OP_DOLL:
9533     case OP_DOLLM:
9534     case OP_CALLOUT:
9535     case OP_ALT:
9536     cc += PRIV(OP_lengths)[*cc];
9537     break;
9538 
9539     case OP_KET:
9540     return FALSE;
9541 
9542     default:
9543     return TRUE;
9544     }
9545   }
9546 }
9547 
compile_assert_matchingpath(compiler_common * common,PCRE2_SPTR cc,assert_backtrack * backtrack,BOOL conditional)9548 static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
9549 {
9550 DEFINE_COMPILER;
9551 int framesize;
9552 int extrasize;
9553 BOOL local_quit_available = FALSE;
9554 BOOL needs_control_head;
9555 int private_data_ptr;
9556 backtrack_common altbacktrack;
9557 PCRE2_SPTR ccbegin;
9558 PCRE2_UCHAR opcode;
9559 PCRE2_UCHAR bra = OP_BRA;
9560 jump_list *tmp = NULL;
9561 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
9562 jump_list **found;
9563 /* Saving previous accept variables. */
9564 BOOL save_local_quit_available = common->local_quit_available;
9565 BOOL save_in_positive_assertion = common->in_positive_assertion;
9566 then_trap_backtrack *save_then_trap = common->then_trap;
9567 struct sljit_label *save_quit_label = common->quit_label;
9568 struct sljit_label *save_accept_label = common->accept_label;
9569 jump_list *save_quit = common->quit;
9570 jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
9571 jump_list *save_accept = common->accept;
9572 struct sljit_jump *jump;
9573 struct sljit_jump *brajump = NULL;
9574 
9575 /* Assert captures then. */
9576 common->then_trap = NULL;
9577 
9578 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
9579   {
9580   SLJIT_ASSERT(!conditional);
9581   bra = *cc;
9582   cc++;
9583   }
9584 private_data_ptr = PRIVATE_DATA(cc);
9585 SLJIT_ASSERT(private_data_ptr != 0);
9586 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
9587 backtrack->framesize = framesize;
9588 backtrack->private_data_ptr = private_data_ptr;
9589 opcode = *cc;
9590 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
9591 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
9592 ccbegin = cc;
9593 cc += GET(cc, 1);
9594 
9595 if (bra == OP_BRAMINZERO)
9596   {
9597   /* This is a braminzero backtrack path. */
9598   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9599   free_stack(common, 1);
9600   brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9601   }
9602 
9603 if (framesize < 0)
9604   {
9605   extrasize = 1;
9606   if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
9607     extrasize = 0;
9608 
9609   if (needs_control_head)
9610     extrasize++;
9611 
9612   if (framesize == no_frame)
9613     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
9614 
9615   if (extrasize > 0)
9616     allocate_stack(common, extrasize);
9617 
9618   if (needs_control_head)
9619     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9620 
9621   if (extrasize > 0)
9622     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9623 
9624   if (needs_control_head)
9625     {
9626     SLJIT_ASSERT(extrasize == 2);
9627     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9628     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9629     }
9630   }
9631 else
9632   {
9633   extrasize = needs_control_head ? 3 : 2;
9634   allocate_stack(common, framesize + extrasize);
9635 
9636   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9637   OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
9638   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
9639   if (needs_control_head)
9640     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9641   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9642 
9643   if (needs_control_head)
9644     {
9645     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
9646     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
9647     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9648     }
9649   else
9650     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9651 
9652   init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
9653   }
9654 
9655 memset(&altbacktrack, 0, sizeof(backtrack_common));
9656 if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
9657   {
9658   /* Control verbs cannot escape from these asserts. */
9659   local_quit_available = TRUE;
9660   common->local_quit_available = TRUE;
9661   common->quit_label = NULL;
9662   common->quit = NULL;
9663   }
9664 
9665 common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
9666 common->positive_assertion_quit = NULL;
9667 
9668 while (1)
9669   {
9670   common->accept_label = NULL;
9671   common->accept = NULL;
9672   altbacktrack.top = NULL;
9673   altbacktrack.topbacktracks = NULL;
9674 
9675   if (*ccbegin == OP_ALT && extrasize > 0)
9676     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9677 
9678   altbacktrack.cc = ccbegin;
9679   compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
9680   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9681     {
9682     if (local_quit_available)
9683       {
9684       common->local_quit_available = save_local_quit_available;
9685       common->quit_label = save_quit_label;
9686       common->quit = save_quit;
9687       }
9688     common->in_positive_assertion = save_in_positive_assertion;
9689     common->then_trap = save_then_trap;
9690     common->accept_label = save_accept_label;
9691     common->positive_assertion_quit = save_positive_assertion_quit;
9692     common->accept = save_accept;
9693     return NULL;
9694     }
9695   common->accept_label = LABEL();
9696   if (common->accept != NULL)
9697     set_jumps(common->accept, common->accept_label);
9698 
9699   /* Reset stack. */
9700   if (framesize < 0)
9701     {
9702     if (framesize == no_frame)
9703       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9704     else if (extrasize > 0)
9705       free_stack(common, extrasize);
9706 
9707     if (needs_control_head)
9708       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
9709     }
9710   else
9711     {
9712     if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
9713       {
9714       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9715       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
9716       if (needs_control_head)
9717         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
9718       }
9719     else
9720       {
9721       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9722       if (needs_control_head)
9723         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
9724       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9725       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
9726       }
9727     }
9728 
9729   if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
9730     {
9731     /* We know that STR_PTR was stored on the top of the stack. */
9732     if (conditional)
9733       {
9734       if (extrasize > 0)
9735         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
9736       }
9737     else if (bra == OP_BRAZERO)
9738       {
9739       if (framesize < 0)
9740         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
9741       else
9742         {
9743         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
9744         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
9745         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9746         }
9747       OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9748       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9749       }
9750     else if (framesize >= 0)
9751       {
9752       /* For OP_BRA and OP_BRAMINZERO. */
9753       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
9754       }
9755     }
9756   add_jump(compiler, found, JUMP(SLJIT_JUMP));
9757 
9758   compile_backtrackingpath(common, altbacktrack.top);
9759   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9760     {
9761     if (local_quit_available)
9762       {
9763       common->local_quit_available = save_local_quit_available;
9764       common->quit_label = save_quit_label;
9765       common->quit = save_quit;
9766       }
9767     common->in_positive_assertion = save_in_positive_assertion;
9768     common->then_trap = save_then_trap;
9769     common->accept_label = save_accept_label;
9770     common->positive_assertion_quit = save_positive_assertion_quit;
9771     common->accept = save_accept;
9772     return NULL;
9773     }
9774   set_jumps(altbacktrack.topbacktracks, LABEL());
9775 
9776   if (*cc != OP_ALT)
9777     break;
9778 
9779   ccbegin = cc;
9780   cc += GET(cc, 1);
9781   }
9782 
9783 if (local_quit_available)
9784   {
9785   SLJIT_ASSERT(common->positive_assertion_quit == NULL);
9786   /* Makes the check less complicated below. */
9787   common->positive_assertion_quit = common->quit;
9788   }
9789 
9790 /* None of them matched. */
9791 if (common->positive_assertion_quit != NULL)
9792   {
9793   jump = JUMP(SLJIT_JUMP);
9794   set_jumps(common->positive_assertion_quit, LABEL());
9795   SLJIT_ASSERT(framesize != no_stack);
9796   if (framesize < 0)
9797     OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
9798   else
9799     {
9800     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9801     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9802     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
9803     }
9804   JUMPHERE(jump);
9805   }
9806 
9807 if (needs_control_head)
9808   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
9809 
9810 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
9811   {
9812   /* Assert is failed. */
9813   if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
9814     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9815 
9816   if (framesize < 0)
9817     {
9818     /* The topmost item should be 0. */
9819     if (bra == OP_BRAZERO)
9820       {
9821       if (extrasize == 2)
9822         free_stack(common, 1);
9823       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9824       }
9825     else if (extrasize > 0)
9826       free_stack(common, extrasize);
9827     }
9828   else
9829     {
9830     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
9831     /* The topmost item should be 0. */
9832     if (bra == OP_BRAZERO)
9833       {
9834       free_stack(common, framesize + extrasize - 1);
9835       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9836       }
9837     else
9838       free_stack(common, framesize + extrasize);
9839     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9840     }
9841   jump = JUMP(SLJIT_JUMP);
9842   if (bra != OP_BRAZERO)
9843     add_jump(compiler, target, jump);
9844 
9845   /* Assert is successful. */
9846   set_jumps(tmp, LABEL());
9847   if (framesize < 0)
9848     {
9849     /* We know that STR_PTR was stored on the top of the stack. */
9850     if (extrasize > 0)
9851       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
9852 
9853     /* Keep the STR_PTR on the top of the stack. */
9854     if (bra == OP_BRAZERO)
9855       {
9856       OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9857       if (extrasize == 2)
9858         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9859       }
9860     else if (bra == OP_BRAMINZERO)
9861       {
9862       OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9863       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9864       }
9865     }
9866   else
9867     {
9868     if (bra == OP_BRA)
9869       {
9870       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9871       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
9872       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
9873       }
9874     else
9875       {
9876       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9877       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
9878       if (extrasize == 2)
9879         {
9880         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9881         if (bra == OP_BRAMINZERO)
9882           OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9883         }
9884       else
9885         {
9886         SLJIT_ASSERT(extrasize == 3);
9887         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
9888         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
9889         }
9890       }
9891     }
9892 
9893   if (bra == OP_BRAZERO)
9894     {
9895     backtrack->matchingpath = LABEL();
9896     SET_LABEL(jump, backtrack->matchingpath);
9897     }
9898   else if (bra == OP_BRAMINZERO)
9899     {
9900     JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
9901     JUMPHERE(brajump);
9902     if (framesize >= 0)
9903       {
9904       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9905       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9906       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
9907       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
9908       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9909       }
9910     set_jumps(backtrack->common.topbacktracks, LABEL());
9911     }
9912   }
9913 else
9914   {
9915   /* AssertNot is successful. */
9916   if (framesize < 0)
9917     {
9918     if (extrasize > 0)
9919       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9920 
9921     if (bra != OP_BRA)
9922       {
9923       if (extrasize == 2)
9924         free_stack(common, 1);
9925       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9926       }
9927     else if (extrasize > 0)
9928       free_stack(common, extrasize);
9929     }
9930   else
9931     {
9932     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9933     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
9934     /* The topmost item should be 0. */
9935     if (bra != OP_BRA)
9936       {
9937       free_stack(common, framesize + extrasize - 1);
9938       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9939       }
9940     else
9941       free_stack(common, framesize + extrasize);
9942     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9943     }
9944 
9945   if (bra == OP_BRAZERO)
9946     backtrack->matchingpath = LABEL();
9947   else if (bra == OP_BRAMINZERO)
9948     {
9949     JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
9950     JUMPHERE(brajump);
9951     }
9952 
9953   if (bra != OP_BRA)
9954     {
9955     SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
9956     set_jumps(backtrack->common.topbacktracks, LABEL());
9957     backtrack->common.topbacktracks = NULL;
9958     }
9959   }
9960 
9961 if (local_quit_available)
9962   {
9963   common->local_quit_available = save_local_quit_available;
9964   common->quit_label = save_quit_label;
9965   common->quit = save_quit;
9966   }
9967 common->in_positive_assertion = save_in_positive_assertion;
9968 common->then_trap = save_then_trap;
9969 common->accept_label = save_accept_label;
9970 common->positive_assertion_quit = save_positive_assertion_quit;
9971 common->accept = save_accept;
9972 return cc + 1 + LINK_SIZE;
9973 }
9974 
match_once_common(compiler_common * common,PCRE2_UCHAR ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)9975 static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
9976 {
9977 DEFINE_COMPILER;
9978 int stacksize;
9979 
9980 if (framesize < 0)
9981   {
9982   if (framesize == no_frame)
9983     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9984   else
9985     {
9986     stacksize = needs_control_head ? 1 : 0;
9987     if (ket != OP_KET || has_alternatives)
9988       stacksize++;
9989 
9990     if (stacksize > 0)
9991       free_stack(common, stacksize);
9992     }
9993 
9994   if (needs_control_head)
9995     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
9996 
9997   /* TMP2 which is set here used by OP_KETRMAX below. */
9998   if (ket == OP_KETRMAX)
9999     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10000   else if (ket == OP_KETRMIN)
10001     {
10002     /* Move the STR_PTR to the private_data_ptr. */
10003     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10004     }
10005   }
10006 else
10007   {
10008   stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
10009   OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
10010   if (needs_control_head)
10011     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10012 
10013   if (ket == OP_KETRMAX)
10014     {
10015     /* TMP2 which is set here used by OP_KETRMAX below. */
10016     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10017     }
10018   }
10019 if (needs_control_head)
10020   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10021 }
10022 
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)10023 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
10024 {
10025 DEFINE_COMPILER;
10026 
10027 if (common->capture_last_ptr != 0)
10028   {
10029   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
10030   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10031   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10032   stacksize++;
10033   }
10034 if (common->optimized_cbracket[offset >> 1] == 0)
10035   {
10036   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
10037   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
10038   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10039   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10040   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10041   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10042   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10043   stacksize += 2;
10044   }
10045 return stacksize;
10046 }
10047 
do_script_run(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10048 static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10049 {
10050   if (PRIV(script_run)(ptr, endptr, FALSE))
10051     return endptr;
10052   return NULL;
10053 }
10054 
10055 #ifdef SUPPORT_UNICODE
10056 
do_script_run_utf(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10057 static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10058 {
10059   if (PRIV(script_run)(ptr, endptr, TRUE))
10060     return endptr;
10061   return NULL;
10062 }
10063 
10064 #endif /* SUPPORT_UNICODE */
10065 
match_script_run_common(compiler_common * common,int private_data_ptr,backtrack_common * parent)10066 static SLJIT_INLINE void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
10067 {
10068 DEFINE_COMPILER;
10069 
10070 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
10071 
10072 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10073 #ifdef SUPPORT_UNICODE
10074 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
10075   common->utf ? SLJIT_FUNC_OFFSET(do_script_run_utf) : SLJIT_FUNC_OFFSET(do_script_run));
10076 #else
10077 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_script_run));
10078 #endif
10079 
10080 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
10081 add_jump(compiler, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
10082 }
10083 
/*
  Handling bracketed expressions is probably the most complex part.

  Stack layout naming characters:
    S - Push the current STR_PTR
    0 - Push a 0 (NULL)
    A - Push the current STR_PTR. Needed for restoring the STR_PTR
        before the next alternative. Not pushed if there are no alternatives.
    M - Any values pushed by the current alternative. Can be empty, or anything.
    C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
    L - Push the previous local (pointed to by localptr) to the stack
   () - optional values stored on the stack
  ()* - optional, can be stored multiple times

  The following list shows the regular expression templates, their PCRE byte codes
  and stack layout supported by pcre-sljit.

  (?:)                     OP_BRA     | OP_KET                A M
  ()                       OP_CBRA    | OP_KET                C M
  (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
                           OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
  (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
                           OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
  ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
                           OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
  ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
                           OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
  (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
  (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
  ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
  ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
  (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
           OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
  (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
           OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
  ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
           OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
  ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
           OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*


  Stack layout naming characters:
    A - Push the alternative index (starting from 0) on the stack.
        Not pushed if there are no alternatives.
    M - Any values pushed by the current alternative. Can be empty, or anything.

  The next list shows the possible content of a bracket:
  (|)     OP_*BRA    | OP_ALT ...         M A
  (?()|)  OP_*COND   | OP_ALT             M A
  (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
                                          Or nothing, if trace is unnecessary
*/
10136 
/* Generates the matching path of a bracketed group: plain, capturing and
conditional brackets, atomic groups (OP_ONCE), non-atomic assertions
(OP_ASSERT_NA / OP_ASSERTBACK_NA) and script runs, together with an optional
leading OP_BRAZERO / OP_BRAMINZERO and the closing OP_KET / OP_KETRMAX /
OP_KETRMIN. Only the first alternative is compiled here; the remaining
alternatives are skipped and counted.

Arguments:
  common   compiler state
  cc       points to the group opcode, or to the OP_BRAZERO / OP_BRAMINZERO
           that precedes it
  parent   backtrack record of the enclosing construct; for OP_BRAMINZERO it
           is accessed as a braminzero_backtrack

Returns the byte code pointer after the closing KET (plus the repeat data
length when the KET carries repeat information), or NULL when the compiler
is in an error state. */
static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode;
int private_data_ptr = 0;
int offset = 0;
int i, stacksize;
int repeat_ptr = 0, repeat_length = 0;
int repeat_type = 0, repeat_count = 0;
PCRE2_SPTR ccbegin;
PCRE2_SPTR matchingpath;
PCRE2_SPTR slot;
PCRE2_UCHAR bra = OP_BRA;
PCRE2_UCHAR ket;
assert_backtrack *assert;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
struct sljit_jump *jump;
struct sljit_jump *skip;
struct sljit_label *rmax_label = NULL;
struct sljit_jump *braminzero = NULL;

PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);

/* Step over the optional zero-repeat prefix; "bra" remembers which one it was. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  }

opcode = *cc;
ccbegin = cc;
matchingpath = bracketend(cc) - 1 - LINK_SIZE;
ket = *matchingpath;
/* A plain OP_KET with private data describes a counted repeat. OP_UPTO and
OP_MINUPTO are handled through the KETRMAX / KETRMIN code paths. */
if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
  {
  repeat_ptr = PRIVATE_DATA(matchingpath);
  repeat_length = PRIVATE_DATA(matchingpath + 1);
  repeat_type = PRIVATE_DATA(matchingpath + 2);
  repeat_count = PRIVATE_DATA(matchingpath + 3);
  SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }

matchingpath = ccbegin + 1 + LINK_SIZE;
SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
/* cc now points to the end of the first alternative (OP_ALT or a KET). */
cc += GET(cc, 1);

has_alternatives = *cc == OP_ALT;
if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
  {
  SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
    compile_time_checks_must_be_grouped_together);
  /* These conditions are decided at compile time, so only one branch is generated. */
  has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
  }

if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;

if (opcode == OP_CBRA || opcode == OP_SCBRA)
  {
  /* Capturing brackets has a pre-allocated space. */
  offset = GET2(ccbegin, 1 + LINK_SIZE);
  if (common->optimized_cbracket[offset] == 0)
    {
    private_data_ptr = OVECTOR_PRIV(offset);
    offset <<= 1;
    }
  else
    {
    offset <<= 1;
    private_data_ptr = OVECTOR(offset);
    }
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  matchingpath += IMM2_SIZE;
  }
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
  {
  /* Other brackets simply allocate the next entry. */
  private_data_ptr = PRIVATE_DATA(ccbegin);
  SLJIT_ASSERT(private_data_ptr != 0);
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  if (opcode == OP_ONCE)
    BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
  }

/* Instructions before the first alternative. */
stacksize = 0;
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  stacksize++;
if (bra == OP_BRAZERO)
  stacksize++;

if (stacksize > 0)
  allocate_stack(common, stacksize);

stacksize = 0;
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  stacksize++;
  }

if (bra == OP_BRAZERO)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);

if (bra == OP_BRAMINZERO)
  {
  /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (ket != OP_KETRMIN)
    {
    free_stack(common, 1);
    braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
    }
  else if (opcode == OP_ONCE || opcode >= OP_SBRA)
    {
    jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    /* Nothing stored during the first run. */
    skip = JUMP(SLJIT_JUMP);
    JUMPHERE(jump);
    /* Checking zero-length iteration. */
    if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
      {
      /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
      braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      }
    else
      {
      /* Except when the whole stack frame must be saved. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
      }
    JUMPHERE(skip);
    }
  else
    {
    jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    JUMPHERE(jump);
    }
  }

/* Counted repeats start with the full count; OP_EXACT loops back to this point. */
if (repeat_type != 0)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
  if (repeat_type == OP_EXACT)
    rmax_label = LABEL();
  }

if (ket == OP_KETRMIN)
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();

if (ket == OP_KETRMAX)
  {
  rmax_label = LABEL();
  if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
  }

/* Handling capturing brackets and alternatives. */
if (opcode == OP_ONCE)
  {
  stacksize = 0;
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    stacksize++;
    }

  if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
    {
    /* Neither capturing brackets nor recursions are found in the block. */
    if (ket == OP_KETRMIN)
      {
      stacksize += 2;
      if (!needs_control_head)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      }
    else
      {
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
      if (ket == OP_KETRMAX || has_alternatives)
        stacksize++;
      }

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    stacksize = 0;
    if (needs_control_head)
      {
      stacksize++;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
      }

    if (ket == OP_KETRMIN)
      {
      /* TMP2 held the control head until it was stored above; reload it
      with the previous private data value. */
      if (needs_control_head)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
        OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
      }
    else if (ket == OP_KETRMAX || has_alternatives)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
    }
  else
    {
    /* A frame is saved: control head (if needed), STR_PTR (if needed),
    the previous private data value and then the frame itself. */
    if (ket != OP_KET || has_alternatives)
      stacksize++;

    stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
    allocate_stack(common, stacksize);

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

    stacksize = needs_control_head ? 1 : 0;
    if (ket != OP_KET || has_alternatives)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
      stacksize++;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
      }
    init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
    }
  }
else if (opcode == OP_CBRA || opcode == OP_SCBRA)
  {
  /* Saving the previous values. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
    allocate_stack(common, 2);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    }
  }
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
  {
  /* Saving the previous value. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  }
else if (has_alternatives)
  {
  /* Pushing the starting string pointer. */
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  }

/* Generating code for the first alternative. */
if (opcode == OP_COND || opcode == OP_SCOND)
  {
  if (*matchingpath == OP_CREF)
    {
    /* The condition fails when the referenced OVECTOR entry equals OVECTOR(1). */
    SLJIT_ASSERT(has_alternatives);
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
      CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    matchingpath += 1 + IMM2_SIZE;
    }
  else if (*matchingpath == OP_DNCREF)
    {
    SLJIT_ASSERT(has_alternatives);

    /* Several groups share the name: the differences from OVECTOR(1) are
    OR-ed together, so the result is zero only if every entry equals it.
    STR_PTR is used as a scratch register and restored from TMP3. */
    i = GET2(matchingpath, 1 + IMM2_SIZE);
    slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
    slot += common->name_entry_size;
    i--;
    while (i-- > 0)
      {
      OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
      OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
      slot += common->name_entry_size;
      }
    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
    matchingpath += 1 + 2 * IMM2_SIZE;
    }
  else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
    {
    /* Never has other case. */
    BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
    SLJIT_ASSERT(!has_alternatives);

    /* From here on stacksize is reused as the compile time result of the
    condition: non-zero selects the first branch, zero the "else" branch. */
    if (*matchingpath == OP_TRUE)
      {
      stacksize = 1;
      matchingpath++;
      }
    else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
      stacksize = 0;
    else if (*matchingpath == OP_RREF)
      {
      stacksize = GET2(matchingpath, 1);
      if (common->currententry == NULL)
        stacksize = 0;
      else if (stacksize == RREF_ANY)
        stacksize = 1;
      else if (common->currententry->start == 0)
        stacksize = stacksize == 0;
      else
        stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);

      if (stacksize != 0)
        matchingpath += 1 + IMM2_SIZE;
      }
    else
      {
      /* Remaining opcode of the range (OP_DNRREF): search the name table
      entries for the group number of the current entry. */
      if (common->currententry == NULL || common->currententry->start == 0)
        stacksize = 0;
      else
        {
        stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
        slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
        i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
        while (stacksize > 0)
          {
          if ((int)GET2(slot, 0) == i)
            break;
          slot += common->name_entry_size;
          stacksize--;
          }
        }

      if (stacksize != 0)
        matchingpath += 1 + 2 * IMM2_SIZE;
      }

    /* The stacksize == 0 is a common "else" case. */
    if (stacksize == 0)
      {
      if (*cc == OP_ALT)
        {
        matchingpath = cc + 1 + LINK_SIZE;
        cc += GET(cc, 1);
        }
      else
        matchingpath = cc;
      }
    }
  else
    {
    SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
    /* Similar code as PUSH_BACKTRACK macro. */
    assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
      return NULL;
    memset(assert, 0, sizeof(assert_backtrack));
    assert->common.cc = matchingpath;
    BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
    matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
    }
  }

compile_matchingpath(common, matchingpath, cc, backtrack);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

/* Non-atomic assertions continue from the position saved in private_data_ptr. */
if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

if (opcode == OP_ONCE)
  match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

if (opcode == OP_SCRIPT_RUN)
  match_script_run_common(common, private_data_ptr, backtrack);

/* First pass: count the stack items pushed after the first alternative. */
stacksize = 0;
if (repeat_type == OP_MINUPTO)
  {
  /* We need to preserve the counter. TMP2 will be used below. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  stacksize++;
  }
if (ket != OP_KET || bra != OP_BRA)
  stacksize++;
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    stacksize++;
  if (common->optimized_cbracket[offset >> 1] == 0)
    stacksize += 2;
  }
if (has_alternatives && opcode != OP_ONCE)
  stacksize++;

if (stacksize > 0)
  allocate_stack(common, stacksize);

/* Second pass: fill the allocated slots in the same order. */
stacksize = 0;
if (repeat_type == OP_MINUPTO)
  {
  /* TMP2 was set above. */
  OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
  stacksize++;
  }

if (ket != OP_KET || bra != OP_BRA)
  {
  if (ket != OP_KET)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  stacksize++;
  }

if (offset != 0)
  stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

/* Skip and count the other alternatives. */
i = 1;
while (*cc == OP_ALT)
  {
  cc += GET(cc, 1);
  i++;
  }

if (has_alternatives)
  {
  if (opcode != OP_ONCE)
    {
    /* The last slot identifies the alternative that matched: an immediate 0
    for short lists, otherwise a label value filled in through put_label. */
    if (i <= 3)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
    else
      BACKTRACK_AS(bracket_backtrack)->u.matching_put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
    }
  if (ket != OP_KETRMAX)
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
  }

/* Must be after the matchingpath label. */
if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
  {
  SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
  }

if (ket == OP_KETRMAX)
  {
  if (repeat_type != 0)
    {
    if (has_alternatives)
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, rmax_label);
    /* Drop STR_PTR for greedy plus quantifier. */
    if (opcode != OP_ONCE)
      free_stack(common, 1);
    }
  else if (opcode < OP_BRA || opcode >= OP_SBRA)
    {
    if (has_alternatives)
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();

    /* Checking zero-length iteration. */
    if (opcode != OP_ONCE)
      {
      /* This case includes opcodes such as OP_SCRIPT_RUN. */
      CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
      /* Drop STR_PTR for greedy plus quantifier. */
      if (bra != OP_BRAZERO)
        free_stack(common, 1);
      }
    else
      /* TMP2 must contain the starting STR_PTR. */
      CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
    }
  else
    JUMPTO(SLJIT_JUMP, rmax_label);
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
  }

if (repeat_type == OP_EXACT)
  {
  count_match(common);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, rmax_label);
  }
else if (repeat_type == OP_UPTO)
  {
  /* We need to preserve the counter. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  }

if (bra == OP_BRAZERO)
  BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();

if (bra == OP_BRAMINZERO)
  {
  /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
  JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
  if (braminzero != NULL)
    {
    JUMPHERE(braminzero);
    /* We need to release the end pointer to perform the
    backtrack for the zero-length iteration. When
    framesize is < 0, OP_ONCE will do the release itself. */
    if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
      }
    else if (ket == OP_KETRMIN && opcode != OP_ONCE)
      free_stack(common, 1);
    }
  /* Continue to the normal backtrack. */
  }

if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
  count_match(common);

/* Step over the closing KET. */
cc += 1 + LINK_SIZE;

if (opcode == OP_ONCE)
  {
  /* We temporarily encode the needs_control_head in the lowest bit.
     Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
     the same value for small signed numbers (including negative numbers). */
  BACKTRACK_AS(bracket_backtrack)->u.framesize = (int)((unsigned)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
  }
return cc + repeat_length;
}
10697 
/* Emits the matching path for a repeated bracket whose opcode is OP_BRAPOS,
OP_SBRAPOS, OP_CBRAPOS or OP_SCBRAPOS, optionally preceded by OP_BRAPOSZERO.
Every alternative up to the closing OP_KETRPOS is compiled; after a
successful iteration the generated code jumps back to the start of the group
and tries again.

Arguments:
  common   the compiler state
  cc       points to the bracket opcode (or to the OP_BRAPOSZERO before it)
  parent   not referenced directly in this function; presumably consumed by
           the PUSH_BACKTRACK macro - TODO confirm

Returns:   the code pointer just after the OP_KETRPOS (cc + 1 + LINK_SIZE),
           or NULL when sljit_get_compiler_error() reports an error after
           compiling one of the alternatives
*/
compile_bracketpos_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)10698 static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10699 {
10700 DEFINE_COMPILER;
10701 backtrack_common *backtrack;
10702 PCRE2_UCHAR opcode;
10703 int private_data_ptr;
10704 int cbraprivptr = 0;
10705 BOOL needs_control_head;
10706 int framesize;
10707 int stacksize;
10708 int offset = 0;
10709 BOOL zero = FALSE;
10710 PCRE2_SPTR ccbegin = NULL;
10711 int stack; /* Also contains the offset of control head. */
10712 struct sljit_label *loop = NULL;
10713 struct jump_list *emptymatch = NULL;
10714 
      /* PUSH_BACKTRACK is a macro defined outside this view; presumably it
      allocates the bracketpos_backtrack record and assigns 'backtrack', which
      is not assigned anywhere else in this function - TODO confirm. */
10715 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
      /* A leading OP_BRAPOSZERO is remembered in 'zero' and skipped. When
      'zero' is FALSE a flag slot is reserved below and the group fails unless
      at least one iteration has matched. */
10716 if (*cc == OP_BRAPOSZERO)
10717   {
10718   zero = TRUE;
10719   cc++;
10720   }
10721 
10722 opcode = *cc;
10723 private_data_ptr = PRIVATE_DATA(cc);
10724 SLJIT_ASSERT(private_data_ptr != 0);
10725 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
      /* Find the first code unit of the first alternative (ccbegin). For the
      capturing variants also fetch the group number; 'offset' stays 0 for
      the non-capturing variants and is used as that distinction below. */
10726 switch(opcode)
10727   {
10728   case OP_BRAPOS:
10729   case OP_SBRAPOS:
10730   ccbegin = cc + 1 + LINK_SIZE;
10731   break;
10732 
10733   case OP_CBRAPOS:
10734   case OP_SCBRAPOS:
10735   offset = GET2(cc, 1 + LINK_SIZE);
10736   /* This case cannot be optimized in the same way as
10737   normal capturing brackets. */
10738   SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
10739   cbraprivptr = OVECTOR_PRIV(offset);
        /* From here on 'offset' is twice the group number: it is used as the
        index of the group's OVECTOR pair (offset, offset + 1). */
10740   offset <<= 1;
10741   ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
10742   break;
10743 
10744   default:
10745   SLJIT_UNREACHABLE();
10746   break;
10747   }
10748 
10749 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
10750 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
10751 if (framesize < 0)
10752   {
        /* No frame is initialized on this path. Count the stack slots: the
        saved OVECTOR pair (plus the capture_last value when it is tracked)
        for capturing groups, or a single slot for STR_PTR otherwise; one more
        for the control head if needed; and a final flag slot when !zero. */
10753   if (offset != 0)
10754     {
10755     stacksize = 2;
10756     if (common->capture_last_ptr != 0)
10757       stacksize++;
10758     }
10759   else
10760     stacksize = 1;
10761 
10762   if (needs_control_head)
10763     stacksize++;
10764   if (!zero)
10765     stacksize++;
10766 
10767   BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
10768   allocate_stack(common, stacksize);
        /* For no_frame the current STACK_TOP is kept in the private data slot
        so that it can be reloaded after each iteration in the loop below. */
10769   if (framesize == no_frame)
10770     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
10771 
10772   stack = 0;
10773   if (offset != 0)
10774     {
          /* Save the current capture values. The loads into TMP1/TMP2 are
          interleaved with the stores, so the statement order matters. */
10775     stack = 2;
10776     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
10777     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
10778     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
10779     if (common->capture_last_ptr != 0)
10780       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
10781     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
10782     if (needs_control_head)
10783       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10784     if (common->capture_last_ptr != 0)
10785       {
10786       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
10787       stack = 3;
10788       }
10789     }
10790   else
10791     {
10792     if (needs_control_head)
10793       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10794     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10795     stack = 1;
10796     }
10797 
        /* The flag occupies the last slot (STACK(stacksize - 1)) and starts
        as 1; the loop below clears it after a successful iteration. The
        control head (held in TMP2) goes into the slot just before the flag,
        and 'stack' is left indexing that control head slot. */
10798   if (needs_control_head)
10799     stack++;
10800   if (!zero)
10801     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
10802   if (needs_control_head)
10803     {
10804     stack--;
10805     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
10806     }
10807   }
10808 else
10809   {
        /* A frame is needed. framesize + 1 slots are reserved (presumably the
        frame written by init_frame plus the previous private data value
        stored from TMP1 below - TODO confirm), followed by the optional
        flag, control head and STR_PTR slots. */
10810   stacksize = framesize + 1;
10811   if (!zero)
10812     stacksize++;
10813   if (needs_control_head)
10814     stacksize++;
10815   if (offset == 0)
10816     stacksize++;
10817   BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
10818 
10819   allocate_stack(common, stacksize);
10820   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10821   if (needs_control_head)
10822     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
        /* The private data slot now holds STACK_TOP + stacksize words; the
        loop below subtracts the same amount to recover STACK_TOP. */
10823   OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10824 
        /* Slot order from STACK(0): flag (if !zero), control head (if
        needed), STR_PTR (non-capturing only), previous private data value. */
10825   stack = 0;
10826   if (!zero)
10827     {
10828     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
10829     stack = 1;
10830     }
10831   if (needs_control_head)
10832     {
10833     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
10834     stack++;
10835     }
10836   if (offset == 0)
10837     {
10838     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
10839     stack++;
10840     }
10841   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
10842   init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
        /* Step 'stack' back over the slots written after the control head,
        so that it indexes the control head slot when one was stored. */
10843   stack -= 1 + (offset == 0);
10844   }
10845 
      /* Capturing variants keep the start of the current iteration in
      cbraprivptr; it becomes OVECTOR(offset) after a successful iteration. */
10846 if (offset != 0)
10847   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
10848 
      /* Every successful iteration jumps back to this label. */
10849 loop = LABEL();
10850 while (*cc != OP_KETRPOS)
10851   {
        /* Start with empty backtrack lists for this alternative and move cc
        to the end of it, so that [ccbegin, cc) is what gets compiled.
        GET(cc, 1) presumably reads the link stored after the opcode. */
10852   backtrack->top = NULL;
10853   backtrack->topbacktracks = NULL;
10854   cc += GET(cc, 1);
10855 
10856   compile_matchingpath(common, ccbegin, cc, backtrack);
10857   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10858     return NULL;
10859 
        /* Code executed when the alternative has matched. */
10860   if (framesize < 0)
10861     {
10862     if (framesize == no_frame)
10863       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10864 
10865     if (offset != 0)
10866       {
            /* Record the capture: the iteration start (from cbraprivptr) goes
            to OVECTOR(offset) and the current STR_PTR to OVECTOR(offset + 1);
            STR_PTR also becomes the start of the next iteration. TMP1 keeps
            the old start for the empty-match test below. */
10867       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
10868       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10869       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
10870       if (common->capture_last_ptr != 0)
10871         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10872       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10873       }
10874     else
10875       {
            /* Replace the saved position in STACK(0) with STR_PTR; for
            OP_SBRAPOS the old value is kept in TMP1 for the test below. */
10876       if (opcode == OP_SBRAPOS)
10877         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10878       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10879       }
10880 
10881     /* Even if the match is empty, we need to reset the control head. */
10882     if (needs_control_head)
10883       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
10884 
          /* For the S variants an iteration that did not advance STR_PTR
          (TMP1 == STR_PTR) leaves the loop through 'emptymatch'. */
10885     if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
10886       add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
10887 
          /* An iteration has matched: clear the flag slot. */
10888     if (!zero)
10889       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
10890     }
10891   else
10892     {
10893     if (offset != 0)
10894       {
            /* Recover STACK_TOP from the private data slot, then record the
            capture exactly as in the frameless case above. */
10895       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10896       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
10897       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10898       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
10899       if (common->capture_last_ptr != 0)
10900         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10901       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10902       }
10903     else
10904       {
            /* TMP2 holds the private data value; the saved STR_PTR slot is
            addressed relative to it at STACK(-framesize - 2). */
10905       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10906       OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10907       if (opcode == OP_SBRAPOS)
10908         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
10909       OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
10910       }
10911 
10912     /* Even if the match is empty, we need to reset the control head. */
10913     if (needs_control_head)
10914       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
10915 
10916     if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
10917       add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
10918 
10919     if (!zero)
10920       {
            /* NOTE(review): this code is only reached when framesize >= 0
            (it is inside the else arm of the framesize < 0 test above), so
            the first arm below looks unreachable - confirm before removing.
            In the frame case the flag was stored at STACK(0). */
10921       if (framesize < 0)
10922         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
10923       else
10924         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10925       }
10926     }
10927 
        /* Try another iteration of the group. */
10928   JUMPTO(SLJIT_JUMP, loop);
10929   flush_stubs(common);
10930 
        /* Emit the backtracking code of this alternative; a failure inside
        the alternative continues at the label set just below. */
10931   compile_backtrackingpath(common, backtrack->top);
10932   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10933     return NULL;
10934   set_jumps(backtrack->topbacktracks, LABEL());
10935 
        /* The alternative failed: reload STR_PTR with the position stored
        before the iteration started (cbraprivptr or the saved stack slot). */
10936   if (framesize < 0)
10937     {
10938     if (offset != 0)
10939       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
10940     else
10941       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10942     }
10943   else
10944     {
10945     if (offset != 0)
10946       {
10947       /* Last alternative. */
            /* TMP2 is loaded here because the flag test after the loop
            addresses the flag slot relative to TMP2. */
10948       if (*cc == OP_KETRPOS)
10949         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10950       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
10951       }
10952     else
10953       {
10954       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10955       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
10956       }
10957     }
10958 
10959   if (*cc == OP_KETRPOS)
10960     break;
        /* Another alternative follows: its body starts after the opcode and
        link of the code item cc points to. */
10961   ccbegin = cc + 1 + LINK_SIZE;
10962   }
10963 
10964 /* We don't have to restore the control head in case of a failed match. */
10965 
      /* When the flag slot exists (!zero) and is still non-zero here, no
      iteration has matched, so the whole group fails. */
10966 backtrack->topbacktracks = NULL;
10967 if (!zero)
10968   {
10969   if (framesize < 0)
10970     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
10971   else /* TMP2 is set to [private_data_ptr] above. */
10972     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
10973   }
10974 
10975 /* None of them matched. */
10976 set_jumps(emptymatch, LABEL());
10977 count_match(common);
10978 return cc + 1 + LINK_SIZE;
10979 }
10980 
/* Decodes the repeat item at cc and normalizes it so that the caller only has
to deal with the opcodes of the OP_STAR family (and OP_EXACT).

Arguments:
  common   the compiler state (read for the utf flag and passed to
           next_opcode)
  cc       points to the repeat opcode, or to a class opcode that is followed
           by a repeat
  opcode   receives the normalized repeat opcode; the PLUS forms are turned
           into the matching STAR form with *exact set to 1
  type     receives what is repeated: OP_CHAR, OP_CHARI, OP_NOT, OP_NOTI, the
           opcode read from the code for the OP_TYPE* repeats, or the class
           opcode itself
  max      written only for the UPTO forms and for class ranges (for class
           ranges with a non-zero maximum, *exact is subtracted from it); it
           is left untouched on every other path
  exact    receives the mandatory repeat count (0 when there is none)
  end      receives the pointer to the code following the repeated item

Returns:   pointer to the data of the repeated item: the character, the code
           unit after the type opcode, or the code unit after the class opcode
*/
get_iterator_parameters(compiler_common * common,PCRE2_SPTR cc,PCRE2_UCHAR * opcode,PCRE2_UCHAR * type,sljit_u32 * max,sljit_u32 * exact,PCRE2_SPTR * end)10981 static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
10982 {
10983 int class_len;
10984 
10985 *opcode = *cc;
10986 *exact = 0;
10987 
      /* Map each repeat family onto the OP_STAR family by subtracting the
      distance between the first opcodes of the two families. This relies on
      every family listing its members in the same order as the OP_STAR
      family (the opcode table is defined outside this view). */
10988 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
10989   {
10990   cc++;
10991   *type = OP_CHAR;
10992   }
10993 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
10994   {
10995   cc++;
10996   *type = OP_CHARI;
10997   *opcode -= OP_STARI - OP_STAR;
10998   }
10999 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
11000   {
11001   cc++;
11002   *type = OP_NOT;
11003   *opcode -= OP_NOTSTAR - OP_STAR;
11004   }
11005 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
11006   {
11007   cc++;
11008   *type = OP_NOTI;
11009   *opcode -= OP_NOTSTARI - OP_STAR;
11010   }
11011 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
11012   {
11013   cc++;
11014   *opcode -= OP_TYPESTAR - OP_STAR;
        /* OP_END is a placeholder: the real type opcode follows the optional
        repeat count and is read at the end of the function. */
11015   *type = OP_END;
11016   }
11017 else
11018   {
        /* Character classes: the repeat opcode follows the class data. */
11019   SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
11020   *type = *opcode;
11021   cc++;
        /* class_len is the length of the class item including its opcode: a
        fixed size for OP_CLASS / OP_NCLASS (the test is *type < OP_XCLASS),
        or the value read by GET(cc, 0) for OP_XCLASS. Since cc has already
        been advanced past the opcode, cc[class_len - 1] is the code unit
        that follows the class. */
11022   class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
11023   *opcode = cc[class_len - 1];
11024 
11025   if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
11026     {
11027     *opcode -= OP_CRSTAR - OP_STAR;
11028     *end = cc + class_len;
11029 
          /* A PLUS repeat is one mandatory match followed by a STAR. */
11030     if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
11031       {
11032       *exact = 1;
11033       *opcode -= OP_PLUS - OP_STAR;
11034       }
11035     }
11036   else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
11037     {
11038     *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
11039     *end = cc + class_len;
11040 
11041     if (*opcode == OP_POSPLUS)
11042       {
11043       *exact = 1;
11044       *opcode = OP_POSSTAR;
11045       }
11046     }
11047   else
11048     {
          /* Range repeat: two IMM2 values follow the repeat opcode. The
          first is read into *exact and the second into *max. */
11049     SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
11050     *max = GET2(cc, (class_len + IMM2_SIZE));
11051     *exact = GET2(cc, class_len);
11052 
          /* A stored maximum of 0 is turned into a STAR form after the exact
          part (presumably 0 encodes "no upper limit" - TODO confirm). */
11053     if (*max == 0)
11054       {
11055       if (*opcode == OP_CRPOSRANGE)
11056         *opcode = OP_POSSTAR;
11057       else
11058         *opcode -= OP_CRRANGE - OP_STAR;
11059       }
11060     else
11061       {
            /* *max becomes the number of repeats allowed beyond *exact:
            0 -> only the exact part, 1 -> a QUERY form, more -> an UPTO
            form. */
11062       *max -= *exact;
11063       if (*max == 0)
11064         *opcode = OP_EXACT;
11065       else if (*max == 1)
11066         {
11067         if (*opcode == OP_CRPOSRANGE)
11068           *opcode = OP_POSQUERY;
11069         else
11070           *opcode -= OP_CRRANGE - OP_QUERY;
11071         }
11072       else
11073         {
11074         if (*opcode == OP_CRPOSRANGE)
11075           *opcode = OP_POSUPTO;
11076         else
11077           *opcode -= OP_CRRANGE - OP_UPTO;
11078         }
11079       }
11080     *end = cc + class_len + 2 * IMM2_SIZE;
11081     }
        /* Classes return here; cc points just after the class opcode. */
11082   return cc;
11083   }
11084 
      /* Non-class repeats: read the count that follows the opcode, if any,
      and fold the PLUS forms into exact = 1 plus the STAR form. Opcodes not
      listed need no adjustment (the switch has no default case). */
11085 switch(*opcode)
11086   {
11087   case OP_EXACT:
11088   *exact = GET2(cc, 0);
11089   cc += IMM2_SIZE;
11090   break;
11091 
11092   case OP_PLUS:
11093   case OP_MINPLUS:
11094   *exact = 1;
11095   *opcode -= OP_PLUS - OP_STAR;
11096   break;
11097 
11098   case OP_POSPLUS:
11099   *exact = 1;
11100   *opcode = OP_POSSTAR;
11101   break;
11102 
11103   case OP_UPTO:
11104   case OP_MINUPTO:
11105   case OP_POSUPTO:
11106   *max = GET2(cc, 0);
11107   cc += IMM2_SIZE;
11108   break;
11109   }
11110 
      /* OP_TYPE* repeats: the repeated type opcode is at cc. *end is computed
      by next_opcode before cc is advanced past the type opcode. */
11111 if (*type == OP_END)
11112   {
11113   *type = *cc;
11114   *end = next_opcode(common, cc);
11115   cc++;
11116   return cc;
11117   }
11118 
      /* Single character repeats: the item is one code unit, plus the extra
      code units reported by GET_EXTRALEN in UTF mode. */
11119 *end = cc + 1;
11120 #ifdef SUPPORT_UNICODE
11121 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
11122 #endif
11123 return cc;
11124 }
11125 
compile_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11126 static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11127 {
11128 DEFINE_COMPILER;
11129 backtrack_common *backtrack;
11130 PCRE2_UCHAR opcode;
11131 PCRE2_UCHAR type;
11132 sljit_u32 max = 0, exact;
11133 sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
11134 sljit_s32 early_fail_type;
11135 BOOL charpos_enabled;
11136 PCRE2_UCHAR charpos_char;
11137 unsigned int charpos_othercasebit;
11138 PCRE2_SPTR end;
11139 jump_list *no_match = NULL;
11140 jump_list *no_char1_match = NULL;
11141 struct sljit_jump *jump = NULL;
11142 struct sljit_label *label;
11143 int private_data_ptr = PRIVATE_DATA(cc);
11144 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
11145 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
11146 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
11147 int tmp_base, tmp_offset;
11148 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11149 BOOL use_tmp;
11150 #endif
11151 
11152 PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
11153 
11154 early_fail_type = (early_fail_ptr & 0x7);
11155 early_fail_ptr >>= 3;
11156 
11157 /* During recursion, these optimizations are disabled. */
11158 if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL)
11159   {
11160   early_fail_ptr = 0;
11161   early_fail_type = type_skip;
11162   }
11163 
11164 SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
11165   || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));
11166 
11167 if (early_fail_type == type_fail)
11168   add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));
11169 
11170 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
11171 
11172 if (type != OP_EXTUNI)
11173   {
11174   tmp_base = TMP3;
11175   tmp_offset = 0;
11176   }
11177 else
11178   {
11179   tmp_base = SLJIT_MEM1(SLJIT_SP);
11180   tmp_offset = POSSESSIVE0;
11181   }
11182 
11183 /* Handle fixed part first. */
11184 if (exact > 1)
11185   {
11186   SLJIT_ASSERT(early_fail_ptr == 0);
11187 
11188   if (common->mode == PCRE2_JIT_COMPLETE
11189 #ifdef SUPPORT_UNICODE
11190       && !common->utf
11191 #endif
11192       && type != OP_ANYNL && type != OP_EXTUNI)
11193     {
11194     OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
11195     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
11196     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11197     label = LABEL();
11198     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11199     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11200     JUMPTO(SLJIT_NOT_ZERO, label);
11201     }
11202   else
11203     {
11204     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11205     label = LABEL();
11206     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11207     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11208     JUMPTO(SLJIT_NOT_ZERO, label);
11209     }
11210   }
11211 else if (exact == 1)
11212   {
11213   compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11214 
11215   if (early_fail_type == type_fail_range)
11216     {
11217     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
11218     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw));
11219     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
11220     OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
11221     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
11222 
11223     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw), STR_PTR, 0);
11224     }
11225   }
11226 
11227 switch(opcode)
11228   {
11229   case OP_STAR:
11230   case OP_UPTO:
11231   SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR);
11232 
11233   if (type == OP_ANYNL || type == OP_EXTUNI)
11234     {
11235     SLJIT_ASSERT(private_data_ptr == 0);
11236     SLJIT_ASSERT(early_fail_ptr == 0);
11237 
11238     allocate_stack(common, 2);
11239     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11240     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
11241 
11242     if (opcode == OP_UPTO)
11243       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
11244 
11245     label = LABEL();
11246     compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11247     if (opcode == OP_UPTO)
11248       {
11249       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
11250       OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11251       jump = JUMP(SLJIT_ZERO);
11252       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
11253       }
11254 
11255     /* We cannot use TMP3 because of allocate_stack. */
11256     allocate_stack(common, 1);
11257     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11258     JUMPTO(SLJIT_JUMP, label);
11259     if (jump != NULL)
11260       JUMPHERE(jump);
11261     BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11262     break;
11263     }
11264 #ifdef SUPPORT_UNICODE
11265   else if (type == OP_ALLANY && !common->invalid_utf)
11266 #else
11267   else if (type == OP_ALLANY)
11268 #endif
11269     {
11270     if (opcode == OP_STAR)
11271       {
11272       if (private_data_ptr == 0)
11273         allocate_stack(common, 2);
11274 
11275       OP1(SLJIT_MOV, base, offset0, STR_END, 0);
11276       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11277 
11278       OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11279       process_partial_match(common);
11280 
11281       if (early_fail_ptr != 0)
11282         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11283       BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11284       break;
11285       }
11286 #ifdef SUPPORT_UNICODE
11287     else if (!common->utf)
11288 #else
11289     else
11290 #endif
11291       {
11292       if (private_data_ptr == 0)
11293         allocate_stack(common, 2);
11294 
11295       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11296       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11297 
11298       if (common->mode == PCRE2_JIT_COMPLETE)
11299         {
11300         OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
11301         CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11302         }
11303       else
11304         {
11305         jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11306         process_partial_match(common);
11307         JUMPHERE(jump);
11308         }
11309 
11310       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11311 
11312       if (early_fail_ptr != 0)
11313         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11314       BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11315       break;
11316       }
11317     }
11318 
11319   charpos_enabled = FALSE;
11320   charpos_char = 0;
11321   charpos_othercasebit = 0;
11322 
11323   if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
11324     {
11325 #ifdef SUPPORT_UNICODE
11326     charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
11327 #else
11328     charpos_enabled = TRUE;
11329 #endif
11330     if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
11331       {
11332       charpos_othercasebit = char_get_othercase_bit(common, end + 1);
11333       if (charpos_othercasebit == 0)
11334         charpos_enabled = FALSE;
11335       }
11336 
11337     if (charpos_enabled)
11338       {
11339       charpos_char = end[1];
11340       /* Consume the OP_CHAR opcode. */
11341       end += 2;
11342 #if PCRE2_CODE_UNIT_WIDTH == 8
11343       SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
11344 #elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
11345       SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
11346       if ((charpos_othercasebit & 0x100) != 0)
11347         charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
11348 #endif
11349       if (charpos_othercasebit != 0)
11350         charpos_char |= charpos_othercasebit;
11351 
11352       BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
11353       BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
11354       BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
11355       }
11356     }
11357 
11358   if (charpos_enabled)
11359     {
11360     if (opcode == OP_UPTO)
11361       OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
11362 
11363     /* Search the first instance of charpos_char. */
11364     jump = JUMP(SLJIT_JUMP);
11365     label = LABEL();
11366     if (opcode == OP_UPTO)
11367       {
11368       OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11369       add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
11370       }
11371     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11372     if (early_fail_ptr != 0)
11373       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11374     JUMPHERE(jump);
11375 
11376     detect_partial_match(common, &backtrack->topbacktracks);
11377     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11378     if (charpos_othercasebit != 0)
11379       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11380     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11381 
11382     if (private_data_ptr == 0)
11383       allocate_stack(common, 2);
11384     OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11385     OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11386 
11387     if (opcode == OP_UPTO)
11388       {
11389       OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11390       add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11391       }
11392 
11393     /* Search the last instance of charpos_char. */
11394     label = LABEL();
11395     compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11396     if (early_fail_ptr != 0)
11397       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11398     detect_partial_match(common, &no_match);
11399     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11400     if (charpos_othercasebit != 0)
11401       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11402 
11403     if (opcode == OP_STAR)
11404       {
11405       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11406       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11407       JUMPTO(SLJIT_JUMP, label);
11408       }
11409     else
11410       {
11411       jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
11412       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11413       JUMPHERE(jump);
11414       OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11415       JUMPTO(SLJIT_NOT_ZERO, label);
11416       }
11417 
11418     set_jumps(no_match, LABEL());
11419     OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1));
11420     OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11421     }
11422   else
11423     {
11424     if (private_data_ptr == 0)
11425       allocate_stack(common, 2);
11426 
11427     OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11428 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11429     use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR);
11430     SLJIT_ASSERT(!use_tmp || tmp_base == TMP3);
11431 
11432     if (common->utf)
11433       OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11434 #endif
11435     if (opcode == OP_UPTO)
11436       OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11437 
11438     detect_partial_match(common, &no_match);
11439     label = LABEL();
11440     compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11441 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11442     if (common->utf)
11443       OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11444 #endif
11445 
11446     if (opcode == OP_UPTO)
11447       {
11448       OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11449       add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11450       }
11451 
11452     detect_partial_match_to(common, label);
11453     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11454 
11455     set_jumps(no_char1_match, LABEL());
11456 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11457     if (common->utf)
11458       {
11459       set_jumps(no_match, LABEL());
11460       if (use_tmp)
11461         {
11462         OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
11463         OP1(SLJIT_MOV, base, offset0, TMP3, 0);
11464         }
11465       else
11466         OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11467       }
11468     else
11469 #endif
11470       {
11471       OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11472       set_jumps(no_match, LABEL());
11473       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11474       }
11475 
11476     if (early_fail_ptr != 0)
11477       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11478     }
11479 
11480   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11481   break;
11482 
11483   case OP_MINSTAR:
11484   if (private_data_ptr == 0)
11485     allocate_stack(common, 1);
11486   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11487   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11488   if (early_fail_ptr != 0)
11489     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11490   break;
11491 
11492   case OP_MINUPTO:
11493   SLJIT_ASSERT(early_fail_ptr == 0);
11494   if (private_data_ptr == 0)
11495     allocate_stack(common, 2);
11496   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11497   OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
11498   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11499   break;
11500 
11501   case OP_QUERY:
11502   case OP_MINQUERY:
11503   SLJIT_ASSERT(early_fail_ptr == 0);
11504   if (private_data_ptr == 0)
11505     allocate_stack(common, 1);
11506   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11507   if (opcode == OP_QUERY)
11508     compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11509   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11510   break;
11511 
11512   case OP_EXACT:
11513   break;
11514 
11515   case OP_POSSTAR:
11516 #if defined SUPPORT_UNICODE
11517   if (type == OP_ALLANY && !common->invalid_utf)
11518 #else
11519   if (type == OP_ALLANY)
11520 #endif
11521     {
11522     OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11523     process_partial_match(common);
11524     if (early_fail_ptr != 0)
11525       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11526     break;
11527     }
11528 
11529 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11530   if (common->utf)
11531     {
11532     OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11533     detect_partial_match(common, &no_match);
11534     label = LABEL();
11535     compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11536     OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11537     detect_partial_match_to(common, label);
11538 
11539     set_jumps(no_match, LABEL());
11540     OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11541     if (early_fail_ptr != 0)
11542       {
11543       if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3)
11544         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0);
11545       else
11546         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11547       }
11548     break;
11549     }
11550 #endif
11551 
11552   detect_partial_match(common, &no_match);
11553   label = LABEL();
11554   compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11555   detect_partial_match_to(common, label);
11556   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11557 
11558   set_jumps(no_char1_match, LABEL());
11559   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11560   set_jumps(no_match, LABEL());
11561   if (early_fail_ptr != 0)
11562     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11563   break;
11564 
11565   case OP_POSUPTO:
11566   SLJIT_ASSERT(early_fail_ptr == 0);
11567 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11568   if (common->utf)
11569     {
11570     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11571     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11572 
11573     detect_partial_match(common, &no_match);
11574     label = LABEL();
11575     compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11576     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11577     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11578     add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11579     detect_partial_match_to(common, label);
11580 
11581     set_jumps(no_match, LABEL());
11582     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
11583     break;
11584     }
11585 #endif
11586 
11587   if (type == OP_ALLANY)
11588     {
11589     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11590 
11591     if (common->mode == PCRE2_JIT_COMPLETE)
11592       {
11593       OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
11594       CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11595       }
11596     else
11597       {
11598       jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11599       process_partial_match(common);
11600       JUMPHERE(jump);
11601       }
11602     break;
11603     }
11604 
11605   OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11606 
11607   detect_partial_match(common, &no_match);
11608   label = LABEL();
11609   compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11610   OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11611   add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11612   detect_partial_match_to(common, label);
11613   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11614 
11615   set_jumps(no_char1_match, LABEL());
11616   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11617   set_jumps(no_match, LABEL());
11618   break;
11619 
11620   case OP_POSQUERY:
11621   SLJIT_ASSERT(early_fail_ptr == 0);
11622   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11623   compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11624   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11625   set_jumps(no_match, LABEL());
11626   OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11627   break;
11628 
11629   default:
11630   SLJIT_UNREACHABLE();
11631   break;
11632   }
11633 
11634 count_match(common);
11635 return end;
11636 }
11637 
compile_fail_accept_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11638 static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11639 {
11640 DEFINE_COMPILER;
11641 backtrack_common *backtrack;
11642 
11643 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
11644 
11645 if (*cc == OP_FAIL)
11646   {
11647   add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11648   return cc + 1;
11649   }
11650 
11651 if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
11652   add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
11653 
11654 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
11655   {
11656   /* No need to check notempty conditions. */
11657   if (common->accept_label == NULL)
11658     add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
11659   else
11660     JUMPTO(SLJIT_JUMP, common->accept_label);
11661   return cc + 1;
11662   }
11663 
11664 if (common->accept_label == NULL)
11665   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
11666 else
11667   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
11668 
11669 if (HAS_VIRTUAL_REGISTERS)
11670   {
11671   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11672   OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
11673   }
11674 else
11675   OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options));
11676 
11677 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
11678 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO));
11679 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
11680 if (common->accept_label == NULL)
11681   add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
11682 else
11683   JUMPTO(SLJIT_ZERO, common->accept_label);
11684 
11685 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
11686 if (common->accept_label == NULL)
11687   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
11688 else
11689   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
11690 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11691 return cc + 1;
11692 }
11693 
compile_close_matchingpath(compiler_common * common,PCRE2_SPTR cc)11694 static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
11695 {
11696 DEFINE_COMPILER;
11697 int offset = GET2(cc, 1);
11698 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
11699 
11700 /* Data will be discarded anyway... */
11701 if (common->currententry != NULL)
11702   return cc + 1 + IMM2_SIZE;
11703 
11704 if (!optimized_cbracket)
11705   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
11706 offset <<= 1;
11707 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11708 if (!optimized_cbracket)
11709   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11710 return cc + 1 + IMM2_SIZE;
11711 }
11712 
compile_control_verb_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11713 static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11714 {
11715 DEFINE_COMPILER;
11716 backtrack_common *backtrack;
11717 PCRE2_UCHAR opcode = *cc;
11718 PCRE2_SPTR ccend = cc + 1;
11719 
11720 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
11721     opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
11722   ccend += 2 + cc[1];
11723 
11724 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
11725 
11726 if (opcode == OP_SKIP)
11727   {
11728   allocate_stack(common, 1);
11729   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11730   return ccend;
11731   }
11732 
11733 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
11734   {
11735   if (HAS_VIRTUAL_REGISTERS)
11736     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11737   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
11738   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
11739   OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
11740   }
11741 
11742 return ccend;
11743 }
11744 
11745 static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
11746 
compile_then_trap_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)11747 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
11748 {
11749 DEFINE_COMPILER;
11750 backtrack_common *backtrack;
11751 BOOL needs_control_head;
11752 int size;
11753 
11754 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
11755 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
11756 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
11757 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
11758 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
11759 
11760 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
11761 size = 3 + (size < 0 ? 0 : size);
11762 
11763 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11764 allocate_stack(common, size);
11765 if (size > 3)
11766   OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
11767 else
11768   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
11769 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
11770 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
11771 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
11772 
11773 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
11774 if (size >= 0)
11775   init_frame(common, cc, ccend, size - 1, 0);
11776 }
11777 
/* The jump list that receives the failure jumps of simple items: the
nextbacktracks list of the parent's most recent backtrack entry when there is
one, otherwise the parent's own topbacktracks list. It must be re-evaluated at
every use, because parent->top changes as new entries are pushed. */
#define PARENT_FAIL_JUMPS(parent) \
  ((parent)->top != NULL ? &(parent)->top->nextbacktracks : &(parent)->topbacktracks)

/* Generates the matching path for the opcodes in the range [cc, ccend),
dispatching each opcode to its dedicated generator.

Arguments:
  common   the JIT compiler state
  cc       first opcode to compile
  ccend    end of the range; must point at OP_END or at an opcode between
           OP_ALT and OP_KETRPOS (asserted)
  parent   the backtrack that receives the entries pushed for the items

The function returns early, leaving the range unfinished, when a generator
hands back NULL or when one of the PUSH_BACKTRACK_NOVALUE invocations bails
out.
*/
static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
then_trap_backtrack *save_then_trap = NULL;
BOOL has_then_trap = FALSE;
PCRE2_UCHAR repeat_op;

SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));

if (common->has_then && common->then_offsets[cc - common->start] != 0)
  {
  SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
  has_then_trap = TRUE;
  save_then_trap = common->then_trap;
  /* Tail item on backtrack. */
  compile_then_trap_matchingpath(common, cc, ccend, parent);
  }

while (cc < ccend)
  {
  switch(*cc)
    {
    /* Simple assertions. */
    case OP_SOD: case OP_SOM:
    case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
    case OP_EODN: case OP_EOD:
    case OP_DOLL: case OP_DOLLM:
    case OP_CIRC: case OP_CIRCM:
    case OP_REVERSE:
    cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, PARENT_FAIL_JUMPS(parent));
    break;

    /* Single character types. */
    case OP_NOT_DIGIT: case OP_DIGIT:
    case OP_NOT_WHITESPACE: case OP_WHITESPACE:
    case OP_NOT_WORDCHAR: case OP_WORDCHAR:
    case OP_ANY: case OP_ALLANY: case OP_ANYBYTE:
    case OP_NOTPROP: case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE: case OP_HSPACE:
    case OP_NOT_VSPACE: case OP_VSPACE:
    case OP_EXTUNI:
    case OP_NOT: case OP_NOTI:
    cc = compile_char1_matchingpath(common, *cc, cc + 1, PARENT_FAIL_JUMPS(parent), TRUE);
    break;

    case OP_SET_SOM:
    /* The old OVECTOR(0) value goes to a new stack slot and OVECTOR(0)
    is overwritten with the current STR_PTR. */
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    cc++;
    break;

    case OP_CHAR: case OP_CHARI:
    /* The multi-character generator is used only in PCRE2_JIT_COMPLETE mode. */
    if (common->mode != PCRE2_JIT_COMPLETE)
      cc = compile_char1_matchingpath(common, *cc, cc + 1, PARENT_FAIL_JUMPS(parent), TRUE);
    else
      cc = compile_charn_matchingpath(common, cc, ccend, PARENT_FAIL_JUMPS(parent));
    break;

    /* Repeated single items. */
    case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
    case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:

    case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
    case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:

    case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
    case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:

    case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
    case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:

    case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPLUS: case OP_TYPEMINPLUS:
    case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEUPTO: case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
    cc = compile_iterator_matchingpath(common, cc, parent);
    break;

    case OP_CLASS: case OP_NCLASS:
    /* The opcode after the 32-byte bitmap tells whether the class is repeated. */
    repeat_op = cc[1 + (32 / sizeof(PCRE2_UCHAR))];
    if (repeat_op < OP_CRSTAR || repeat_op > OP_CRPOSRANGE)
      cc = compile_char1_matchingpath(common, *cc, cc + 1, PARENT_FAIL_JUMPS(parent), TRUE);
    else
      cc = compile_iterator_matchingpath(common, cc, parent);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    case OP_XCLASS:
    /* GET(cc, 1) is the offset of the opcode that follows the class. */
    repeat_op = *(cc + GET(cc, 1));
    if (repeat_op < OP_CRSTAR || repeat_op > OP_CRPOSRANGE)
      cc = compile_char1_matchingpath(common, *cc, cc + 1, PARENT_FAIL_JUMPS(parent), TRUE);
    else
      cc = compile_iterator_matchingpath(common, cc, parent);
    break;
#endif

    case OP_REF: case OP_REFI:
    repeat_op = cc[1 + IMM2_SIZE];
    if (repeat_op < OP_CRSTAR || repeat_op > OP_CRPOSRANGE)
      {
      compile_ref_matchingpath(common, cc, PARENT_FAIL_JUMPS(parent), TRUE, FALSE);
      cc += 1 + IMM2_SIZE;
      }
    else
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    break;

    case OP_DNREF: case OP_DNREFI:
    repeat_op = cc[1 + 2 * IMM2_SIZE];
    if (repeat_op < OP_CRSTAR || repeat_op > OP_CRPOSRANGE)
      {
      compile_dnref_search(common, cc, PARENT_FAIL_JUMPS(parent));
      compile_ref_matchingpath(common, cc, PARENT_FAIL_JUMPS(parent), TRUE, FALSE);
      cc += 1 + 2 * IMM2_SIZE;
      }
    else
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    break;

    case OP_RECURSE:
    cc = compile_recurse_matchingpath(common, cc, parent);
    break;

    case OP_CALLOUT: case OP_CALLOUT_STR:
    cc = compile_callout_matchingpath(common, cc, parent);
    break;

    case OP_ASSERT: case OP_ASSERT_NOT:
    case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
    PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
    cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
    break;

    case OP_BRAMINZERO:
    /* The bracket itself is skipped on the matching path; only the stack
    slots and the label of the continuation are set up here. */
    PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
    cc = bracketend(cc + 1);
    if (*(cc - 1 - LINK_SIZE) == OP_KETRMIN)
      {
      allocate_stack(common, 2);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
      }
    else
      {
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
    count_match(common);
    break;

    case OP_ASSERT_NA: case OP_ASSERTBACK_NA:
    case OP_ONCE: case OP_SCRIPT_RUN:
    case OP_BRA: case OP_CBRA: case OP_COND:
    case OP_SBRA: case OP_SCBRA: case OP_SCOND:
    cc = compile_bracket_matchingpath(common, cc, parent);
    break;

    case OP_BRAZERO:
    /* cc[1] up to OP_ASSERTBACK_NOT goes to the assertion generator,
    anything above it is compiled as a bracket. */
    if (cc[1] <= OP_ASSERTBACK_NOT)
      {
      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
      }
    else
      cc = compile_bracket_matchingpath(common, cc, parent);
    break;

    case OP_BRAPOS: case OP_CBRAPOS:
    case OP_SBRAPOS: case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    cc = compile_bracketpos_matchingpath(common, cc, parent);
    break;

    case OP_MARK:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    SLJIT_ASSERT(common->mark_ptr != 0);
    /* TMP2 carries the previous mark value into the new stack area. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
    allocate_stack(common, common->has_skip_arg ? 5 : 1);
    if (HAS_VIRTUAL_REGISTERS)
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
    /* The address cc + 2 becomes the new mark, locally and in arguments. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
    if (common->has_skip_arg)
      {
      /* Link a type_mark record (old control head, type, cc + 2, STR_PTR)
      into the control head chain. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE: case OP_PRUNE_ARG:
    case OP_SKIP: case OP_SKIP_ARG:
    case OP_THEN: case OP_THEN_ARG:
    case OP_COMMIT: case OP_COMMIT_ARG:
    cc = compile_control_verb_matchingpath(common, cc, parent);
    break;

    case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT:
    cc = compile_fail_accept_matchingpath(common, cc, parent);
    break;

    case OP_CLOSE:
    cc = compile_close_matchingpath(common, cc);
    break;

    case OP_SKIPZERO:
    /* No code is generated: the following bracket is stepped over. */
    cc = bracketend(cc + 1);
    break;

    default:
    SLJIT_UNREACHABLE();
    return;
    }

  /* A generator returned NULL: stop compiling this range. */
  if (cc == NULL)
    return;
  }

if (has_then_trap)
  {
  /* Head item on backtrack. */
  PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
  BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
  BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
  common->then_trap = save_then_trap;
  }
SLJIT_ASSERT(cc == ccend);
}

#undef PARENT_FAIL_JUMPS
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS
12097 
12098 #define COMPILE_BACKTRACKINGPATH(current) \
12099   do \
12100     { \
12101     compile_backtrackingpath(common, (current)); \
12102     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
12103       return; \
12104     } \
12105   while (0)
12106 
12107 #define CURRENT_AS(type) ((type *)current)
12108 
compile_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)12109 static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12110 {
12111 DEFINE_COMPILER;
12112 PCRE2_SPTR cc = current->cc;
12113 PCRE2_UCHAR opcode;
12114 PCRE2_UCHAR type;
12115 sljit_u32 max = 0, exact;
12116 struct sljit_label *label = NULL;
12117 struct sljit_jump *jump = NULL;
12118 jump_list *jumplist = NULL;
12119 PCRE2_SPTR end;
12120 int private_data_ptr = PRIVATE_DATA(cc);
12121 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
12122 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
12123 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
12124 
12125 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
12126 
12127 switch(opcode)
12128   {
12129   case OP_STAR:
12130   case OP_UPTO:
12131   if (type == OP_ANYNL || type == OP_EXTUNI)
12132     {
12133     SLJIT_ASSERT(private_data_ptr == 0);
12134     set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
12135     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12136     free_stack(common, 1);
12137     CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12138     }
12139   else
12140     {
12141     if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
12142       {
12143       OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12144       OP1(SLJIT_MOV, TMP2, 0, base, offset1);
12145       OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12146 
12147       jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
12148       label = LABEL();
12149       OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
12150       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12151       if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
12152         OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
12153       CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12154       move_back(common, NULL, TRUE);
12155       CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
12156       }
12157     else
12158       {
12159       OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12160       jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
12161       move_back(common, NULL, TRUE);
12162       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12163       JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12164       }
12165     JUMPHERE(jump);
12166     if (private_data_ptr == 0)
12167       free_stack(common, 2);
12168     }
12169   break;
12170 
12171   case OP_MINSTAR:
12172   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12173   compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12174   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12175   JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12176   set_jumps(jumplist, LABEL());
12177   if (private_data_ptr == 0)
12178     free_stack(common, 1);
12179   break;
12180 
12181   case OP_MINUPTO:
12182   OP1(SLJIT_MOV, TMP1, 0, base, offset1);
12183   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12184   OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
12185   add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));
12186 
12187   OP1(SLJIT_MOV, base, offset1, TMP1, 0);
12188   compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12189   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12190   JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12191 
12192   set_jumps(jumplist, LABEL());
12193   if (private_data_ptr == 0)
12194     free_stack(common, 2);
12195   break;
12196 
12197   case OP_QUERY:
12198   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12199   OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12200   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12201   jump = JUMP(SLJIT_JUMP);
12202   set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
12203   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12204   OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12205   JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12206   JUMPHERE(jump);
12207   if (private_data_ptr == 0)
12208     free_stack(common, 1);
12209   break;
12210 
12211   case OP_MINQUERY:
12212   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12213   OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12214   jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
12215   compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12216   JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12217   set_jumps(jumplist, LABEL());
12218   JUMPHERE(jump);
12219   if (private_data_ptr == 0)
12220     free_stack(common, 1);
12221   break;
12222 
12223   case OP_EXACT:
12224   case OP_POSSTAR:
12225   case OP_POSQUERY:
12226   case OP_POSUPTO:
12227   break;
12228 
12229   default:
12230   SLJIT_UNREACHABLE();
12231   break;
12232   }
12233 
12234 set_jumps(current->topbacktracks, LABEL());
12235 }
12236 
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)12237 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12238 {
12239 DEFINE_COMPILER;
12240 PCRE2_SPTR cc = current->cc;
12241 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
12242 PCRE2_UCHAR type;
12243 
12244 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
12245 
12246 if ((type & 0x1) == 0)
12247   {
12248   /* Maximize case. */
12249   set_jumps(current->topbacktracks, LABEL());
12250   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12251   free_stack(common, 1);
12252   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12253   return;
12254   }
12255 
12256 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12257 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12258 set_jumps(current->topbacktracks, LABEL());
12259 free_stack(common, ref ? 2 : 3);
12260 }
12261 
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)12262 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12263 {
12264 DEFINE_COMPILER;
12265 recurse_entry *entry;
12266 
12267 if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
12268   {
12269   entry = CURRENT_AS(recurse_backtrack)->entry;
12270   if (entry->backtrack_label == NULL)
12271     add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
12272   else
12273     JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
12274   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
12275   }
12276 else
12277   compile_backtrackingpath(common, current->top);
12278 
12279 set_jumps(current->topbacktracks, LABEL());
12280 }
12281 
/* Generates the backtracking path of an assertion, optionally preceded by
OP_BRAZERO (OP_BRAMINZERO is never expected here).

Arguments:
  common    the JIT compiler state
  current   the backtrack entry; it is accessed as an assert_backtrack
*/
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
assert_backtrack *assert_bt = CURRENT_AS(assert_backtrack);
PCRE2_SPTR cc = current->cc;
PCRE2_UCHAR bra = OP_BRA;
struct sljit_jump *brajump = NULL;

SLJIT_ASSERT(*cc != OP_BRAMINZERO);
if (*cc == OP_BRAZERO)
  {
  bra = *cc;
  cc++;
  /* With a BRAZERO prefix the saved STR_PTR is on the stack top. */
  SLJIT_ASSERT(current->topbacktracks == NULL);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

if (assert_bt->framesize < 0)
  {
  /* No frame to revert. */
  set_jumps(current->topbacktracks, LABEL());
  if (bra != OP_BRAZERO)
    return;

  /* Clear the slot and retry the matching path when the reloaded STR_PTR
  is non-zero; otherwise drop the slot. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, assert_bt->matchingpath);
  free_stack(common, 1);
  return;
  }

if (bra == OP_BRAZERO)
  {
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, assert_bt->matchingpath);
    free_stack(common, 1);
    return;
    }
  free_stack(common, 1);
  /* A zero STR_PTR skips everything up to the end of this function's code. */
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
  {
  /* Positive assertion: reset STACK_TOP from the private data slot, call
  the revertframes routine, then release the frame and write the value
  read from STACK(-2) back into the private data slot. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert_bt->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert_bt->framesize - 1) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert_bt->private_data_ptr, TMP1, 0);
  }
set_jumps(current->topbacktracks, LABEL());

if (bra != OP_BRAZERO)
  return;

/* We know there is enough place on the stack. */
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
JUMPTO(SLJIT_JUMP, assert_bt->matchingpath);
JUMPHERE(brajump);
}
12350 
/* Emits the backtracking path for a bracket group.

Arguments:
  common   the compiler state
  current  the backtrack record of the group; accessed as bracket_backtrack

The opcode at current->cc may be preceded by OP_BRAZERO or OP_BRAMINZERO.
The emitted code first pops what was saved for the group from the
backtracking stack (repeat counter, zero-iteration marker, capture data,
alternative selector), then runs the backtracking code of the alternative
that matched, compiles the matching path of every remaining alternative,
and finally restores the private data of the group and handles the repeat.

The order of the OP1 / OP2 / free_stack calls is the order in which the
instructions are emitted, so statements must not be reordered.

NOTE(review): the stack layout consumed here is presumably the one produced
by compile_bracket_matchingpath (not visible in this section) - confirm
there before changing any STACK() index or free_stack() count. */
compile_bracket_backtrackingpath(compiler_common * common,struct backtrack_common * current)12351 static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12352 {
12353 DEFINE_COMPILER;
12354 int opcode, stacksize, alt_count, alt_max;
12355 int offset = 0;
12356 int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
12357 int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
12358 PCRE2_SPTR cc = current->cc;
12359 PCRE2_SPTR ccbegin;
12360 PCRE2_SPTR ccprev;
12361 PCRE2_UCHAR bra = OP_BRA;
12362 PCRE2_UCHAR ket;
12363 assert_backtrack *assert;
12364 BOOL has_alternatives;
12365 BOOL needs_control_head = FALSE;
12366 struct sljit_jump *brazero = NULL;
12367 struct sljit_jump *next_alt = NULL;
12368 struct sljit_jump *once = NULL;
12369 struct sljit_jump *cond = NULL;
12370 struct sljit_label *rmin_label = NULL;
12371 struct sljit_label *exact_label = NULL;
12372 struct sljit_put_label *put_label = NULL;
12373 
/* A leading OP_BRAZERO / OP_BRAMINZERO is remembered in bra and skipped,
so that cc points to the bracket opcode itself. */
12374 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
12375   {
12376   bra = *cc;
12377   cc++;
12378   }
12379 
12380 opcode = *cc;
/* ccbegin is used temporarily: it is set to bracketend(cc) - 1 - LINK_SIZE
and the code unit found there is taken as the ket. */
12381 ccbegin = bracketend(cc) - 1 - LINK_SIZE;
12382 ket = *ccbegin;
/* A plain OP_KET with private data describes a counted repeat: the counter
slot, the repeat type and the repeat count are read from PRIVATE_DATA.
OP_UPTO / OP_MINUPTO repeats are then handled as OP_KETRMAX / OP_KETRMIN;
for any other repeat type ket stays OP_KET. */
12383 if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
12384   {
12385   repeat_ptr = PRIVATE_DATA(ccbegin);
12386   repeat_type = PRIVATE_DATA(ccbegin + 2);
12387   repeat_count = PRIVATE_DATA(ccbegin + 3);
12388   SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
12389   if (repeat_type == OP_UPTO)
12390     ket = OP_KETRMAX;
12391   if (repeat_type == OP_MINUPTO)
12392     ket = OP_KETRMIN;
12393   }
/* From here on ccbegin is the bracket opcode and cc is advanced by the link
value stored at cc + 1; the tests below look for OP_ALT or a ket there. */
12394 ccbegin = cc;
12395 cc += GET(cc, 1);
12396 has_alternatives = *cc == OP_ALT;
/* For conditional groups has_alternatives is recomputed: it is set when the
condition is an assertion opcode or when a condfailed jump list exists. */
12397 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12398   has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
/* For capturing brackets offset is twice the 2-byte value read after the
link; it is used with OVECTOR() and (shifted back) with optimized_cbracket. */
12399 if (opcode == OP_CBRA || opcode == OP_SCBRA)
12400   offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
/* An OP_COND that is followed by a repeating ket is treated as OP_SCOND. */
12401 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
12402   opcode = OP_SCOND;
12403 
12404 alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
12405 
12406 /* Decoding the needs_control_head in framesize. */
/* The lowest bit of u.framesize is the flag; the stored value is shifted
in place, so the record holds the plain framesize afterwards. */
12407 if (opcode == OP_ONCE)
12408   {
12409   needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
12410   CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
12411   }
12412 
/* Counted OP_UPTO / OP_MINUPTO repeat: pop the saved counter into TMP1 and
write it back to the counter slot (plus one for OP_UPTO). */
12413 if (ket != OP_KET && repeat_type != 0)
12414   {
12415   /* TMP1 is used in OP_KETRMIN below. */
12416   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12417   free_stack(common, 1);
12418   if (repeat_type == OP_UPTO)
12419     OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
12420   else
12421     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
12422   }
12423 
/* Entry handling, selected by the (possibly rewritten) ket and by bra. The
brazero jump created here is resolved by JUMPHERE at the end of the function. */
12424 if (ket == OP_KETRMAX)
12425   {
12426   if (bra == OP_BRAZERO)
12427     {
12428     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12429     free_stack(common, 1);
12430     brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12431     }
12432   }
12433 else if (ket == OP_KETRMIN)
12434   {
12435   if (bra != OP_BRAMINZERO)
12436     {
12437     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12438     if (repeat_type != 0)
12439       {
12440       /* TMP1 was set a few lines above. */
12441       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12442       /* Drop STR_PTR for non-greedy plus quantifier. */
12443       if (opcode != OP_ONCE)
12444         free_stack(common, 1);
12445       }
12446     else if (opcode >= OP_SBRA || opcode == OP_ONCE)
12447       {
12448       /* Checking zero-length iteration. */
12449       if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
12450         CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12451       else
12452         {
12453         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12454         CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12455         }
12456       /* Drop STR_PTR for non-greedy plus quantifier. */
12457       if (opcode != OP_ONCE)
12458         free_stack(common, 1);
12459       }
12460     else
12461       JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12462     }
/* rmin_label is the target of the conditional jump emitted for OP_KETRMIN
at the end of the function. */
12463   rmin_label = LABEL();
12464   if (repeat_type != 0)
12465     OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12466   }
12467 else if (bra == OP_BRAZERO)
12468   {
12469   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12470   free_stack(common, 1);
12471   brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12472   }
12473 else if (repeat_type == OP_EXACT)
12474   {
/* The counter slot is set to 1; exact_label is the target of the loop
closing compare emitted for OP_EXACT at the end of the function. */
12475   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12476   exact_label = LABEL();
12477   }
12478 
/* Capturing bracket: reload the saved values from the stack into the
OVECTOR slots (and into capture_last_ptr when that is in use). Nothing is
popped here when the bracket is marked in optimized_cbracket. */
12479 if (offset != 0)
12480   {
12481   if (common->capture_last_ptr != 0)
12482     {
12483     SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
12484     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12485     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12486     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
12487     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12488     free_stack(common, 3);
12489     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
12490     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
12491     }
12492   else if (common->optimized_cbracket[offset >> 1] == 0)
12493     {
12494     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12495     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12496     free_stack(common, 2);
12497     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12498     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12499     }
12500   }
12501 
/* Select how the backtrack continues:
   OP_ONCE      - revert the saved frame (if any) and jump to the "once"
                  label placed near the end of the function;
   conditionals - pop the selector; non-zero skips to next_alt;
   other groups - pop the selector; with more than 3 alternatives it is used
                  as an indirect jump target, otherwise it is compared
                  against small integers. */
12502 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
12503   {
12504   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12505     {
12506     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12507     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12508     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
12509     }
12510   once = JUMP(SLJIT_JUMP);
12511   }
12512 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12513   {
12514   if (has_alternatives)
12515     {
12516     /* Always exactly one alternative. */
12517     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12518     free_stack(common, 1);
12519 
12520     alt_max = 2;
12521     next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12522     }
12523   }
12524 else if (has_alternatives)
12525   {
12526   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12527   free_stack(common, 1);
12528 
12529   if (alt_max > 3)
12530     {
12531     sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
12532 
12533     SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_put_label);
12534     sljit_set_put_label(CURRENT_AS(bracket_backtrack)->u.matching_put_label, LABEL());
12535     sljit_emit_op0(compiler, SLJIT_ENDBR);
12536     }
12537   else
12538     next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12539   }
12540 
/* Backtracking code of the items recorded under current->top, followed by
the landing point of the topbacktracks jumps. */
12541 COMPILE_BACKTRACKINGPATH(current->top);
12542 if (current->topbacktracks)
12543   set_jumps(current->topbacktracks, LABEL());
12544 
/* Conditional groups: emit the "cond" jump (resolved after the alternatives
loop) and bind the condfailed jumps here. For a positive assertion condition
with a saved frame the frame is reverted first. */
12545 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12546   {
12547   /* Conditional block always has at most one alternative. */
12548   if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
12549     {
12550     SLJIT_ASSERT(has_alternatives);
12551     assert = CURRENT_AS(bracket_backtrack)->u.assert;
12552     if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
12553       {
12554       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
12555       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12556       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12557       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
12558       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
12559       }
12560     cond = JUMP(SLJIT_JUMP);
12561     set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
12562     }
12563   else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
12564     {
12565     SLJIT_ASSERT(has_alternatives);
12566     cond = JUMP(SLJIT_JUMP);
12567     set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
12568     }
12569   else
12570     SLJIT_ASSERT(!has_alternatives);
12571   }
12572 
/* One iteration per remaining alternative: compile its matching path, save
the state needed by a later backtrack, jump to alternative_matchingpath,
and then emit the backtracking code of that alternative. */
12573 if (has_alternatives)
12574   {
12575   alt_count = 1;
12576   do
12577     {
/* The record is reused for every alternative, so its lists are cleared. */
12578     current->top = NULL;
12579     current->topbacktracks = NULL;
12580     current->nextbacktracks = NULL;
12581     /* Conditional blocks always have an additional alternative, even if it is empty. */
12582     if (*cc == OP_ALT)
12583       {
12584       ccprev = cc + 1 + LINK_SIZE;
12585       cc += GET(cc, 1);
/* Reload the subject position the alternative starts from: from the private
data slot, or from the stack when there is no such slot (or for OP_ONCE). */
12586       if (opcode != OP_COND && opcode != OP_SCOND)
12587         {
12588         if (opcode != OP_ONCE)
12589           {
12590           if (private_data_ptr != 0)
12591             OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12592           else
12593             OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12594           }
12595         else
12596           OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
12597         }
12598       compile_matchingpath(common, ccprev, cc, current);
/* Stop generating code as soon as the sljit compiler reports an error. */
12599       if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
12600         return;
12601 
12602       if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
12603         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12604 
12605       if (opcode == OP_SCRIPT_RUN)
12606         match_script_run_common(common, private_data_ptr, current);
12607       }
12608 
12609     /* Instructions after the current alternative is successfully matched. */
12610     /* There is a similar code in compile_bracket_matchingpath. */
12611     if (opcode == OP_ONCE)
12612       match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
12613 
/* First pass: count the stack slots that are needed. */
12614     stacksize = 0;
12615     if (repeat_type == OP_MINUPTO)
12616       {
12617       /* We need to preserve the counter. TMP2 will be used below. */
12618       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
12619       stacksize++;
12620       }
12621     if (ket != OP_KET || bra != OP_BRA)
12622       stacksize++;
12623     if (offset != 0)
12624       {
12625       if (common->capture_last_ptr != 0)
12626         stacksize++;
12627       if (common->optimized_cbracket[offset >> 1] == 0)
12628         stacksize += 2;
12629       }
12630     if (opcode != OP_ONCE)
12631       stacksize++;
12632 
12633     if (stacksize > 0)
12634       allocate_stack(common, stacksize);
12635 
/* Second pass: stacksize now serves as the index of the next slot to fill. */
12636     stacksize = 0;
12637     if (repeat_type == OP_MINUPTO)
12638       {
12639       /* TMP2 was set above. */
12640       OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
12641       stacksize++;
12642       }
12643 
12644     if (ket != OP_KET || bra != OP_BRA)
12645       {
12646       if (ket != OP_KET)
12647         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
12648       else
12649         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
12650       stacksize++;
12651       }
12652 
12653     if (offset != 0)
12654       stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
12655 
/* Store the selector of this alternative: its index when there are at most
3 alternatives, otherwise an address filled in through put_label below. */
12656     if (opcode != OP_ONCE)
12657       {
12658       if (alt_max <= 3)
12659         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
12660       else
12661         put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
12662       }
12663 
12664     if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
12665       {
12666       /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
12667       SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
12668       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
12669       }
12670 
12671     JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
12672 
/* Entry point of the backtracking code of this alternative: either the
pending next_alt compare lands here, or the put_label address is bound. */
12673     if (opcode != OP_ONCE)
12674       {
12675       if (alt_max <= 3)
12676         {
12677         JUMPHERE(next_alt);
12678         alt_count++;
12679         if (alt_count < alt_max)
12680           {
12681           SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
12682           next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
12683           }
12684         }
12685       else
12686         {
12687         sljit_set_put_label(put_label, LABEL());
12688         sljit_emit_op0(compiler, SLJIT_ENDBR);
12689         }
12690       }
12691 
12692     COMPILE_BACKTRACKINGPATH(current->top);
12693     if (current->topbacktracks)
12694       set_jumps(current->topbacktracks, LABEL());
12695     SLJIT_ASSERT(!current->nextbacktracks);
12696     }
12697   while (*cc == OP_ALT);
12698 
/* Resolve the "cond" jump of a conditional group; for a negative assertion
condition with a saved frame the frame is reverted before the label. */
12699   if (cond != NULL)
12700     {
12701     SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
12702     assert = CURRENT_AS(bracket_backtrack)->u.assert;
12703     if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
12704       {
12705       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
12706       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12707       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12708       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
12709       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
12710       }
12711     JUMPHERE(cond);
12712     }
12713 
12714   /* Free the STR_PTR. */
12715   if (private_data_ptr == 0)
12716     free_stack(common, 1);
12717   }
12718 
/* Restore what was saved for the group itself: the capture slots of a
capturing bracket, the private data slot of the listed opcodes, or the
saved state of OP_ONCE. */
12719 if (offset != 0)
12720   {
12721   /* Using both tmp register is better for instruction scheduling. */
12722   if (common->optimized_cbracket[offset >> 1] != 0)
12723     {
12724     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12725     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12726     free_stack(common, 2);
12727     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12728     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12729     }
12730   else
12731     {
12732     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12733     free_stack(common, 1);
12734     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12735     }
12736   }
12737 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
12738   {
12739   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
12740   free_stack(common, 1);
12741   }
12742 else if (opcode == OP_ONCE)
12743   {
/* cc is reset to the position reached from the bracket start through its
link value, so *cc == OP_ALT tells whether the group has alternatives. */
12744   cc = ccbegin + GET(ccbegin, 1);
12745   stacksize = needs_control_head ? 1 : 0;
12746 
12747   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12748     {
12749     /* Reset head and drop saved frame. */
12750     stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
12751     }
12752   else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
12753     {
12754     /* The STR_PTR must be released. */
12755     stacksize++;
12756     }
12757 
12758   if (stacksize > 0)
12759     free_stack(common, stacksize);
12760 
/* Landing point of the "once" jump emitted earlier for OP_ONCE. */
12761   JUMPHERE(once);
12762   /* Restore previous private_data_ptr */
12763   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12764     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
12765   else if (ket == OP_KETRMIN)
12766     {
12767     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12768     /* See the comment below. */
12769     free_stack(common, 2);
12770     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12771     }
12772   }
12773 
/* Repeat handling. OP_EXACT increments the counter and loops back to
exact_label while it is <= repeat_count. OP_KETRMAX / OP_KETRMIN pop a saved
value and, when it is non-zero, jump to recursive_matchingpath / rmin_label.
The brazero jump created at the top is resolved in the branches that
handle OP_BRAZERO. */
12774 if (repeat_type == OP_EXACT)
12775   {
12776   OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12777   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
12778   CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
12779   }
12780 else if (ket == OP_KETRMAX)
12781   {
12782   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12783   if (bra != OP_BRAZERO)
12784     free_stack(common, 1);
12785 
12786   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12787   if (bra == OP_BRAZERO)
12788     {
12789     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12790     JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
12791     JUMPHERE(brazero);
12792     free_stack(common, 1);
12793     }
12794   }
12795 else if (ket == OP_KETRMIN)
12796   {
12797   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12798 
12799   /* OP_ONCE removes everything in case of a backtrack, so we don't
12800   need to explicitly release the STR_PTR. The extra release would
12801   affect badly the free_stack(2) above. */
12802   if (opcode != OP_ONCE)
12803     free_stack(common, 1);
12804   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
12805   if (opcode == OP_ONCE)
12806     free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
12807   else if (bra == OP_BRAMINZERO)
12808     free_stack(common, 1);
12809   }
12810 else if (bra == OP_BRAZERO)
12811   {
12812   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12813   JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
12814   JUMPHERE(brazero);
12815   }
12816 }
12817 
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)12818 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12819 {
12820 DEFINE_COMPILER;
12821 int offset;
12822 struct sljit_jump *jump;
12823 
12824 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
12825   {
12826   if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
12827     {
12828     offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
12829     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12830     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12831     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12832     if (common->capture_last_ptr != 0)
12833       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12834     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12835     if (common->capture_last_ptr != 0)
12836       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
12837     }
12838   set_jumps(current->topbacktracks, LABEL());
12839   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
12840   return;
12841   }
12842 
12843 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
12844 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12845 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
12846 
12847 if (current->topbacktracks)
12848   {
12849   jump = JUMP(SLJIT_JUMP);
12850   set_jumps(current->topbacktracks, LABEL());
12851   /* Drop the stack frame. */
12852   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
12853   JUMPHERE(jump);
12854   }
12855 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
12856 }
12857 
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)12858 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12859 {
12860 assert_backtrack backtrack;
12861 
12862 current->top = NULL;
12863 current->topbacktracks = NULL;
12864 current->nextbacktracks = NULL;
12865 if (current->cc[1] > OP_ASSERTBACK_NOT)
12866   {
12867   /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
12868   compile_bracket_matchingpath(common, current->cc, current);
12869   compile_bracket_backtrackingpath(common, current->top);
12870   }
12871 else
12872   {
12873   memset(&backtrack, 0, sizeof(backtrack));
12874   backtrack.common.cc = current->cc;
12875   backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
12876   /* Manual call of compile_assert_matchingpath. */
12877   compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
12878   }
12879 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
12880 }
12881 
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)12882 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12883 {
12884 DEFINE_COMPILER;
12885 PCRE2_UCHAR opcode = *current->cc;
12886 struct sljit_label *loop;
12887 struct sljit_jump *jump;
12888 
12889 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
12890   {
12891   if (common->then_trap != NULL)
12892     {
12893     SLJIT_ASSERT(common->control_head_ptr != 0);
12894 
12895     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12896     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
12897     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
12898     jump = JUMP(SLJIT_JUMP);
12899 
12900     loop = LABEL();
12901     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12902     JUMPHERE(jump);
12903     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
12904     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
12905     add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
12906     return;
12907     }
12908   else if (!common->local_quit_available && common->in_positive_assertion)
12909     {
12910     add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
12911     return;
12912     }
12913   }
12914 
12915 if (common->local_quit_available)
12916   {
12917   /* Abort match with a fail. */
12918   if (common->quit_label == NULL)
12919     add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
12920   else
12921     JUMPTO(SLJIT_JUMP, common->quit_label);
12922   return;
12923   }
12924 
12925 if (opcode == OP_SKIP_ARG)
12926   {
12927   SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
12928   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12929   OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
12930   sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
12931 
12932   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
12933   add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
12934   return;
12935   }
12936 
12937 if (opcode == OP_SKIP)
12938   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12939 else
12940   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
12941 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
12942 }
12943 
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)12944 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12945 {
12946 DEFINE_COMPILER;
12947 struct sljit_jump *jump;
12948 int size;
12949 
12950 if (CURRENT_AS(then_trap_backtrack)->then_trap)
12951   {
12952   common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
12953   return;
12954   }
12955 
12956 size = CURRENT_AS(then_trap_backtrack)->framesize;
12957 size = 3 + (size < 0 ? 0 : size);
12958 
12959 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
12960 free_stack(common, size);
12961 jump = JUMP(SLJIT_JUMP);
12962 
12963 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
12964 /* STACK_TOP is set by THEN. */
12965 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
12966   {
12967   add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12968   OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
12969   }
12970 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12971 free_stack(common, 3);
12972 
12973 JUMPHERE(jump);
12974 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
12975 }
12976 
/* Emits the backtracking code for a chain of backtrack records.

Arguments:
  common   the compiler state
  current  the first record to process; the chain is followed through
           the prev links until NULL is reached

For every record the pending nextbacktracks jumps are bound to the current
position, then the opcode at current->cc selects the code generator.
common->then_trap may be changed while the chain is processed (see
OP_THEN_TRAP); the value seen on entry is put back before returning. */

static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *then_trap_on_entry = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->nextbacktracks != NULL)
    set_jumps(current->nextbacktracks, LABEL());

  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Pop the saved value back into OVECTOR(0). */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Single character, negated character, character type and class
    iterators. */
    case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
    case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
    case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
    case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
    case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
    case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
    case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
    case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
    case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPLUS: case OP_TYPEMINPLUS:
    case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEUPTO: case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
    case OP_CLASS: case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF: case OP_REFI: case OP_DNREF: case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT: case OP_ASSERT_NOT:
    case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ASSERT_NA: case OP_ASSERTBACK_NA:
    case OP_ONCE: case OP_SCRIPT_RUN:
    case OP_BRA: case OP_CBRA: case OP_COND:
    case OP_SBRA: case OP_SCBRA: case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode after OP_BRAZERO tells assertions and brackets apart. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS: case OP_CBRAPOS: case OP_SBRAPOS: case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    /* With has_skip_arg five slots are released and the control head is
    restored as well; otherwise a single slot holds the saved mark. */
    if (common->has_skip_arg)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN: case OP_THEN_ARG:
    case OP_PRUNE: case OP_PRUNE_ARG:
    case OP_SKIP: case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT: case OP_COMMIT_ARG:
    /* The return register is set to PCRE2_ERROR_NOMATCH only when no
    local quit is available; the jump to quit is emitted in both cases. */
    if (!common->local_quit_available)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
    if (common->quit_label != NULL)
      JUMPTO(SLJIT_JUMP, common->quit_label);
    else
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    break;

    case OP_CALLOUT: case OP_CALLOUT_STR:
    case OP_FAIL:
    case OP_ACCEPT: case OP_ASSERT_ACCEPT:
    /* Nothing to undo: only the jump targets are bound. */
    set_jumps(current->topbacktracks, LABEL());
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  }

common->then_trap = then_trap_on_entry;
}
13167 
/* Emits the code for one recursion entry, common->currententry, i.e. the
group that starts at common->start + common->currententry->start.

Two fast-call entry points are generated and recorded in the entry:
  entry_label       start of the matching path
  backtrack_label   re-entry point used when the caller backtracks into
                    the recursion

Pending jumps collected in entry_calls / backtrack_calls are bound to these
labels. The generated code leaves 1 in TMP1 when it returns through the
"match" exit and 0 when it returns because no alternative matched (or the
backtrack entry found the accept marker).

Arguments:
  common      the compiler state; currententry selects the group

Returns:      nothing; if sljit reports a compiler error while an
              alternative is being compiled the function returns early
              and the caller is expected to check the compiler error */

static SLJIT_INLINE void compile_recurse(compiler_common *common)
{
DEFINE_COMPILER;
PCRE2_SPTR cc = common->start + common->currententry->start;
/* First opcode inside the group: skip the opcode and its link, and the
   extra IMM2 field that every group kind except OP_BRA carries. */
PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
BOOL needs_control_head;
BOOL has_quit;
BOOL has_accept;
int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_control_head, &has_quit, &has_accept);
int alt_count, alt_max, local_size;
backtrack_common altbacktrack;
jump_list *match = NULL;
struct sljit_jump *next_alt = NULL;
struct sljit_jump *accept_exit = NULL;
struct sljit_label *quit;
struct sljit_put_label *put_label = NULL;

/* Recurse captures then. */
common->then_trap = NULL;

SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);

alt_max = no_alternatives(cc);
alt_count = 0;

/* Matching path. */
SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
common->currententry->entry_label = LABEL();
set_jumps(common->currententry->entry_calls, common->currententry->entry_label);

/* The return address of the fast call is received in TMP2. */
sljit_emit_fast_enter(compiler, TMP2, 0);
count_match(common);

/* Bookkeeping slots kept in front of the private data: the return address,
   plus a slot for the starting STR_PTR when there are several alternatives. */
local_size = (alt_max > 1) ? 2 : 1;

/* (Reversed) stack layout:
   [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */

allocate_stack(common, private_data_size + local_size);
/* Save return address. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);

copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, has_quit);

/* This variable is saved and restored every time we enter or exit a recursive context. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);

if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);

/* Remember where the subject pointer was, so that later alternatives can
   restart from the same position. */
if (alt_max > 1)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

/* The quit/accept labels and jump lists are local to this recursion. */
memset(&altbacktrack, 0, sizeof(backtrack_common));
common->quit_label = NULL;
common->accept_label = NULL;
common->quit = NULL;
common->accept = NULL;
altbacktrack.cc = ccbegin;
/* cc now addresses the opcode that terminates the first alternative; it is
   compared with OP_ALT at the bottom of the loop. */
cc += GET(cc, 1);
/* One iteration per alternative of the group. */
while (1)
  {
  altbacktrack.top = NULL;
  altbacktrack.topbacktracks = NULL;

  /* Every alternative after the first reloads the saved starting STR_PTR. */
  if (altbacktrack.cc != ccbegin)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;

  /* The alternative matched: push the frame consumed by the common "match"
     exit below. TMP2 receives the saved recursive head. */
  allocate_stack(common, (alt_max > 1 || has_accept) ? 2 : 1);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);

  if (alt_max > 1 || has_accept)
    {
    /* STACK(1) identifies the alternative that matched. With more than three
       alternatives a label address is stored (bound later with
       sljit_set_put_label) so the backtrack entry can use an indirect jump;
       otherwise the alternative index itself is stored. */
    if (alt_max > 3)
      put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
    else
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
    }

  /* Bound to the common "match" exit at the end of the function. */
  add_jump(compiler, &match, JUMP(SLJIT_JUMP));

  if (alt_count == 0)
    {
    /* Backtracking path entry. */
    /* Emitted once, right after the matching path of the first alternative. */
    SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
    common->currententry->backtrack_label = LABEL();
    set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);

    /* Here the return address arrives in TMP1. */
    sljit_emit_fast_enter(compiler, TMP1, 0);

    /* A -1 in STACK(1) is the marker written by the accept handler below. */
    if (has_accept)
      accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);

    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    /* Save return address. */
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);

    copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);

    if (alt_max > 1)
      {
      /* TMP1 = the alternative identifier stored when the match was found. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
      free_stack(common, 2);

      if (alt_max > 3)
        {
        /* Indirect jump to the stored label; the label belonging to the
           first alternative is bound right here. */
        sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
        sljit_set_put_label(put_label, LABEL());
        sljit_emit_op0(compiler, SLJIT_ENDBR);
        }
      else
        /* Not alternative 0: skip to the check emitted for the next one. */
        next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
      }
    else
      free_stack(common, has_accept ? 2 : 1);
    }
  else if (alt_max > 3)
    {
    /* Bind the label whose address this alternative stored above. */
    sljit_set_put_label(put_label, LABEL());
    sljit_emit_op0(compiler, SLJIT_ENDBR);
    }
  else
    {
    /* The previous alternative's index comparison lands here. Only the
       middle one of three alternatives needs another comparison. */
    JUMPHERE(next_alt);
    if (alt_count + 1 < alt_max)
      {
      SLJIT_ASSERT(alt_count == 1 && alt_max == 3);
      next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
      }
    }

  alt_count++;

  /* Backtracking code of this alternative; when it is exhausted, control
     reaches the label bound here, i.e. the next alternative or the
     "no alternative is matched" code after the loop. */
  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;
  set_jumps(altbacktrack.topbacktracks, LABEL());

  if (*cc != OP_ALT)
    break;

  /* Step to the next alternative. */
  altbacktrack.cc = cc + 1 + LINK_SIZE;
  cc += GET(cc, 1);
  }

/* No alternative is matched. */

quit = LABEL();

copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, has_quit);

/* Reload the return address, drop the frame and return with TMP1 = 0. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
free_stack(common, private_data_size + local_size);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);

if (common->quit != NULL)
  {
  SLJIT_ASSERT(has_quit);

  /* Jumps collected in common->quit: restore STACK_TOP from the recursive
     head, copy the data back and leave through the failure exit above. */
  set_jumps(common->quit, LABEL());
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, has_quit);
  JUMPTO(SLJIT_JUMP, quit);
  }

if (has_accept)
  {
  /* The backtrack entry found the -1 marker: drop the two-slot frame,
     release the recursion frame and return with TMP1 = 0. */
  JUMPHERE(accept_exit);
  free_stack(common, 2);

  /* Save return address. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);

  copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, has_quit);

  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
  free_stack(common, private_data_size + local_size);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
  }

if (common->accept != NULL)
  {
  SLJIT_ASSERT(has_accept);

  /* Jumps collected in common->accept: rebuild the frame from the recursive
     head, store the -1 marker in STACK(1) and fall through to the match
     exit. */
  set_jumps(common->accept, LABEL());

  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);

  allocate_stack(common, 2);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
  }

/* Common "match" exit: TMP2 holds the recursive head, which is stored in
   STACK(0) of the frame pushed above. */
set_jumps(match, LABEL());

OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);

copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);

/* Fetch the return address through the recursive head and return with
   TMP1 = 1. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
}
13378 
13379 #undef COMPILE_BACKTRACKINGPATH
13380 #undef CURRENT_AS
13381 
/* Bits of the jit_compile() 'mode' argument that configure code generation
instead of selecting a matching mode. jit_compile() records them and then
masks them out of 'mode'. */

#define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS \
  (PCRE2_JIT_INVALID_UTF)
13384 
/* Generates machine code for a compiled pattern in one matching mode.

Arguments:
  code          a compiled pattern (accessed as pcre2_real_code)
  mode          PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_SOFT or
                PCRE2_JIT_PARTIAL_HARD, optionally combined with the bits in
                PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS

Returns:        0 on success; the generated function, its read-only data
                  list and its size are stored in re->executable_jit in the
                  slot that belongs to the mode
                PCRE2_ERROR_INTERNAL if the newline convention is unknown
                PCRE2_ERROR_NOMEMORY for every other failure (allocation
                  failure, check_opcode_types() failure, too much local
                  data, sljit compiler error, code generation failure)
*/

static int jit_compile(pcre2_code *code, sljit_u32 mode)
{
pcre2_real_code *re = (pcre2_real_code *)code;
struct sljit_compiler *compiler;
backtrack_common rootbacktrack;
compiler_common common_data;
compiler_common *common = &common_data;
const sljit_u8 *tables = re->tables;
void *allocator_data = &re->memctl;
int private_data_size;
PCRE2_SPTR ccend;
executable_functions *functions;
void *executable_func;
sljit_uw executable_size;
sljit_uw total_length;
struct sljit_label *mainloop_label = NULL;
struct sljit_label *continue_match_label;
struct sljit_label *empty_match_found_label = NULL;
struct sljit_label *empty_match_backtrack_label = NULL;
struct sljit_label *reset_match_label;
struct sljit_label *quit_label;
struct sljit_jump *jump;
struct sljit_jump *minlength_check_failed = NULL;
struct sljit_jump *empty_match = NULL;
struct sljit_jump *end_anchor_failed = NULL;
jump_list *reqcu_not_found = NULL;

SLJIT_ASSERT(tables);

#if HAS_VIRTUAL_REGISTERS == 1
SLJIT_ASSERT(sljit_get_register_index(TMP3) < 0 && sljit_get_register_index(ARGUMENTS) < 0 && sljit_get_register_index(RETURN_ADDR) < 0);
#elif HAS_VIRTUAL_REGISTERS == 0
SLJIT_ASSERT(sljit_get_register_index(TMP3) >= 0 && sljit_get_register_index(ARGUMENTS) >= 0 && sljit_get_register_index(RETURN_ADDR) >= 0);
#else
#error "Invalid value for HAS_VIRTUAL_REGISTERS"
#endif

/* The name table is located right after the pcre2_real_code header, and the
byte code (which becomes common->start) right after the name table. */
memset(&rootbacktrack, 0, sizeof(backtrack_common));
memset(common, 0, sizeof(compiler_common));
common->re = re;
common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;

/* Record the configuration bits, then strip them so that 'mode' can be
compared directly with the PCRE2_JIT_xxx mode values below. */
#ifdef SUPPORT_UNICODE
common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
#endif /* SUPPORT_UNICODE */
mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;

common->start = rootbacktrack.cc;
common->read_only_data_head = NULL;
common->fcc = tables + fcc_offset;
common->lcc = (sljit_sw)(tables + lcc_offset);
common->mode = mode;
common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
/* Translate the newline convention. Two-character newlines are packed as
(first << 8) | second. */
common->nltype = NLTYPE_FIXED;
switch(re->newline_convention)
  {
  case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
  case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
  case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
  case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
  case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
  case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
  default: return PCRE2_ERROR_INTERNAL;
  }
common->nlmax = READ_CHAR_MAX;
common->nlmin = 0;
/* \R convention: explicit setting first, otherwise the build-time default. */
if (re->bsr_convention == PCRE2_BSR_UNICODE)
  common->bsr_nltype = NLTYPE_ANY;
else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
  common->bsr_nltype = NLTYPE_ANYCRLF;
else
  {
#ifdef BSR_ANYCRLF
  common->bsr_nltype = NLTYPE_ANYCRLF;
#else
  common->bsr_nltype = NLTYPE_ANY;
#endif
  }
common->bsr_nlmax = READ_CHAR_MAX;
common->bsr_nlmin = 0;
common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
common->ctypes = (sljit_sw)(tables + ctypes_offset);
common->name_count = re->name_count;
common->name_entry_size = re->name_entry_size;
common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
#ifdef SUPPORT_UNICODE
/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
common->utf = (re->overall_options & PCRE2_UTF) != 0;
common->ucp = (re->overall_options & PCRE2_UCP) != 0;
if (common->utf)
  {
  /* In UTF mode, narrow the [min, max] range of values that can be a
  newline (and a \R newline). */
  if (common->nltype == NLTYPE_ANY)
    common->nlmax = 0x2029;
  else if (common->nltype == NLTYPE_ANYCRLF)
    common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
  else
    {
    /* We only care about the first newline character. */
    common->nlmax = common->newline & 0xff;
    }

  if (common->nltype == NLTYPE_FIXED)
    common->nlmin = common->newline & 0xff;
  else
    common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;

  if (common->bsr_nltype == NLTYPE_ANY)
    common->bsr_nlmax = 0x2029;
  else
    common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
  common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
  }
else
  /* Invalid UTF handling is only meaningful for UTF patterns. */
  common->invalid_utf = FALSE;
#endif /* SUPPORT_UNICODE */
ccend = bracketend(common->start);

/* Calculate the local space size on the stack. */
/* Local slots start just after LIMIT_MATCH; every optional slot claimed
below moves ovector_start forward by one word. */
common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
/* One flag byte per capturing bracket (plus one for index 0). */
common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
if (!common->optimized_cbracket)
  return PCRE2_ERROR_NOMEMORY;
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
memset(common->optimized_cbracket, 0, re->top_bracket + 1);
#else
memset(common->optimized_cbracket, 1, re->top_bracket + 1);
#endif

SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
common->capture_last_ptr = common->ovector_start;
common->ovector_start += sizeof(sljit_sw);
#endif
/* A FALSE result is reported to the caller as PCRE2_ERROR_NOMEMORY. */
if (!check_opcode_types(common, common->start, ccend))
  {
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }

/* Checking flags and updating ovector_start. */
if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
  {
  common->req_char_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }
if (mode != PCRE2_JIT_COMPLETE)
  {
  /* Partial matching modes need start_used_ptr; soft partial matching
  additionally needs hit_start. */
  common->start_used_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  if (mode == PCRE2_JIT_PARTIAL_SOFT)
    {
    common->hit_start = common->ovector_start;
    common->ovector_start += sizeof(sljit_sw);
    }
  }
if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
  {
  common->match_end_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
common->control_head_ptr = 1;
#endif
/* Up to this point control_head_ptr is only a "needed" flag; replace it
with the real slot offset. */
if (common->control_head_ptr != 0)
  {
  common->control_head_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }
if (common->has_set_som)
  {
  /* Saving the real start pointer is necessary. */
  common->start_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }

/* Aligning ovector to even number of sljit words. */
if ((common->ovector_start & sizeof(sljit_sw)) != 0)
  common->ovector_start += sizeof(sljit_sw);

/* Without a dedicated slot, OVECTOR(0) doubles as the start pointer. */
if (common->start_ptr == 0)
  common->start_ptr = OVECTOR(0);

/* Capturing brackets cannot be optimized if callouts are allowed. */
if (common->capture_last_ptr != 0)
  memset(common->optimized_cbracket, 0, re->top_bracket + 1);

SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
/* cbra_ptr is placed after the two words per bracket of the ovector. */
common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);

/* One sljit_s32 per code unit of the pattern. When has_then is set, one
extra byte per code unit is allocated behind that array; it becomes
then_offsets below. */
total_length = ccend - common->start;
common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
if (!common->private_data_ptrs)
  {
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }
memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));

/* private_data_size starts after one word per bracket at cbra_ptr and is
increased by the two calls below. */
private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
set_private_data_ptrs(common, &private_data_size, ccend);
if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
  detect_early_fail(common, common->start, &private_data_size, 0, 0);

SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);

/* Too much local data is also reported as PCRE2_ERROR_NOMEMORY. */
if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
  {
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }

if (common->has_then)
  {
  /* The byte array shares the private_data_ptrs allocation. */
  common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
  memset(common->then_offsets, 0, total_length);
  set_then_offsets(common, common->start, NULL);
  }

compiler = sljit_create_compiler(allocator_data);
if (!compiler)
  {
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }
common->compiler = compiler;

/* Main pcre_jit_exec entry. */
sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size);

/* Register init. */
reset_ovector(common, (re->top_bracket + 1) * 2);
if (common->req_char_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);

/* The argument block arrives in SLJIT_S0. Load the subject start and end,
the stack bounds and the match limit from it; limit_match + 1 is stored in
the LIMIT_MATCH slot. */
OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);

if (common->early_fail_start_ptr < common->early_fail_end_ptr)
  reset_early_fail(common);

/* Initial values of the optional slots: -1 means hit_start is not set yet. */
if (mode == PCRE2_JIT_PARTIAL_SOFT)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
if (common->control_head_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);

/* Main part of the matching */
if ((re->overall_options & PCRE2_ANCHORED) == 0)
  {
  mainloop_label = mainloop_entry(common);
  continue_match_label = LABEL();
  /* Forward search if possible. */
  /* At most one of the fast forward strategies is emitted. */
  if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
    {
    if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
      ;
    else if ((re->flags & PCRE2_FIRSTSET) != 0)
      fast_forward_first_char(common);
    else if ((re->flags & PCRE2_STARTLINE) != 0)
      fast_forward_newline(common);
    else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
      fast_forward_start_bits(common);
    }
  }
else
  continue_match_label = LABEL();

/* Fail at once when fewer than minlength code units remain. The return
value is loaded first because the jump is bound directly to abort_label. */
if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
  {
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
  minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
  }
if (common->req_char_ptr != 0)
  reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);

/* Store the current STR_PTR in OVECTOR(0). */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
/* Copy the limit of allowed recursions. */
OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
if (common->capture_last_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
if (common->fast_forward_bc_ptr != NULL)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);

if (common->start_ptr != OVECTOR(0))
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);

/* Copy the beginning of the string. */
if (mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  /* Only while hit_start still holds its initial -1. */
  jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  JUMPHERE(jump);
  }
else if (mode == PCRE2_JIT_PARTIAL_HARD)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

/* Matching path of the whole pattern. */
compile_matchingpath(common, common->start, ccend, &rootbacktrack);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  {
  sljit_free_compiler(compiler);
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }

/* With PCRE2_ENDANCHORED the match must end at STR_END; the failing jump is
bound after the return code below. */
if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
  end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);

/* An empty match (STR_PTR equals the start stored in OVECTOR(0)) is
diverted to the option checks emitted near the end of the function. */
if (common->might_be_empty)
  {
  empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
  empty_match_found_label = LABEL();
  }

common->accept_label = LABEL();
if (common->accept != NULL)
  set_jumps(common->accept, common->accept_label);

/* This means we have a match. Update the ovector. */
copy_ovector(common, re->top_bracket + 1);
/* Every exit (quit, abort, failed minimum length check) shares the return
sequence that follows this label. */
common->quit_label = common->abort_label = LABEL();
if (common->quit != NULL)
  set_jumps(common->quit, common->quit_label);
if (common->abort != NULL)
  set_jumps(common->abort, common->abort_label);
if (minlength_check_failed != NULL)
  SET_LABEL(minlength_check_failed, common->abort_label);

sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);

if (common->failed_match != NULL)
  {
  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
  set_jumps(common->failed_match, LABEL());
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
  JUMPTO(SLJIT_JUMP, common->abort_label);
  }

if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
  JUMPHERE(end_anchor_failed);

if (mode != PCRE2_JIT_COMPLETE)
  {
  common->partialmatchlabel = LABEL();
  set_jumps(common->partialmatch, common->partialmatchlabel);
  return_with_partial_match(common, common->quit_label);
  }

/* Backtracking path of the whole pattern. A rejected empty match re-enters
here through empty_match_backtrack_label. */
if (common->might_be_empty)
  empty_match_backtrack_label = LABEL();
compile_backtrackingpath(common, rootbacktrack.top);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  {
  sljit_free_compiler(compiler);
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }

SLJIT_ASSERT(rootbacktrack.prev == NULL);
reset_match_label = LABEL();

if (mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  /* Update hit_start only in the first time. */
  jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
  JUMPHERE(jump);
  }

/* Check we have remaining characters. */
/* TMP1 holds the match end limit for the comparisons below. */
if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  }

/* Restore the position of the failed attempt. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
    (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);

/* Unanchored patterns try again while subject remains, either through the
newline shortcut or through the main loop. */
if ((re->overall_options & PCRE2_ANCHORED) == 0)
  {
  if (common->ff_newline_shortcut != NULL)
    {
    /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
    if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
      {
      if (common->match_end_ptr != 0)
        {
        /* STR_END is replaced by the limit in TMP1 for the jump; TMP3 keeps
        the real end so it can be restored when the jump is not taken. */
        OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
        OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
        CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
        OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
        }
      else
        CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
      }
    }
  else
    CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
  }

/* No more remaining characters. */
if (reqcu_not_found != NULL)
  set_jumps(reqcu_not_found, LABEL());

/* In soft partial mode a recorded hit (hit_start != -1) is returned as a
partial match instead of no match. */
if (mode == PCRE2_JIT_PARTIAL_SOFT)
  CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);

OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
JUMPTO(SLJIT_JUMP, common->quit_label);

flush_stubs(common);

if (common->might_be_empty)
  {
  /* Empty match handling: PCRE2_NOTEMPTY always rejects it (backtrack).
  Without PCRE2_NOTEMPTY_ATSTART it is accepted. Otherwise it is accepted
  only if STR_PTR differs from the 'str' field of the arguments. */
  JUMPHERE(empty_match);
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
  JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
  JUMPTO(SLJIT_ZERO, empty_match_found_label);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
  JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
  }

/* Emit the code of every recursion entry. compile_recurse() overwrites
common->quit_label, so the main one is saved here and restored afterwards. */
common->fast_forward_bc_ptr = NULL;
common->early_fail_start_ptr = 0;
common->early_fail_end_ptr = 0;
common->currententry = common->entries;
common->local_quit_available = TRUE;
quit_label = common->quit_label;
while (common->currententry != NULL)
  {
  /* Might add new entries. */
  compile_recurse(common);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    sljit_free_compiler(compiler);
    SLJIT_FREE(common->optimized_cbracket, allocator_data);
    SLJIT_FREE(common->private_data_ptrs, allocator_data);
    PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
    return PCRE2_ERROR_NOMEMORY;
    }
  flush_stubs(common);
  common->currententry = common->currententry->next;
  }
common->local_quit_available = FALSE;
common->quit_label = quit_label;

/* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
/* This is a (really) rare case. */
set_jumps(common->stackalloc, LABEL());
/* RETURN_ADDR is not a saved register. */
sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);

SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);

/* STR_PTR shares a register with the second call argument, so it is parked
in LOCALS1. The call is sljit_stack_resize(arguments->stack, STACK_LIMIT -
STACK_GROWTH_RATE). */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);

sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));

/* A zero result means the resize failed. Otherwise the result becomes the
new STACK_LIMIT, and the saved values are restored before returning. */
jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

/* Allocation failed. */
JUMPHERE(jump);
/* We break the return address cache here, but this is a really rare case. */
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
JUMPTO(SLJIT_JUMP, common->quit_label);

/* Call limit reached. */
set_jumps(common->calllimit, LABEL());
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
JUMPTO(SLJIT_JUMP, common->quit_label);

/* Shared out-of-line helpers. Each one is emitted only if a jump to it was
recorded while the pattern was compiled. */
if (common->revertframes != NULL)
  {
  set_jumps(common->revertframes, LABEL());
  do_revertframes(common);
  }
if (common->wordboundary != NULL)
  {
  set_jumps(common->wordboundary, LABEL());
  check_wordboundary(common);
  }
if (common->anynewline != NULL)
  {
  set_jumps(common->anynewline, LABEL());
  check_anynewline(common);
  }
if (common->hspace != NULL)
  {
  set_jumps(common->hspace, LABEL());
  check_hspace(common);
  }
if (common->vspace != NULL)
  {
  set_jumps(common->vspace, LABEL());
  check_vspace(common);
  }
if (common->casefulcmp != NULL)
  {
  set_jumps(common->casefulcmp, LABEL());
  do_casefulcmp(common);
  }
if (common->caselesscmp != NULL)
  {
  set_jumps(common->caselesscmp, LABEL());
  do_caselesscmp(common);
  }
if (common->reset_match != NULL)
  {
  /* After the reset, continue matching when STR_PTR is beyond TMP1;
  otherwise set STR_PTR to TMP1 and take the "attempt failed" route. */
  set_jumps(common->reset_match, LABEL());
  do_reset_match(common, (re->top_bracket + 1) * 2);
  CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
  OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
  JUMPTO(SLJIT_JUMP, reset_match_label);
  }
#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utfreadchar != NULL)
  {
  set_jumps(common->utfreadchar, LABEL());
  do_utfreadchar(common);
  }
if (common->utfreadtype8 != NULL)
  {
  set_jumps(common->utfreadtype8, LABEL());
  do_utfreadtype8(common);
  }
if (common->utfpeakcharback != NULL)
  {
  set_jumps(common->utfpeakcharback, LABEL());
  do_utfpeakcharback(common);
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
if (common->utfreadchar_invalid != NULL)
  {
  set_jumps(common->utfreadchar_invalid, LABEL());
  do_utfreadchar_invalid(common);
  }
if (common->utfreadnewline_invalid != NULL)
  {
  set_jumps(common->utfreadnewline_invalid, LABEL());
  do_utfreadnewline_invalid(common);
  }
if (common->utfmoveback_invalid)
  {
  set_jumps(common->utfmoveback_invalid, LABEL());
  do_utfmoveback_invalid(common);
  }
if (common->utfpeakcharback_invalid)
  {
  set_jumps(common->utfpeakcharback_invalid, LABEL());
  do_utfpeakcharback_invalid(common);
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
if (common->getucd != NULL)
  {
  set_jumps(common->getucd, LABEL());
  do_getucd(common);
  }
if (common->getucdtype != NULL)
  {
  set_jumps(common->getucdtype, LABEL());
  do_getucdtype(common);
  }
#endif /* SUPPORT_UNICODE */

/* The work arrays are released before machine code generation. */
SLJIT_FREE(common->optimized_cbracket, allocator_data);
SLJIT_FREE(common->private_data_ptrs, allocator_data);

executable_func = sljit_generate_code(compiler);
executable_size = sljit_get_generated_code_size(compiler);
sljit_free_compiler(compiler);

if (executable_func == NULL)
  {
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }

/* Reuse the function descriptor if possible. */
if (re->executable_jit != NULL)
  functions = (executable_functions *)re->executable_jit;
else
  {
  functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
  if (functions == NULL)
    {
    /* This case is highly unlikely since we just recently
    freed a lot of memory. Not impossible though. */
    sljit_free_code(executable_func);
    PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
    return PCRE2_ERROR_NOMEMORY;
    }
  memset(functions, 0, sizeof(executable_functions));
  functions->top_bracket = re->top_bracket + 1;
  functions->limit_match = re->limit_match;
  re->executable_jit = functions;
  }

/* Turn mode into an index. */
/* 0: complete, 1: partial soft, 2: partial hard. */
if (mode == PCRE2_JIT_COMPLETE)
  mode = 0;
else
  mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;

SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
functions->executable_funcs[mode] = executable_func;
functions->read_only_data_heads[mode] = common->read_only_data_head;
functions->executable_sizes[mode] = executable_size;
return 0;
}
14031 
14032 #endif
14033 
14034 /*************************************************
14035 *        JIT compile a Regular Expression        *
14036 *************************************************/
14037 
/* This function uses JIT to convert a previously-compiled pattern into machine
14039 code.
14040 
14041 Arguments:
14042   code          a compiled pattern
14043   options       JIT option bits
14044 
14045 Returns:        0: success or (*NOJIT) was used
14046                <0: an error code
14047 */
14048 
/* Mask of every option bit that pcre2_jit_compile() accepts. If any bit outside
this mask is set in its "options" argument, that function returns
PCRE2_ERROR_JIT_BADOPTION. */

#define PUBLIC_JIT_COMPILE_OPTIONS \
  (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_INVALID_UTF)
14051 
14052 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_compile(pcre2_code * code,uint32_t options)14053 pcre2_jit_compile(pcre2_code *code, uint32_t options)
14054 {
14055 pcre2_real_code *re = (pcre2_real_code *)code;
14056 
14057 if (code == NULL)
14058   return PCRE2_ERROR_NULL;
14059 
14060 if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14061   return PCRE2_ERROR_JIT_BADOPTION;
14062 
14063 /* Support for invalid UTF was first introduced in JIT, with the option
14064 PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
14065 compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
14066 preferred feature, with the earlier option deprecated. However, for backward
14067 compatibility, if the earlier option is set, it forces the new option so that
14068 if JIT matching falls back to the interpreter, there is still support for
14069 invalid UTF. However, if this function has already been successfully called
14070 without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
14071 non-invalid-supporting JIT code was compiled), give an error.
14072 
14073 If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
14074 actions are needed:
14075 
14076   1. Remove the definition from pcre2.h.in and from the list in
14077      PUBLIC_JIT_COMPILE_OPTIONS above.
14078 
14079   2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
14080 
14081   3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
14082 
14083   4. Delete the following short block of code. The setting of "re" and
14084      "functions" can be moved into the JIT-only block below, but if that is
14085      done, (void)re and (void)functions will be needed in the non-JIT case, to
14086      avoid compiler warnings.
14087 */
14088 
14089 #ifdef SUPPORT_JIT
14090 executable_functions *functions = (executable_functions *)re->executable_jit;
14091 static int executable_allocator_is_working = 0;
14092 #endif
14093 
14094 if ((options & PCRE2_JIT_INVALID_UTF) != 0)
14095   {
14096   if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
14097     {
14098 #ifdef SUPPORT_JIT
14099     if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
14100 #endif
14101     re->overall_options |= PCRE2_MATCH_INVALID_UTF;
14102     }
14103   }
14104 
14105 /* The above tests are run with and without JIT support. This means that
14106 PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
14107 interpreter support) even in the absence of JIT. But now, if there is no JIT
14108 support, give an error return. */
14109 
14110 #ifndef SUPPORT_JIT
14111 return PCRE2_ERROR_JIT_BADOPTION;
14112 #else  /* SUPPORT_JIT */
14113 
14114 /* There is JIT support. Do the necessary. */
14115 
14116 if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14117 
14118 if (executable_allocator_is_working == 0)
14119   {
14120   /* Checks whether the executable allocator is working. This check
14121      might run multiple times in multi-threaded environments, but the
14122      result should not be affected by it. */
14123   void *ptr = SLJIT_MALLOC_EXEC(32);
14124 
14125   executable_allocator_is_working = -1;
14126 
14127   if (ptr != NULL)
14128     {
14129     SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr));
14130     executable_allocator_is_working = 1;
14131     }
14132   }
14133 
14134 if (executable_allocator_is_working < 0)
14135   return PCRE2_ERROR_NOMEMORY;
14136 
14137 if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
14138   options |= PCRE2_JIT_INVALID_UTF;
14139 
14140 if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14141     || functions->executable_funcs[0] == NULL)) {
14142   uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14143   int result = jit_compile(code, options & ~excluded_options);
14144   if (result != 0)
14145     return result;
14146   }
14147 
14148 if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14149     || functions->executable_funcs[1] == NULL)) {
14150   uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14151   int result = jit_compile(code, options & ~excluded_options);
14152   if (result != 0)
14153     return result;
14154   }
14155 
14156 if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14157     || functions->executable_funcs[2] == NULL)) {
14158   uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14159   int result = jit_compile(code, options & ~excluded_options);
14160   if (result != 0)
14161     return result;
14162   }
14163 
14164 return 0;
14165 
14166 #endif  /* SUPPORT_JIT */
14167 }
14168 
14169 /* JIT compiler uses an all-in-one approach. This improves security,
14170    since the code generator functions are not exported. */
14171 
14172 #define INCLUDED_FROM_PCRE2_JIT_COMPILE
14173 
14174 #include "pcre2_jit_match.c"
14175 #include "pcre2_jit_misc.c"
14176 
14177 /* End of pcre2_jit_compile.c */
14178