1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9                     This module by Zoltan Herczeg
10      Original API code Copyright (c) 1997-2012 University of Cambridge
11           New API code Copyright (c) 2016-2019 University of Cambridge
12 
13 -----------------------------------------------------------------------------
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
16 
17     * Redistributions of source code must retain the above copyright notice,
18       this list of conditions and the following disclaimer.
19 
20     * Redistributions in binary form must reproduce the above copyright
21       notice, this list of conditions and the following disclaimer in the
22       documentation and/or other materials provided with the distribution.
23 
24     * Neither the name of the University of Cambridge nor the names of its
25       contributors may be used to endorse or promote products derived from
26       this software without specific prior written permission.
27 
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 POSSIBILITY OF SUCH DAMAGE.
39 -----------------------------------------------------------------------------
40 */
41 
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45 
46 #include "pcre2_internal.h"
47 
48 #ifdef SUPPORT_JIT
49 
50 /* All-in-one: Since we use the JIT compiler only from here,
51 we just include it. This way we don't need to touch the build
52 system files. */
53 
54 #define SLJIT_CONFIG_AUTO 1
55 #define SLJIT_CONFIG_STATIC 1
56 #define SLJIT_VERBOSE 0
57 
58 #ifdef PCRE2_DEBUG
59 #define SLJIT_DEBUG 1
60 #else
61 #define SLJIT_DEBUG 0
62 #endif
63 
64 #define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
65 #define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
66 
pcre2_jit_malloc(size_t size,void * allocator_data)67 static void * pcre2_jit_malloc(size_t size, void *allocator_data)
68 {
69 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
70 return allocator->malloc(size, allocator->memory_data);
71 }
72 
pcre2_jit_free(void * ptr,void * allocator_data)73 static void pcre2_jit_free(void *ptr, void *allocator_data)
74 {
75 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
76 allocator->free(ptr, allocator->memory_data);
77 }
78 
79 #include "sljit/sljitLir.c"
80 
81 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
82 #error Unsupported architecture
83 #endif
84 
85 /* Defines for debugging purposes. */
86 
87 /* 1 - Use unoptimized capturing brackets.
88    2 - Enable capture_last_ptr (includes option 1). */
89 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
90 
91 /* 1 - Always have a control head. */
92 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
93 
94 /* Allocate memory for the regex stack on the real machine stack.
95 Fast, but limited size. */
96 #define MACHINE_STACK_SIZE 32768
97 
/* Growth rate for stack allocated by the OS. Should be a multiple
of the page size. */
100 #define STACK_GROWTH_RATE 8192
101 
102 /* Enable to check that the allocation could destroy temporaries. */
103 #if defined SLJIT_DEBUG && SLJIT_DEBUG
104 #define DESTROY_REGISTERS 1
105 #endif
106 
107 /*
Short summary of the backtracking mechanism employed by the jit code generator:
109 
The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).
115 
116   'ab' - 'a' and 'b' regexps are concatenated
117   'a+' - 'a' is the sub-expression of the '+' operator
118 
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
124 
125  Greedy star operator (*) :
126    Matching path: match happens.
127    Backtrack path: match failed.
128  Non-greedy star operator (*?) :
129    Matching path: no need to perform a match.
130    Backtrack path: match is required.
131 
The following example shows the code generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:
135 
136    A(B|C)D
137 
138 The generated code will be the following:
139 
140  A matching path
141  '(' matching path (pushing arguments to the stack)
142  B matching path
143  ')' matching path (pushing arguments to the stack)
144  D matching path
145  return with successful match
146 
147  D backtrack path
148  ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
149  B backtrack path
150  C expected path
151  jump to D matching path
152  C backtrack path
153  A backtrack path
154 
 Notice that the order of the backtrack code paths is the opposite of the fast
 code paths. In this way the topmost value on the stack always belongs
 to the current backtrack code path. The backtrack path must check
 whether there is a next alternative. If so, it needs to jump back to
 the matching path eventually. Otherwise it needs to clear out its own stack
 frame and continue the execution on the backtrack code paths.
161 */
162 
163 /*
164 Saved stack frames:
165 
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
170 
171 The stack frames are stored in a chain list, and have the following format:
172 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
173 
174 Thus we can restore the private data to a particular point in the stack.
175 */
176 
177 typedef struct jit_arguments {
178   /* Pointers first. */
179   struct sljit_stack *stack;
180   PCRE2_SPTR str;
181   PCRE2_SPTR begin;
182   PCRE2_SPTR end;
183   pcre2_match_data *match_data;
184   PCRE2_SPTR startchar_ptr;
185   PCRE2_UCHAR *mark_ptr;
186   int (*callout)(pcre2_callout_block *, void *);
187   void *callout_data;
188   /* Everything else after. */
189   sljit_uw offset_limit;
190   sljit_u32 limit_match;
191   sljit_u32 oveccount;
192   sljit_u32 options;
193 } jit_arguments;
194 
195 #define JIT_NUMBER_OF_COMPILE_MODES 3
196 
197 typedef struct executable_functions {
198   void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
199   void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
200   sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
201   sljit_u32 top_bracket;
202   sljit_u32 limit_match;
203 } executable_functions;
204 
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  /* The jump held by this node. */
  struct sljit_jump *jump;
  /* Following node, or NULL at the end of the list. */
  struct jump_list *next;
} jump_list;
209 
/* Node of a singly linked list of stubs; each stub pairs the jump that
enters it with the label used to leave it. */
typedef struct stub_list {
  /* Jump leading into the stub. */
  struct sljit_jump *start;
  /* Label to continue at after the stub. */
  struct sljit_label *quit;
  /* Following node, or NULL at the end of the list. */
  struct stub_list *next;
} stub_list;
215 
/* Negative sentinel values used where a frame size would otherwise appear. */
enum frame_types {
  no_stack = -2,
  no_frame = -1
};
220 
/* Kinds of control entries. */
enum control_types {
  type_mark      = 0,
  type_then_trap = 1
};
225 
/* Kinds of early-fail handling. */
enum early_fail_types {
  type_skip       = 0,
  type_fail       = 1,
  type_fail_range = 2
};
231 
232 typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
233 
234 /* The following structure is the key data type for the recursive
235 code generator. It is allocated by compile_matchingpath, and contains
236 the arguments for compile_backtrackingpath. Must be the first member
237 of its descendants. */
238 typedef struct backtrack_common {
239   /* Concatenation stack. */
240   struct backtrack_common *prev;
241   jump_list *nextbacktracks;
242   /* Internal stack (for component operators). */
243   struct backtrack_common *top;
244   jump_list *topbacktracks;
245   /* Opcode pointer. */
246   PCRE2_SPTR cc;
247 } backtrack_common;
248 
249 typedef struct assert_backtrack {
250   backtrack_common common;
251   jump_list *condfailed;
252   /* Less than 0 if a frame is not needed. */
253   int framesize;
254   /* Points to our private memory word on the stack. */
255   int private_data_ptr;
256   /* For iterators. */
257   struct sljit_label *matchingpath;
258 } assert_backtrack;
259 
260 typedef struct bracket_backtrack {
261   backtrack_common common;
262   /* Where to coninue if an alternative is successfully matched. */
263   struct sljit_label *alternative_matchingpath;
264   /* For rmin and rmax iterators. */
265   struct sljit_label *recursive_matchingpath;
266   /* For greedy ? operator. */
267   struct sljit_label *zero_matchingpath;
268   /* Contains the branches of a failed condition. */
269   union {
270     /* Both for OP_COND, OP_SCOND. */
271     jump_list *condfailed;
272     assert_backtrack *assert;
273     /* For OP_ONCE. Less than 0 if not needed. */
274     int framesize;
275     /* For brackets with >3 alternatives. */
276     struct sljit_put_label *matching_put_label;
277   } u;
278   /* Points to our private memory word on the stack. */
279   int private_data_ptr;
280 } bracket_backtrack;
281 
282 typedef struct bracketpos_backtrack {
283   backtrack_common common;
284   /* Points to our private memory word on the stack. */
285   int private_data_ptr;
286   /* Reverting stack is needed. */
287   int framesize;
288   /* Allocated stack size. */
289   int stacksize;
290 } bracketpos_backtrack;
291 
292 typedef struct braminzero_backtrack {
293   backtrack_common common;
294   struct sljit_label *matchingpath;
295 } braminzero_backtrack;
296 
297 typedef struct char_iterator_backtrack {
298   backtrack_common common;
299   /* Next iteration. */
300   struct sljit_label *matchingpath;
301   union {
302     jump_list *backtracks;
303     struct {
304       unsigned int othercasebit;
305       PCRE2_UCHAR chr;
306       BOOL enabled;
307     } charpos;
308   } u;
309 } char_iterator_backtrack;
310 
311 typedef struct ref_iterator_backtrack {
312   backtrack_common common;
313   /* Next iteration. */
314   struct sljit_label *matchingpath;
315 } ref_iterator_backtrack;
316 
317 typedef struct recurse_entry {
318   struct recurse_entry *next;
319   /* Contains the function entry label. */
320   struct sljit_label *entry_label;
321   /* Contains the function entry label. */
322   struct sljit_label *backtrack_label;
323   /* Collects the entry calls until the function is not created. */
324   jump_list *entry_calls;
325   /* Collects the backtrack calls until the function is not created. */
326   jump_list *backtrack_calls;
327   /* Points to the starting opcode. */
328   sljit_sw start;
329 } recurse_entry;
330 
331 typedef struct recurse_backtrack {
332   backtrack_common common;
333   /* Return to the matching path. */
334   struct sljit_label *matchingpath;
335   /* Recursive pattern. */
336   recurse_entry *entry;
337   /* Pattern is inlined. */
338   BOOL inlined_pattern;
339 } recurse_backtrack;
340 
341 #define OP_THEN_TRAP OP_TABLE_LENGTH
342 
343 typedef struct then_trap_backtrack {
344   backtrack_common common;
345   /* If then_trap is not NULL, this structure contains the real
346   then_trap for the backtracking path. */
347   struct then_trap_backtrack *then_trap;
348   /* Points to the starting opcode. */
349   sljit_sw start;
350   /* Exit point for the then opcodes of this alternative. */
351   jump_list *quit;
352   /* Frame size of the current alternative. */
353   int framesize;
354 } then_trap_backtrack;
355 
356 #define MAX_N_CHARS 12
357 #define MAX_DIFF_CHARS 5
358 
359 typedef struct fast_forward_char_data {
360   /* Number of characters in the chars array, 255 for any character. */
361   sljit_u8 count;
362   /* Number of last UTF-8 characters in the chars array. */
363   sljit_u8 last_count;
364   /* Available characters in the current position. */
365   PCRE2_UCHAR chars[MAX_DIFF_CHARS];
366 } fast_forward_char_data;
367 
368 #define MAX_CLASS_RANGE_SIZE 4
369 #define MAX_CLASS_CHARS_SIZE 3
370 
371 typedef struct compiler_common {
372   /* The sljit ceneric compiler. */
373   struct sljit_compiler *compiler;
374   /* Compiled regular expression. */
375   pcre2_real_code *re;
376   /* First byte code. */
377   PCRE2_SPTR start;
378   /* Maps private data offset to each opcode. */
379   sljit_s32 *private_data_ptrs;
380   /* Chain list of read-only data ptrs. */
381   void *read_only_data_head;
382   /* Tells whether the capturing bracket is optimized. */
383   sljit_u8 *optimized_cbracket;
384   /* Tells whether the starting offset is a target of then. */
385   sljit_u8 *then_offsets;
386   /* Current position where a THEN must jump. */
387   then_trap_backtrack *then_trap;
388   /* Starting offset of private data for capturing brackets. */
389   sljit_s32 cbra_ptr;
390   /* Output vector starting point. Must be divisible by 2. */
391   sljit_s32 ovector_start;
392   /* Points to the starting character of the current match. */
393   sljit_s32 start_ptr;
394   /* Last known position of the requested byte. */
395   sljit_s32 req_char_ptr;
396   /* Head of the last recursion. */
397   sljit_s32 recursive_head_ptr;
398   /* First inspected character for partial matching.
399      (Needed for avoiding zero length partial matches.) */
400   sljit_s32 start_used_ptr;
401   /* Starting pointer for partial soft matches. */
402   sljit_s32 hit_start;
403   /* Pointer of the match end position. */
404   sljit_s32 match_end_ptr;
405   /* Points to the marked string. */
406   sljit_s32 mark_ptr;
407   /* Recursive control verb management chain. */
408   sljit_s32 control_head_ptr;
409   /* Points to the last matched capture block index. */
410   sljit_s32 capture_last_ptr;
411   /* Fast forward skipping byte code pointer. */
412   PCRE2_SPTR fast_forward_bc_ptr;
413   /* Locals used by fast fail optimization. */
414   sljit_s32 early_fail_start_ptr;
415   sljit_s32 early_fail_end_ptr;
416 
417   /* Flipped and lower case tables. */
418   const sljit_u8 *fcc;
419   sljit_sw lcc;
420   /* Mode can be PCRE2_JIT_COMPLETE and others. */
421   int mode;
422   /* TRUE, when empty match is accepted for partial matching. */
423   BOOL allow_empty_partial;
424   /* TRUE, when minlength is greater than 0. */
425   BOOL might_be_empty;
426   /* \K is found in the pattern. */
427   BOOL has_set_som;
428   /* (*SKIP:arg) is found in the pattern. */
429   BOOL has_skip_arg;
430   /* (*THEN) is found in the pattern. */
431   BOOL has_then;
432   /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
433   BOOL has_skip_in_assert_back;
434   /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
435   BOOL local_quit_available;
436   /* Currently in a positive assertion. */
437   BOOL in_positive_assertion;
438   /* Newline control. */
439   int nltype;
440   sljit_u32 nlmax;
441   sljit_u32 nlmin;
442   int newline;
443   int bsr_nltype;
444   sljit_u32 bsr_nlmax;
445   sljit_u32 bsr_nlmin;
446   /* Dollar endonly. */
447   int endonly;
448   /* Tables. */
449   sljit_sw ctypes;
450   /* Named capturing brackets. */
451   PCRE2_SPTR name_table;
452   sljit_sw name_count;
453   sljit_sw name_entry_size;
454 
455   /* Labels and jump lists. */
456   struct sljit_label *partialmatchlabel;
457   struct sljit_label *quit_label;
458   struct sljit_label *abort_label;
459   struct sljit_label *accept_label;
460   struct sljit_label *ff_newline_shortcut;
461   stub_list *stubs;
462   recurse_entry *entries;
463   recurse_entry *currententry;
464   jump_list *partialmatch;
465   jump_list *quit;
466   jump_list *positive_assertion_quit;
467   jump_list *abort;
468   jump_list *failed_match;
469   jump_list *accept;
470   jump_list *calllimit;
471   jump_list *stackalloc;
472   jump_list *revertframes;
473   jump_list *wordboundary;
474   jump_list *anynewline;
475   jump_list *hspace;
476   jump_list *vspace;
477   jump_list *casefulcmp;
478   jump_list *caselesscmp;
479   jump_list *reset_match;
480   BOOL unset_backref;
481   BOOL alt_circumflex;
482 #ifdef SUPPORT_UNICODE
483   BOOL utf;
484   BOOL invalid_utf;
485   BOOL ucp;
486   /* Points to saving area for iref. */
487   sljit_s32 iref_ptr;
488   jump_list *getucd;
489   jump_list *getucdtype;
490 #if PCRE2_CODE_UNIT_WIDTH == 8
491   jump_list *utfreadchar;
492   jump_list *utfreadtype8;
493   jump_list *utfpeakcharback;
494 #endif
495 #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
496   jump_list *utfreadchar_invalid;
497   jump_list *utfreadnewline_invalid;
498   jump_list *utfmoveback_invalid;
499   jump_list *utfpeakcharback_invalid;
500 #endif
501 #endif /* SUPPORT_UNICODE */
502 } compiler_common;
503 
/* Working state for byte_sequence_compare. When SLJIT_UNALIGNED is set it
additionally carries two same-shaped unions, c and oc, that overlay a
machine word with an array of code units of the configured width. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  /* First overlay. */
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } c;
  /* Second overlay, same layout as c. */
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
537 
/* Drop sljit's CMP so that this file can define its own macro of that name. */
#undef CMP

/* Byte offset of the i-th machine word on the stack. */
#define STACK(i)      ((i) * (int)sizeof(sljit_sw))

/* If sljit names a preferred shift register, only R2 and R3 are handled;
SHIFT_REG_IS_R3 records the R3 case and anything else is rejected. */
#ifdef SLJIT_PREF_SHIFT_REG
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
/* Nothing. */
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
#define SHIFT_REG_IS_R3
#else
#error "Unsupported shift register"
#endif
#endif

/* Register assignment. TMP2 and TMP3 trade places when SHIFT_REG_IS_R3 is
defined, so that TMP2 is then R3. */
#define TMP1          SLJIT_R0
#ifdef SHIFT_REG_IS_R3
#define TMP2          SLJIT_R3
#define TMP3          SLJIT_R2
#else
#define TMP2          SLJIT_R2
#define TMP3          SLJIT_R3
#endif
#define STR_PTR       SLJIT_R1
#define STR_END       SLJIT_S0
#define STACK_TOP     SLJIT_S1
#define STACK_LIMIT   SLJIT_S2
#define COUNT_MATCH   SLJIT_S3
#define ARGUMENTS     SLJIT_S4
#define RETURN_ADDR   SLJIT_R4

/* HAS_VIRTUAL_REGISTERS is 1 only for the x86-32 configuration. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#define HAS_VIRTUAL_REGISTERS 1
#else
#define HAS_VIRTUAL_REGISTERS 0
#endif
575 
576 /* Local space layout. */
577 /* These two locals can be used by the current opcode. */
578 #define LOCALS0          (0 * sizeof(sljit_sw))
579 #define LOCALS1          (1 * sizeof(sljit_sw))
580 /* Two local variables for possessive quantifiers (char1 cannot use them). */
581 #define POSSESSIVE0      (2 * sizeof(sljit_sw))
582 #define POSSESSIVE1      (3 * sizeof(sljit_sw))
583 /* Max limit of recursions. */
584 #define LIMIT_MATCH      (4 * sizeof(sljit_sw))
585 /* The output vector is stored on the stack, and contains pointers
586 to characters. The vector data is divided into two groups: the first
587 group contains the start / end character pointers, and the second is
588 the start pointers when the end of the capturing group has not yet reached. */
589 #define OVECTOR_START    (common->ovector_start)
590 #define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
591 #define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
592 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
593 
594 #if PCRE2_CODE_UNIT_WIDTH == 8
595 #define MOV_UCHAR  SLJIT_MOV_U8
596 #define IN_UCHARS(x) (x)
597 #elif PCRE2_CODE_UNIT_WIDTH == 16
598 #define MOV_UCHAR  SLJIT_MOV_U16
599 #define UCHAR_SHIFT (1)
600 #define IN_UCHARS(x) ((x) * 2)
601 #elif PCRE2_CODE_UNIT_WIDTH == 32
602 #define MOV_UCHAR  SLJIT_MOV_U32
603 #define UCHAR_SHIFT (2)
604 #define IN_UCHARS(x) ((x) * 4)
605 #else
606 #error Unsupported compiling mode
607 #endif
608 
/* Shorthand wrappers around the sljit emitter API. Apart from
DEFINE_COMPILER, which declares it from common->compiler, every macro here
expects a local variable named compiler to be in scope. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define OP_SRC(op, src, srcw) \
  sljit_emit_op_src(compiler, (op), (src), (srcw))
/* Emit a label at the current position. */
#define LABEL() \
  sljit_emit_label(compiler)
/* Emit a jump whose target is set later. */
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emit a jump and bind it to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Bind an earlier jump to a label emitted here. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
/* Compare-and-jump; CMPTO additionally binds the jump to a label. */
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define CMOV(type, dst_reg, src, srcw) \
  sljit_emit_cmov(compiler, (type), (dst_reg), (src), (srcw))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))

/* Largest value accepted as a character read limit. */
#define READ_CHAR_MAX 0x7fffffff

/* Special character values. */
#define INVALID_UTF_CHAR -1
#define UNASSIGNED_UTF_CHAR 888
643 
644 #if defined SUPPORT_UNICODE
645 #if PCRE2_CODE_UNIT_WIDTH == 8
646 
/* Forward reader for UTF-8 input that may be invalid (8-bit code units).

On success c receives the decoded code point and ptr is advanced past the
sequence. invalid_action is executed when the sequence is truncated by end,
has a bad lead or continuation byte, is an overlong 3- or 4-byte form, is a
surrogate, or is above 0x10ffff.

Notes for callers:
 - ptr[0] is read without a bounds check, so ptr < end must already hold.
 - When invalid_action runs because of an overlong / surrogate / out of range
   value, ptr has already been advanced; for the other failures it has not.
   c may hold a partial value in either case.
 - The expansion is a bare block, so a break, continue or return passed as
   invalid_action applies to the caller's own enclosing construct.
 - Arguments are evaluated more than once; they are parenthesized here so
   that compound expressions may be passed safely. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if ((ptr)[0] <= 0x7f) \
    (c) = *(ptr)++; \
  else if ((ptr) + 1 < (end) && (ptr)[1] >= 0x80 && (ptr)[1] < 0xc0) \
    { \
    (c) = (ptr)[1] - 0x80; \
    \
    if ((ptr)[0] >= 0xc2 && (ptr)[0] <= 0xdf) \
      { \
      (c) |= ((ptr)[0] - 0xc0) << 6; \
      (ptr) += 2; \
      } \
    else if ((ptr) + 2 < (end) && (ptr)[2] >= 0x80 && (ptr)[2] < 0xc0) \
      { \
      (c) = (c) << 6 | ((ptr)[2] - 0x80); \
      \
      if ((ptr)[0] >= 0xe0 && (ptr)[0] <= 0xef) \
        { \
        (c) |= ((ptr)[0] - 0xe0) << 12; \
        (ptr) += 3; \
        \
        if ((c) < 0x800 || ((c) >= 0xd800 && (c) < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if ((ptr) + 3 < (end) && (ptr)[3] >= 0x80 && (ptr)[3] < 0xc0) \
        { \
        (c) = (c) << 6 | ((ptr)[3] - 0x80); \
        \
        if ((ptr)[0] >= 0xf0 && (ptr)[0] <= 0xf4) \
          { \
          (c) |= ((ptr)[0] - 0xf0) << 18; \
          (ptr) += 4; \
          \
          if ((c) >= 0x110000 || (c) < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
708 
/* Backward reader for UTF-8 input that may be invalid (8-bit code units).

Decodes the character that ends just before ptr. On success c receives the
code point and ptr is moved back to the first byte of the sequence.
invalid_action is executed when the sequence would start before start, has
a bad lead or continuation byte, is an overlong 3- or 4-byte form, is a
surrogate, or is above 0x10ffff.

The bytes are visited last-to-first, so each continuation byte contributes
its six payload bits at a growing shift (0, 6, 12) and the lead byte supplies
the top bits. The earlier form shifted the accumulated value left instead,
which swapped the payload groups of 3- and 4-byte sequences.

Notes for callers:
 - ptr[-1] is read without a bounds check, so ptr > start must already hold.
 - When invalid_action runs because of an overlong / surrogate / out of range
   value, ptr has already been moved back; for the other failures it has not.
   c may hold a partial value in either case.
 - The expansion is a bare block, so a break, continue or return passed as
   invalid_action applies to the caller's own enclosing construct.
 - Arguments are evaluated more than once; they are parenthesized here so
   that compound expressions may be passed safely. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  (c) = (ptr)[-1]; \
  if ((c) <= 0x7f) \
    (ptr)--; \
  else if ((ptr) - 1 > (start) && (ptr)[-1] >= 0x80 && (ptr)[-1] < 0xc0) \
    { \
    (c) -= 0x80; \
    \
    if ((ptr)[-2] >= 0xc2 && (ptr)[-2] <= 0xdf) \
      { \
      (c) |= ((ptr)[-2] - 0xc0) << 6; \
      (ptr) -= 2; \
      } \
    else if ((ptr) - 2 > (start) && (ptr)[-2] >= 0x80 && (ptr)[-2] < 0xc0) \
      { \
      (c) |= ((ptr)[-2] - 0x80) << 6; \
      \
      if ((ptr)[-3] >= 0xe0 && (ptr)[-3] <= 0xef) \
        { \
        (c) |= ((ptr)[-3] - 0xe0) << 12; \
        (ptr) -= 3; \
        \
        if ((c) < 0x800 || ((c) >= 0xd800 && (c) < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if ((ptr) - 3 > (start) && (ptr)[-3] >= 0x80 && (ptr)[-3] < 0xc0) \
        { \
        (c) |= ((ptr)[-3] - 0x80) << 12; \
        \
        if ((ptr)[-4] >= 0xf0 && (ptr)[-4] <= 0xf4) \
          { \
          (c) |= ((ptr)[-4] - 0xf0) << 18; \
          (ptr) -= 4; \
          \
          if ((c) >= 0x110000 || (c) < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
771 
772 #elif PCRE2_CODE_UNIT_WIDTH == 16
773 
/* Forward reader for UTF-16 input that may be invalid (16-bit code units).

A unit outside 0xd800..0xdfff is returned in c as is and ptr advances by
one. A high surrogate followed, before end, by a low surrogate is combined
into a code point from 0x10000 upwards and ptr advances by two. Anything
else executes invalid_action with ptr and c left untouched.

ptr[0] is read without a bounds check. The expansion is a bare block, so a
break, continue or return passed as invalid_action applies to the caller's
enclosing construct. Arguments are evaluated more than once and are
parenthesized so that compound expressions may be passed safely. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if ((ptr)[0] < 0xd800 || (ptr)[0] >= 0xe000) \
    (c) = *(ptr)++; \
  else if ((ptr)[0] < 0xdc00 && (ptr) + 1 < (end) && (ptr)[1] >= 0xdc00 && (ptr)[1] < 0xe000) \
    { \
    (c) = ((((ptr)[0] - 0xd800) << 10) | ((ptr)[1] - 0xdc00)) + 0x10000; \
    (ptr) += 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
788 
/* Backward reader for UTF-16 input that may be invalid (16-bit code units).

c is first loaded with the unit before ptr. If that unit is outside
0xd800..0xdfff, ptr moves back by one. If it is a low surrogate preceded,
after start, by a high surrogate, the pair is combined into a code point
from 0x10000 upwards and ptr moves back by two. Anything else executes
invalid_action with ptr untouched and c holding the raw unit.

ptr[-1] is read without a bounds check. The expansion is a bare block, so a
break, continue or return passed as invalid_action applies to the caller's
enclosing construct. Arguments are evaluated more than once and are
parenthesized so that compound expressions may be passed safely. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  (c) = (ptr)[-1]; \
  if ((c) < 0xd800 || (c) >= 0xe000) \
    (ptr)--; \
  else if ((c) >= 0xdc00 && (ptr) - 1 > (start) && (ptr)[-2] >= 0xd800 && (ptr)[-2] < 0xdc00) \
    { \
    (c) = ((((ptr)[-2] - 0xd800) << 10) | ((c) - 0xdc00)) + 0x10000; \
    (ptr) -= 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
804 
805 
806 #elif PCRE2_CODE_UNIT_WIDTH == 32
807 
/* Forward reader for UTF-32 input that may be invalid (32-bit code units).

A unit below 0xd800, or in 0xe000..0x10ffff, is returned in c and ptr
advances by one. Any other value executes invalid_action with ptr and c
left untouched. The end argument is accepted for signature parity with the
other widths and is not used.

The expansion is a bare block, so a break, continue or return passed as
invalid_action applies to the caller's enclosing construct. Arguments are
evaluated more than once and are parenthesized so that compound expressions
may be passed safely. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if ((ptr)[0] < 0xd800 || ((ptr)[0] >= 0xe000 && (ptr)[0] < 0x110000)) \
    (c) = *(ptr)++; \
  else \
    { \
    invalid_action; \
    } \
  }
817 
/* Backward reader for UTF-32 input that may be invalid (32-bit code units).

c is always loaded with the unit before ptr. If that unit is below 0xd800,
or in 0xe000..0x10ffff, ptr moves back by one; otherwise invalid_action is
executed with ptr untouched. The start argument is accepted for signature
parity with the other widths and is not used.

The expansion is a bare block, so a break, continue or return passed as
invalid_action applies to the caller's enclosing construct. Arguments are
evaluated more than once and are parenthesized so that compound expressions
may be passed safely. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  (c) = (ptr)[-1]; \
  if ((ptr)[-1] < 0xd800 || ((ptr)[-1] >= 0xe000 && (ptr)[-1] < 0x110000)) \
    (ptr)--; \
  else \
    { \
    invalid_action; \
    } \
  }
828 
829 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
830 #endif /* SUPPORT_UNICODE */
831 
/* Returns the position just past the closing KET of the group that starts
at cc. cc must point at an assertion opcode (OP_ASSERT .. OP_ASSERTBACK_NA)
or at a bracket opcode (OP_ONCE .. OP_SCOND). The link stored after each
opcode is followed across every OP_ALT until the closing KET is reached. */

static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* The first hop is unconditional; further hops only while on an OP_ALT. */
cc += GET(cc, 1);
while (*cc == OP_ALT)
  cc += GET(cc, 1);

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return cc + 1 + LINK_SIZE;
}
840 
/* Returns the number of alternatives in the group that starts at cc (always
at least 1). cc must point at an assertion opcode (OP_ASSERT ..
OP_ASSERTBACK_NA) or at a bracket opcode (OP_ONCE .. OP_SCOND). */

static int no_alternatives(PCRE2_SPTR cc)
{
int alternatives = 1;

SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* Skip the first alternative, then count one more per OP_ALT found. */
cc += GET(cc, 1);
while (*cc == OP_ALT)
  {
  cc += GET(cc, 1);
  alternatives++;
  }

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return alternatives;
}
854 
/* Functions which might need modification for all new supported opcodes:
856  next_opcode
857  check_opcode_types
858  set_private_data_ptrs
859  get_framesize
860  init_frame
861  get_recurse_data_length
862  copy_recurse_data
863  compile_matchingpath
864  compile_backtrackingpath
865 */
866 
/* Returns the position of the opcode that follows the one at cc.

The opcode decides how its size is obtained:
 - most opcodes: the entry of the OP_lengths table;
 - single character opcodes and their repeats: the table entry, plus the
   extra length of a multi-unit character when UTF is active;
 - OP_TYPE* repeats: the table entry minus one;
 - OP_CALLOUT_STR and OP_XCLASS: a length stored inside the opcode;
 - OP_MARK and the verbs with an argument: 1 + 2 + cc[1].

NULL is returned for OP_ANYBYTE when UTF is active, and (after
SLJIT_UNREACHABLE) for any opcode that is not listed here. */

static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
SLJIT_UNUSED_ARG(common);

switch(*cc)
  {
  /* Size comes straight from the length table. */
  case OP_SOD: case OP_SOM: case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT: case OP_DIGIT:
  case OP_NOT_WHITESPACE: case OP_WHITESPACE:
  case OP_NOT_WORDCHAR: case OP_WORDCHAR:
  case OP_ANY: case OP_ALLANY:
  case OP_NOTPROP: case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE: case OP_HSPACE:
  case OP_NOT_VSPACE: case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN: case OP_EOD:
  case OP_CIRC: case OP_CIRCM:
  case OP_DOLL: case OP_DOLLM:
  case OP_CRSTAR: case OP_CRMINSTAR:
  case OP_CRPLUS: case OP_CRMINPLUS:
  case OP_CRQUERY: case OP_CRMINQUERY:
  case OP_CRRANGE: case OP_CRMINRANGE:
  case OP_CRPOSSTAR: case OP_CRPOSPLUS:
  case OP_CRPOSQUERY: case OP_CRPOSRANGE:
  case OP_CLASS: case OP_NCLASS:
  case OP_REF: case OP_REFI:
  case OP_DNREF: case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_KETRPOS:
  case OP_REVERSE:
  case OP_ASSERT: case OP_ASSERT_NOT:
  case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
  case OP_ASSERT_NA: case OP_ASSERTBACK_NA:
  case OP_ONCE:
  case OP_SCRIPT_RUN:
  case OP_BRA: case OP_BRAPOS:
  case OP_CBRA: case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA: case OP_SBRAPOS:
  case OP_SCBRA: case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF: case OP_DNCREF:
  case OP_RREF: case OP_DNRREF:
  case OP_FALSE: case OP_TRUE:
  case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO:
  case OP_PRUNE: case OP_SKIP: case OP_THEN: case OP_COMMIT:
  case OP_FAIL:
  case OP_ACCEPT: case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
    return cc + PRIV(OP_lengths)[*cc];

  /* Opcodes that end with a character: in UTF mode the character may
  occupy additional code units beyond the table length. */
  case OP_CHAR: case OP_CHARI:
  case OP_NOT: case OP_NOTI:
  case OP_STAR: case OP_MINSTAR:
  case OP_PLUS: case OP_MINPLUS:
  case OP_QUERY: case OP_MINQUERY:
  case OP_UPTO: case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
  case OP_STARI: case OP_MINSTARI:
  case OP_PLUSI: case OP_MINPLUSI:
  case OP_QUERYI: case OP_MINQUERYI:
  case OP_UPTOI: case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
  case OP_NOTSTAR: case OP_NOTMINSTAR:
  case OP_NOTPLUS: case OP_NOTMINPLUS:
  case OP_NOTQUERY: case OP_NOTMINQUERY:
  case OP_NOTUPTO: case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR: case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
  case OP_NOTSTARI: case OP_NOTMINSTARI:
  case OP_NOTPLUSI: case OP_NOTMINPLUSI:
  case OP_NOTQUERYI: case OP_NOTMINQUERYI:
  case OP_NOTUPTOI: case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
    cc += PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
    /* cc[-1] is the last code unit covered by the table length. */
    if (common->utf && HAS_EXTRALEN(cc[-1]))
      cc += GET_EXTRALEN(cc[-1]);
#endif
    return cc;

  /* Special cases: type repeats advance one unit less than the table says. */
  case OP_TYPESTAR: case OP_TYPEMINSTAR:
  case OP_TYPEPLUS: case OP_TYPEMINPLUS:
  case OP_TYPEQUERY: case OP_TYPEMINQUERY:
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
    return cc + PRIV(OP_lengths)[*cc] - 1;

  case OP_ANYBYTE:
#ifdef SUPPORT_UNICODE
    if (common->utf)
      return NULL;
#endif
    return cc + 1;

  /* Length is stored inside the opcode. */
  case OP_CALLOUT_STR:
    return cc + GET(cc, 1 + 2*LINK_SIZE);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  case OP_XCLASS:
    return cc + GET(cc, 1);
#endif

  /* Verbs carrying an argument; cc[1] enters the size computation. */
  case OP_MARK:
  case OP_COMMIT_ARG:
  case OP_PRUNE_ARG:
  case OP_SKIP_ARG:
  case OP_THEN_ARG:
    return cc + 1 + 2 + cc[1];

  default:
    SLJIT_UNREACHABLE();
    return NULL;
  }
}
1067 
/* Pre-scan of the compiled pattern in the range [cc, ccend).

While walking the opcodes this function
  - records feature flags in common (has_set_som, might_be_empty, has_then,
    has_skip_arg, has_skip_in_assert_back, control_head_ptr),
  - reserves words at common->ovector_start for iref_ptr, recursive_head_ptr,
    capture_last_ptr and mark_ptr the first time each one is needed,
  - clears the optimized_cbracket[] entry of every group that is referenced
    by number or by name, or that is opened by OP_CBRAPOS / OP_SCBRAPOS.

Returns FALSE when a conditional group starts with a callout, when one of the
verbs tested against assert_na_end below appears before the end of a
non-atomic assertion, or when next_opcode() returns NULL for an opcode.
Returns TRUE otherwise. */
static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
{
int count;
PCRE2_SPTR slot;
/* Furthest end seen so far of an OP_ASSERTBACK bracket and of a non-atomic
assertion bracket. Starting one unit before cc keeps the "cc < ..._end"
tests below false until such a bracket has been encountered. */
PCRE2_SPTR assert_back_end = cc - 1;
PCRE2_SPTR assert_na_end = cc - 1;

/* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    common->has_set_som = TRUE;
    common->might_be_empty = TRUE;
    cc += 1;
    break;

    case OP_REFI:
#ifdef SUPPORT_UNICODE
    /* Reserve three words for iref_ptr, only once. */
    if (common->iref_ptr == 0)
      {
      common->iref_ptr = common->ovector_start;
      common->ovector_start += 3 * sizeof(sljit_sw);
      }
#endif /* SUPPORT_UNICODE */
    /* Fall through. */
    case OP_REF:
    /* The referenced group is flagged in optimized_cbracket[]. */
    common->optimized_cbracket[GET2(cc, 1)] = 0;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    /* Remember the furthest end of any non-atomic assertion. */
    slot = bracketend(cc);
    if (slot > assert_na_end)
      assert_na_end = slot;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    case OP_SCOND:
    /* Only AUTO_CALLOUT can insert this opcode. We do
       not intend to support this case. */
    if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
      return FALSE;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CREF:
    common->optimized_cbracket[GET2(cc, 1)] = 0;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_DNREF:
    case OP_DNREFI:
    case OP_DNCREF:
    /* The opcode carries an index into the name table and an entry count;
    every group listed in those entries is flagged. */
    count = GET2(cc, 1 + IMM2_SIZE);
    slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
    while (count-- > 0)
      {
      common->optimized_cbracket[GET2(slot, 0)] = 0;
      slot += common->name_entry_size;
      }
    cc += 1 + 2 * IMM2_SIZE;
    break;

    case OP_RECURSE:
    /* Set its value only once. */
    if (common->recursive_head_ptr == 0)
      {
      common->recursive_head_ptr = common->ovector_start;
      common->ovector_start += sizeof(sljit_sw);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CALLOUT:
    case OP_CALLOUT_STR:
    /* Reserve one word for capture_last_ptr, only once. */
    if (common->capture_last_ptr == 0)
      {
      common->capture_last_ptr = common->ovector_start;
      common->ovector_start += sizeof(sljit_sw);
      }
    /* OP_CALLOUT has a fixed length, OP_CALLOUT_STR stores its own length. */
    cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
    break;

    case OP_ASSERTBACK:
    /* Remember the furthest end of any OP_ASSERTBACK bracket. */
    slot = bracketend(cc);
    if (slot > assert_back_end)
      assert_back_end = slot;
    cc += 1 + LINK_SIZE;
    break;

    case OP_THEN_ARG:
    common->has_then = TRUE;
    common->control_head_ptr = 1;
    /* Fall through. */

    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    /* Rejected before the end of a non-atomic assertion. */
    if (cc < assert_na_end)
      return FALSE;
    /* Fall through */
    case OP_MARK:
    /* Reserve one word for mark_ptr, only once. */
    if (common->mark_ptr == 0)
      {
      common->mark_ptr = common->ovector_start;
      common->ovector_start += sizeof(sljit_sw);
      }
    /* Skip the opcode, two further units and the cc[1] units of the name. */
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    common->has_then = TRUE;
    common->control_head_ptr = 1;
    cc += 1;
    break;

    case OP_SKIP:
    if (cc < assert_back_end)
      common->has_skip_in_assert_back = TRUE;
    if (cc < assert_na_end)
      return FALSE;
    cc += 1;
    break;

    case OP_SKIP_ARG:
    common->control_head_ptr = 1;
    common->has_skip_arg = TRUE;
    if (cc < assert_back_end)
      common->has_skip_in_assert_back = TRUE;
    if (cc < assert_na_end)
      return FALSE;
    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_COMMIT:
    case OP_ASSERT_ACCEPT:
    if (cc < assert_na_end)
      return FALSE;
    cc++;
    break;

    default:
    /* Everything else is only stepped over; a NULL result from
    next_opcode() makes the whole check fail. */
    cc = next_opcode(common, cc);
    if (cc == NULL)
      return FALSE;
    break;
    }
  }
return TRUE;
}
1228 
/* Upper limit for the counter maintained by detect_early_fail(). Scanning an
alternative stops as soon as the counter reaches this value, and the same
value is returned when the scan has to be given up. */
#define EARLY_FAIL_ENHANCE_MAX (1 + 3)

/*
Scans the leading items of every alternative of the bracket at cc (which must
be OP_ONCE, OP_BRA or OP_CBRA) and looks for unbounded single character,
character type and character class iterators. For each one found, an entry
combining the current *private_data_start offset (shifted left by 3) with
type_skip, type_fail or type_fail_range is stored in common->private_data_ptrs
at the position following the iterator opcode, and *private_data_start is
advanced by the space taken. Nested OP_ONCE / OP_BRA / OP_CBRA brackets are
scanned recursively while depth is below 4.

common: compiler state, updated as described above
cc: start of the bracket to scan
private_data_start: in/out offset of the next free private data word
depth: current nesting depth of the recursion
fast_forward_allowed: when TRUE, the first iterator found may be recorded as
  type_skip and stored in common->fast_forward_bc_ptr

start:
  0 - skip / early fail allowed
  1 - only early fail with range allowed
  >1 - (start - 1) early fail is processed

return: current number of iterators enhanced with fast fail
*/
static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start,
   sljit_s32 depth, int start, BOOL fast_forward_allowed)
{
PCRE2_SPTR begin = cc;
PCRE2_SPTR next_alt;
PCRE2_SPTR end;
PCRE2_SPTR accelerated_start;
BOOL prev_fast_forward_allowed;
int result = 0;
int count;

SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);

/* A bracket with more than one alternative never gets a type_skip entry. */
next_alt = cc + GET(cc, 1);
if (*next_alt == OP_ALT)
  fast_forward_allowed = FALSE;

/* One pass of this loop per alternative. */
do
  {
  count = start;
  /* Step over the bracket / OP_ALT header. */
  cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);

  /* Inside the switch, "continue" moves on to the next item of the
  alternative, while "break" falls to the accelerated_start handling
  placed after the switch. */
  while (TRUE)
    {
    accelerated_start = NULL;

    switch(*cc)
      {
      case OP_SOD:
      case OP_SOM:
      case OP_SET_SOM:
      case OP_NOT_WORD_BOUNDARY:
      case OP_WORD_BOUNDARY:
      case OP_EODN:
      case OP_EOD:
      case OP_CIRC:
      case OP_CIRCM:
      case OP_DOLL:
      case OP_DOLLM:
      /* Zero width assertions. */
      cc++;
      continue;

      case OP_NOT_DIGIT:
      case OP_DIGIT:
      case OP_NOT_WHITESPACE:
      case OP_WHITESPACE:
      case OP_NOT_WORDCHAR:
      case OP_WORDCHAR:
      case OP_ANY:
      case OP_ALLANY:
      case OP_ANYBYTE:
      case OP_NOT_HSPACE:
      case OP_HSPACE:
      case OP_NOT_VSPACE:
      case OP_VSPACE:
      /* Single character types: stepped over, but type_skip is no longer
      possible for later iterators. */
      fast_forward_allowed = FALSE;
      cc++;
      continue;

      case OP_ANYNL:
      case OP_EXTUNI:
      /* As above, and in addition only type_fail_range remains available. */
      fast_forward_allowed = FALSE;
      if (count == 0)
        count = 1;
      cc++;
      continue;

      case OP_NOTPROP:
      case OP_PROP:
      fast_forward_allowed = FALSE;
      cc += 1 + 2;
      continue;

      case OP_CHAR:
      case OP_CHARI:
      case OP_NOT:
      case OP_NOTI:
      fast_forward_allowed = FALSE;
      cc += 2;
#ifdef SUPPORT_UNICODE
      /* In UTF mode the character may occupy further code units. */
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      case OP_TYPESTAR:
      case OP_TYPEMINSTAR:
      case OP_TYPEPLUS:
      case OP_TYPEMINPLUS:
      case OP_TYPEPOSSTAR:
      case OP_TYPEPOSPLUS:
      /* The type or prop opcode is skipped in the next iteration. */
      cc += 1;

      /* Accelerated unless the repeated type is OP_ANYNL or OP_EXTUNI. */
      if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)
        {
        accelerated_start = cc - 1;
        break;
        }

      if (count == 0)
        count = 1;
      fast_forward_allowed = FALSE;
      continue;

      case OP_TYPEUPTO:
      case OP_TYPEMINUPTO:
      case OP_TYPEEXACT:
      case OP_TYPEPOSUPTO:
      cc += IMM2_SIZE;
      /* Fall through */

      case OP_TYPEQUERY:
      case OP_TYPEMINQUERY:
      case OP_TYPEPOSQUERY:
      /* The type or prop opcode is skipped in the next iteration. */
      fast_forward_allowed = FALSE;
      if (count == 0)
        count = 1;
      cc += 1;
      continue;

      case OP_STAR:
      case OP_MINSTAR:
      case OP_PLUS:
      case OP_MINPLUS:
      case OP_POSSTAR:
      case OP_POSPLUS:

      case OP_STARI:
      case OP_MINSTARI:
      case OP_PLUSI:
      case OP_MINPLUSI:
      case OP_POSSTARI:
      case OP_POSPLUSI:

      case OP_NOTSTAR:
      case OP_NOTMINSTAR:
      case OP_NOTPLUS:
      case OP_NOTMINPLUS:
      case OP_NOTPOSSTAR:
      case OP_NOTPOSPLUS:

      case OP_NOTSTARI:
      case OP_NOTMINSTARI:
      case OP_NOTPLUSI:
      case OP_NOTMINPLUSI:
      case OP_NOTPOSSTARI:
      case OP_NOTPOSPLUSI:
      /* Unbounded single character iterators are accelerated. */
      accelerated_start = cc;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      case OP_UPTO:
      case OP_MINUPTO:
      case OP_EXACT:
      case OP_POSUPTO:
      case OP_UPTOI:
      case OP_MINUPTOI:
      case OP_EXACTI:
      case OP_POSUPTOI:
      case OP_NOTUPTO:
      case OP_NOTMINUPTO:
      case OP_NOTEXACT:
      case OP_NOTPOSUPTO:
      case OP_NOTUPTOI:
      case OP_NOTMINUPTOI:
      case OP_NOTEXACTI:
      case OP_NOTPOSUPTOI:
      cc += IMM2_SIZE;
      /* Fall through */

      case OP_QUERY:
      case OP_MINQUERY:
      case OP_POSQUERY:
      case OP_QUERYI:
      case OP_MINQUERYI:
      case OP_POSQUERYI:
      case OP_NOTQUERY:
      case OP_NOTMINQUERY:
      case OP_NOTPOSQUERY:
      case OP_NOTQUERYI:
      case OP_NOTMINQUERYI:
      case OP_NOTPOSQUERYI:
      /* Bounded single character iterators are only stepped over. */
      fast_forward_allowed = FALSE;
      if (count == 0)
        count = 1;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      case OP_CLASS:
      case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
      case OP_XCLASS:
      accelerated_start = cc;
      cc += ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR))));
#else
      accelerated_start = cc;
      cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
#endif

      /* cc now points at the item following the class data; only the
      unbounded repeats keep accelerated_start set. */
      switch (*cc)
        {
        case OP_CRSTAR:
        case OP_CRMINSTAR:
        case OP_CRPLUS:
        case OP_CRMINPLUS:
        case OP_CRPOSSTAR:
        case OP_CRPOSPLUS:
        cc++;
        break;

        case OP_CRRANGE:
        case OP_CRMINRANGE:
        case OP_CRPOSRANGE:
        cc += 2 * IMM2_SIZE;
        /* Fall through */
        case OP_CRQUERY:
        case OP_CRMINQUERY:
        case OP_CRPOSQUERY:
        cc++;
        if (count == 0)
          count = 1;
        /* Fall through */
        default:
        /* Bounded repeat or no repeat at all: not accelerated. */
        accelerated_start = NULL;
        fast_forward_allowed = FALSE;
        continue;
        }
      break;

      case OP_ONCE:
      case OP_BRA:
      case OP_CBRA:
      /* NOTE(review): this value is never read; end is assigned again
      below before its first use. */
      end = cc + GET(cc, 1);

      prev_fast_forward_allowed = fast_forward_allowed;
      fast_forward_allowed = FALSE;
      /* Limit the recursion depth. */
      if (depth >= 4)
        break;

      /* Only brackets closed by a plain OP_KET are scanned, and a capturing
      bracket only if its optimized_cbracket[] entry is non-zero. */
      end = bracketend(cc) - (1 + LINK_SIZE);
      if (*end != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0))
        break;

      count = detect_early_fail(common, cc, private_data_start, depth + 1, count, prev_fast_forward_allowed);

      /* Propagate the mark of the nested bracket to this bracket. */
      if (PRIVATE_DATA(cc) != 0)
        common->private_data_ptrs[begin - common->start] = 1;

      if (count < EARLY_FAIL_ENHANCE_MAX)
        {
        cc = end + (1 + LINK_SIZE);
        continue;
        }
      break;

      case OP_KET:
      SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
      /* Stop at the end of the current alternative. */
      if (cc >= next_alt)
        break;
      cc += 1 + LINK_SIZE;
      continue;
      }

    if (accelerated_start != NULL)
      {
      if (count == 0)
        {
        count++;

        if (fast_forward_allowed)
          {
          /* First iterator and fast forward is still allowed: type_skip,
          one word. */
          common->fast_forward_bc_ptr = accelerated_start;
          common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
          *private_data_start += sizeof(sljit_sw);
          }
        else
          {
          /* First iterator without fast forward: type_fail, one word. */
          common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;

          if (common->early_fail_start_ptr == 0)
            common->early_fail_start_ptr = *private_data_start;

          *private_data_start += sizeof(sljit_sw);
          common->early_fail_end_ptr = *private_data_start;

          if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
            return EARLY_FAIL_ENHANCE_MAX;
          }
        }
      else
        {
        /* Any later iterator: type_fail_range, two words. */
        common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;

        if (common->early_fail_start_ptr == 0)
          common->early_fail_start_ptr = *private_data_start;

        *private_data_start += 2 * sizeof(sljit_sw);
        common->early_fail_end_ptr = *private_data_start;

        if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
          return EARLY_FAIL_ENHANCE_MAX;
        }

      /* Cannot be part of a repeat. */
      common->private_data_ptrs[begin - common->start] = 1;
      count++;

      if (count < EARLY_FAIL_ENHANCE_MAX)
        continue;
      }

    break;
    }

  /* When the scan stopped before reaching the end of the alternative, the
  maximum is reported; otherwise the largest count of all alternatives. */
  if (*cc != OP_ALT && *cc != OP_KET)
    result = EARLY_FAIL_ENHANCE_MAX;
  else if (result < count)
    result = count;

  cc = next_alt;
  next_alt = cc + GET(cc, 1);
  }
while (*cc == OP_ALT);

return result;
}
1575 
/* Returns the number of private data words (0, 1 or 2) requested for the
repeat opcode at cc, which is the item following a character class. Opcodes
that are not handled here yield 0. */
static int get_class_iterator_size(PCRE2_SPTR cc)
{
const PCRE2_UCHAR opcode = *cc;
sljit_u32 lower;
sljit_u32 upper;
sljit_u32 span;

if (opcode == OP_CRSTAR || opcode == OP_CRPLUS)
  return 2;

if (opcode == OP_CRMINSTAR || opcode == OP_CRMINPLUS
    || opcode == OP_CRQUERY || opcode == OP_CRMINQUERY)
  return 1;

if (opcode != OP_CRRANGE && opcode != OP_CRMINRANGE)
  return 0;

/* Range repeats: the two limits are stored after the opcode. */
lower = GET2(cc, 1);
upper = GET2(cc, 1 + IMM2_SIZE);

/* A zero upper limit is sized like the star forms above. */
if (upper == 0)
  return (opcode == OP_CRRANGE) ? 2 : 1;

/* Otherwise the difference of the limits, capped at two. */
span = upper - lower;
return (span > 2) ? 2 : (int)span;
}
1607 
/* Checks whether the bracket starting at begin is followed by identical
copies of itself, and if so records the repeat in common->private_data_ptrs.

Three consecutive entries are written, starting LINK_SIZE units before the
end of a bracket: the distance to skip, the repeat kind (OP_EXACT, OP_UPTO or
OP_MINUPTO) and the repeat count.

Returns TRUE when repeat data has been recorded for this bracket (or was
recorded earlier, see below), FALSE otherwise. */
static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
{
PCRE2_SPTR end = bracketend(begin);
PCRE2_SPTR next;
PCRE2_SPTR next_end;
PCRE2_SPTR max_end;
PCRE2_UCHAR type;
/* Length of the whole bracket in code units. */
sljit_sw length = end - begin;
sljit_s32 min, max, i;

/* Detect fixed iterations first. */
if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)
  return FALSE;

/* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?:AB){1,3}/
 * Skip the check of the second part. */
if (PRIVATE_DATA(end - LINK_SIZE) != 0)
  return TRUE;

/* Count the bracket itself plus every directly following copy that has
the same length and the same content. */
next = end;
min = 1;
while (1)
  {
  if (*next != *begin)
    break;
  next_end = bracketend(next);
  if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
    break;
  next = next_end;
  min++;
  }

/* Exactly two copies are not converted. */
if (min == 2)
  return FALSE;

max = 0;
max_end = next;
if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
  {
  type = *next;
  /* Count the nested levels of the form: type, OP_BRA, copy of the
  bracket. */
  while (1)
    {
    if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
      break;
    next_end = bracketend(next + 2 + LINK_SIZE);
    if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
      break;
    next = next_end;
    max++;
    }

  /* The innermost level must be: type followed directly by a copy. */
  if (next[0] == type && next[1] == *begin && max >= 1)
    {
    next_end = bracketend(next + 1);
    if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
      {
      /* One OP_KET is expected for each OP_BRA counted above. */
      for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
        if (*next_end != OP_KET)
          break;

      if (i == max)
        {
        common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
        /* +2 the original and the last. */
        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
        if (min == 1)
          return TRUE;
        /* The last fixed copy now belongs to the variable part: take it
        out of the fixed count and move max_end back to its start. */
        min--;
        max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
        }
      }
    }
  }

/* Three or more fixed copies are recorded as an OP_EXACT repeat. */
if (min >= 3)
  {
  common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
  common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
  common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
  return TRUE;
  }

return FALSE;
}
1693 
/* The macros below expand to groups of case labels, so that several switch
statements can share the same opcode lists. */

/* Single character iterators for which set_private_data_ptrs() requests one
private data word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

/* Greedy star / plus single character iterators; two private data words are
requested for them. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

/* Single character iterators with an upper limit (the opcode is followed by
an IMM2_SIZE count); two private data words are requested for them. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* Character type iterators for which one private data word is requested. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Greedy star / plus character type iterators. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* Character type iterators with an upper limit. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1745 
/* Walks the pattern from common->start to ccend and assigns private data
offsets to the opcodes that need one, storing each offset in
common->private_data_ptrs at the index of the opcode.

Offsets are handed out starting at *private_data_start; the first unused
offset is written back to *private_data_start on return. The walk is stopped
early once the running offset exceeds SLJIT_MAX_LOCAL_SIZE. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
PCRE2_SPTR cc = common->start;
PCRE2_SPTR alternative;
/* End of the enclosing bracket inside which character iterators must not
get a private slot (see the "cc >= end" tests below). */
PCRE2_SPTR end = NULL;
int private_data_ptr = *private_data_start;
/* space: number of words requested for the current opcode.
size: units to step over; a negative value means a single character
iterator whose character may have extra UTF code units.
bracketlen: header length when the current opcode opens a bracket. */
int space, size, bracketlen;
BOOL repeat_check = TRUE;

while (cc < ccend)
  {
  space = 0;
  size = 0;
  bracketlen = 0;
  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  /* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point). */
  if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
    {
    if (detect_repeat(common, cc))
      {
      /* These brackets are converted to repeats, so no global
      based single character repeat is allowed. */
      if (cc >= end)
        end = bracketend(cc);
      }
    }
  repeat_check = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry after the OP_KET was left there by detect_repeat():
    the bracket gets one word and the recorded distance is skipped. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    /* These brackets always get one word. */
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    common->private_data_ptrs[cc - common->start] = 0;
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    /* The bracket that follows is not checked by detect_repeat(). */
    size = 1;
    repeat_check = FALSE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    size = -2;
    space = 1;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    size = -2;
    space = 2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    size = -(2 + IMM2_SIZE);
    space = 2;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    size = 1;
    space = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    size = 1;
    /* No slot when the repeated type is OP_ANYNL or OP_EXTUNI. */
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      space = 2;
    break;

    case OP_TYPEUPTO:
    size = 1 + IMM2_SIZE;
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    break;

    case OP_TYPEMINUPTO:
    size = 1 + IMM2_SIZE;
    space = 2;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* The repeat opcode, if any, follows the class data. */
    size = 1 + 32 / sizeof(PCRE2_UCHAR);
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    default:
    /* Nothing to assign; cc is advanced here, and space, size and
    bracketlen all stay zero for the code below. */
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
     gets a private slot instead of allocating it on the stack. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw) * space;
    }

  if (size != 0)
    {
    if (size < 0)
      {
      cc += -size;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  if (bracketlen > 0)
    {
    /* Entering a bracket outside of the currently tracked region: track it
    unless it is closed by a plain OP_KET. */
    if (cc >= end)
      {
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
*private_data_start = private_data_ptr;
}
1930 
/* Computes the size of the frame needed for the opcodes between cc and ccend.

When ccend is NULL, cc must point to a bracket: the scan then covers the
content of that bracket, starting after its first opcode and stopping at its
final ket.

recursive: when TRUE, OP_SET_SOM and the mark carrying opcodes add nothing to
  the length, and the special handling of OP_CBRAPOS / OP_SCBRAPOS is skipped.
needs_control_head: output; set to TRUE when common->control_head_ptr is
  non-zero and OP_THEN, OP_MARK or one of the *_ARG verbs handled below is
  found (or unconditionally when DEBUG_FORCE_CONTROL_HEAD is enabled).

Returns with a frame_types (always < 0) if no need for frame. Otherwise the
accumulated length plus one is returned. */
static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
{
int length = 0;
/* Length contributed by an enclosing OP_CBRAPOS / OP_SCBRAPOS itself. */
int possessive = 0;
BOOL stack_restore = FALSE;
BOOL setsom_found = recursive;
BOOL setmark_found = recursive;
/* The last capture is a local variable even for recursions. */
BOOL capture_last_found = FALSE;

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
*needs_control_head = TRUE;
#else
*needs_control_head = FALSE;
#endif

if (ccend == NULL)
  {
  ccend = bracketend(cc) - (1 + LINK_SIZE);
  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
    {
    possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
    /* This is correct regardless of common->capture_last_ptr. */
    capture_last_found = TRUE;
    }
  cc = next_opcode(common, cc);
  }

SLJIT_ASSERT(cc != NULL);
while (cc < ccend)
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    stack_restore = TRUE;
    /* Counted only once, however many times it occurs. */
    if (!setsom_found)
      {
      length += 2;
      setsom_found = TRUE;
      }
    cc += 1;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    stack_restore = TRUE;
    if (!setmark_found)
      {
      length += 2;
      setmark_found = TRUE;
      }
    if (common->control_head_ptr != 0)
      *needs_control_head = TRUE;
    cc += 1 + 2 + cc[1];
    break;

    case OP_RECURSE:
    /* A recursion accounts for start-of-match, mark and last capture
    whenever the pattern uses them and they were not counted yet. */
    stack_restore = TRUE;
    if (common->has_set_som && !setsom_found)
      {
      length += 2;
      setsom_found = TRUE;
      }
    if (common->mark_ptr != 0 && !setmark_found)
      {
      length += 2;
      setmark_found = TRUE;
      }
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      length += 2;
      capture_last_found = TRUE;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    stack_restore = TRUE;
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      length += 2;
      capture_last_found = TRUE;
      }
    /* Every capturing bracket adds three. */
    length += 3;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_THEN:
    stack_restore = TRUE;
    if (common->control_head_ptr != 0)
      *needs_control_head = TRUE;
    cc ++;
    break;

    default:
    /* Opcodes not listed below set stack_restore; the listed ones are
    only stepped over. */
    stack_restore = TRUE;
    /* Fall through. */

    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_NOT_DIGIT:
    case OP_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
    case OP_ANYBYTE:
    case OP_NOTPROP:
    case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE:
    case OP_HSPACE:
    case OP_NOT_VSPACE:
    case OP_VSPACE:
    case OP_EXTUNI:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CHAR:
    case OP_CHARI:
    case OP_NOT:
    case OP_NOTI:

    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:

    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:

    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:

    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:

    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:

    case OP_CLASS:
    case OP_NCLASS:
    case OP_XCLASS:

    case OP_CALLOUT:
    case OP_CALLOUT_STR:

    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

/* Possessive quantifiers can use a special case. */
if (SLJIT_UNLIKELY(possessive == length))
  return stack_restore ? no_frame : no_stack;

if (length > 0)
  return length + 1;
return stack_restore ? no_frame : no_stack;
}
2117 
init_frame(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,int stackpos,int stacktop)2118 static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
2119 {
2120 DEFINE_COMPILER;
2121 BOOL setsom_found = FALSE;
2122 BOOL setmark_found = FALSE;
2123 /* The last capture is a local variable even for recursions. */
2124 BOOL capture_last_found = FALSE;
2125 int offset;
2126 
2127 /* >= 1 + shortest item size (2) */
2128 SLJIT_UNUSED_ARG(stacktop);
2129 SLJIT_ASSERT(stackpos >= stacktop + 2);
2130 
2131 stackpos = STACK(stackpos);
2132 if (ccend == NULL)
2133   {
2134   ccend = bracketend(cc) - (1 + LINK_SIZE);
2135   if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
2136     cc = next_opcode(common, cc);
2137   }
2138 
2139 SLJIT_ASSERT(cc != NULL);
2140 while (cc < ccend)
2141   switch(*cc)
2142     {
2143     case OP_SET_SOM:
2144     SLJIT_ASSERT(common->has_set_som);
2145     if (!setsom_found)
2146       {
2147       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2148       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2149       stackpos -= (int)sizeof(sljit_sw);
2150       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2151       stackpos -= (int)sizeof(sljit_sw);
2152       setsom_found = TRUE;
2153       }
2154     cc += 1;
2155     break;
2156 
2157     case OP_MARK:
2158     case OP_COMMIT_ARG:
2159     case OP_PRUNE_ARG:
2160     case OP_THEN_ARG:
2161     SLJIT_ASSERT(common->mark_ptr != 0);
2162     if (!setmark_found)
2163       {
2164       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2165       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2166       stackpos -= (int)sizeof(sljit_sw);
2167       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2168       stackpos -= (int)sizeof(sljit_sw);
2169       setmark_found = TRUE;
2170       }
2171     cc += 1 + 2 + cc[1];
2172     break;
2173 
2174     case OP_RECURSE:
2175     if (common->has_set_som && !setsom_found)
2176       {
2177       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2178       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2179       stackpos -= (int)sizeof(sljit_sw);
2180       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2181       stackpos -= (int)sizeof(sljit_sw);
2182       setsom_found = TRUE;
2183       }
2184     if (common->mark_ptr != 0 && !setmark_found)
2185       {
2186       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2187       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2188       stackpos -= (int)sizeof(sljit_sw);
2189       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2190       stackpos -= (int)sizeof(sljit_sw);
2191       setmark_found = TRUE;
2192       }
2193     if (common->capture_last_ptr != 0 && !capture_last_found)
2194       {
2195       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2196       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2197       stackpos -= (int)sizeof(sljit_sw);
2198       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2199       stackpos -= (int)sizeof(sljit_sw);
2200       capture_last_found = TRUE;
2201       }
2202     cc += 1 + LINK_SIZE;
2203     break;
2204 
2205     case OP_CBRA:
2206     case OP_CBRAPOS:
2207     case OP_SCBRA:
2208     case OP_SCBRAPOS:
2209     if (common->capture_last_ptr != 0 && !capture_last_found)
2210       {
2211       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2212       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2213       stackpos -= (int)sizeof(sljit_sw);
2214       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2215       stackpos -= (int)sizeof(sljit_sw);
2216       capture_last_found = TRUE;
2217       }
2218     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2219     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
2220     stackpos -= (int)sizeof(sljit_sw);
2221     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
2222     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
2223     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2224     stackpos -= (int)sizeof(sljit_sw);
2225     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
2226     stackpos -= (int)sizeof(sljit_sw);
2227 
2228     cc += 1 + LINK_SIZE + IMM2_SIZE;
2229     break;
2230 
2231     default:
2232     cc = next_opcode(common, cc);
2233     SLJIT_ASSERT(cc != NULL);
2234     break;
2235     }
2236 
2237 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
2238 SLJIT_ASSERT(stackpos == STACK(stacktop));
2239 }
2240 
2241 #define RECURSE_TMP_REG_COUNT 3
2242 
2243 typedef struct delayed_mem_copy_status {
2244   struct sljit_compiler *compiler;
2245   int store_bases[RECURSE_TMP_REG_COUNT];
2246   int store_offsets[RECURSE_TMP_REG_COUNT];
2247   int tmp_regs[RECURSE_TMP_REG_COUNT];
2248   int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
2249   int next_tmp_reg;
2250 } delayed_mem_copy_status;
2251 
/* Prepares a delayed copy sequence. The caller must have filled in
status->tmp_regs and status->saved_tmp_regs beforehand (they are only checked
here); every slot is marked as unused, the round-robin position is reset and
the compiler is taken from common. */
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
int slot = 0;

status->compiler = common->compiler;
status->next_tmp_reg = 0;

while (slot < RECURSE_TMP_REG_COUNT)
  {
  SLJIT_ASSERT(status->tmp_regs[slot] >= 0);
  /* A non-virtual saved register must be the working register itself. */
  SLJIT_ASSERT(sljit_get_register_index(status->saved_tmp_regs[slot]) < 0 || status->tmp_regs[slot] == status->saved_tmp_regs[slot]);

  /* -1 marks a slot without a pending store. */
  status->store_bases[slot] = -1;
  slot++;
  }
}
2266 
/* Queues the copy of one word from [load_base + load_offset] to
[store_base + store_offset]. The load is emitted immediately; the store is
remembered in the current slot and emitted later, either by the next call
that reuses the slot or by delayed_mem_copy_finish(). */
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_sw store_offset)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int reg = status->tmp_regs[slot];
int pending_base = status->store_bases[slot];

SLJIT_ASSERT(load_base > 0 && store_base > 0);

if (pending_base != -1)
  {
  /* The slot still carries an earlier value: write it out first. */
  OP1(SLJIT_MOV, SLJIT_MEM1(pending_base), status->store_offsets[slot], reg, 0);
  }
else if (sljit_get_register_index(status->saved_tmp_regs[slot]) < 0)
  {
  /* First use of the slot: preserve virtual registers. */
  OP1(SLJIT_MOV, status->saved_tmp_regs[slot], 0, reg, 0);
  }

OP1(SLJIT_MOV, reg, 0, SLJIT_MEM1(load_base), load_offset);

status->store_bases[slot] = store_base;
/* The slot table stores offsets as int. */
status->store_offsets[slot] = (int)store_offset;
status->next_tmp_reg = (slot + 1) % RECURSE_TMP_REG_COUNT;
}
2291 
/* Ends a delayed copy sequence: emits every store that is still pending,
visiting the slots in round-robin order starting from the next slot to be
used, and restores the registers that were moved aside for virtual
registers. */
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int remaining;

for (remaining = RECURSE_TMP_REG_COUNT; remaining > 0; remaining--)
  {
  if (status->store_bases[slot] != -1)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], status->tmp_regs[slot], 0);

    /* Restore virtual registers. */
    if (sljit_get_register_index(status->saved_tmp_regs[slot]) < 0)
      OP1(SLJIT_MOV, status->tmp_regs[slot], 0, status->saved_tmp_regs[slot], 0);
    }

  slot = (slot + 1) % RECURSE_TMP_REG_COUNT;
  }
}
2315 
2316 #undef RECURSE_TMP_REG_COUNT
2317 
/* Scans the byte code range [cc, ccend) and counts the machine words that
belong to it: private data words, capture (ovector) words and the shared
locals it may modify. Presumably this is the size of the save area that
copy_recurse_data() walks, which scans the same range the same way.

  needs_control_head  set to TRUE if a control head word was counted
  has_quit            set to TRUE if a PRUNE/SKIP/COMMIT/THEN style verb
                      (with or without argument) occurs in the range
  has_accept          set to TRUE if OP_ACCEPT or OP_ASSERT_ACCEPT occurs

Returns the number of words; the count starts at one. */
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
  BOOL *needs_control_head, BOOL *has_quit, BOOL *has_accept)
{
int words = 1;
int class_size;
PCRE2_SPTR branch_end;
BOOL som_used = FALSE;
BOOL mark_used = FALSE;
BOOL capture_last_used = FALSE;
BOOL control_head_used = FALSE;
BOOL quit_seen = FALSE;
BOOL accept_seen = FALSE;

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
control_head_used = TRUE;
#endif

/* Calculate the sum of the private machine words. */
while (cc < ccend)
  {
  switch(*cc)
    {
    /* Verbs and other opcodes that only influence the flags. */
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    som_used = TRUE;
    cc += 1;
    break;

    case OP_RECURSE:
    /* A nested recursion may touch every shared local that exists. */
    if (common->has_set_som)
      som_used = TRUE;
    if (common->mark_ptr != 0)
      mark_used = TRUE;
    if (common->capture_last_ptr != 0)
      capture_last_used = TRUE;
    cc += 1 + LINK_SIZE;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    mark_used = TRUE;
    if (common->control_head_ptr != 0)
      control_head_used = TRUE;
    /* Everything in this group except a plain mark is a quit verb. */
    if (*cc != OP_MARK)
      quit_seen = TRUE;

    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_SKIP:
    case OP_COMMIT:
    quit_seen = TRUE;
    cc++;
    break;

    case OP_SKIP_ARG:
    quit_seen = TRUE;
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    quit_seen = TRUE;
    control_head_used = TRUE;
    cc++;
    break;

    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    accept_seen = TRUE;
    cc++;
    break;

    /* Brackets. */
    case OP_KET:
    if (PRIVATE_DATA(cc) != 0)
      {
      words++;
      /* PRIVATE_DATA(cc + 1) is used as an extra distance to skip. */
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    words++;
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    /* Two ovector words, plus one more word for a bracket that is not
    flagged in optimized_cbracket. */
    words += (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) ? 3 : 2;
    if (common->capture_last_ptr != 0)
      capture_last_used = TRUE;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    words += 2 + 2;
    if (common->capture_last_ptr != 0)
      capture_last_used = TRUE;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    branch_end = cc + GET(cc, 1);
    if (*branch_end == OP_KETRMAX || *branch_end == OP_KETRMIN)
      words++;
    cc += 1 + LINK_SIZE;
    break;

    /* Iterators. */
    CASE_ITERATOR_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc) != 0)
      words++;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc) != 0)
      words++;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    class_size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
    class_size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif
    /* The word count depends on the iterator that follows the class. */
    if (PRIVATE_DATA(cc) != 0)
      words += get_class_iterator_size(cc + class_size);
    cc += class_size;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }
SLJIT_ASSERT(cc == ccend);

if (control_head_used)
  words++;
if (capture_last_used)
  words++;
/* Start-of-match and mark words are only counted when a quit verb exists. */
if (quit_seen && som_used)
  words++;
if (quit_seen && mark_used)
  words++;

*needs_control_head = control_head_used;
*has_quit = quit_seen;
*has_accept = accept_seen;
return words;
}
2535 
/* Transfer modes of copy_recurse_data(). "Local" means a variable addressed
through SLJIT_SP, "save area" the words addressed through the base register
chosen by copy_recurse_data(). */
enum copy_recurse_data_types {
  /* Local -> save area, for private, shared and kept shared words. */
  recurse_copy_from_global = 0,
  /* Save area -> local, private words only. */
  recurse_copy_private_to_global = 1,
  /* Save area -> local, shared and kept shared words. */
  recurse_copy_shared_to_global = 2,
  /* Save area -> local, kept shared words only. */
  recurse_copy_kept_shared_to_global = 3,
  /* Exchange local and save area for private and shared words; the save
  area is addressed through TMP2 in this mode. */
  recurse_swap_global = 4
};
2543 
copy_recurse_data(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,int type,int stackptr,int stacktop,BOOL has_quit)2544 static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
2545   int type, int stackptr, int stacktop, BOOL has_quit)
2546 {
2547 delayed_mem_copy_status status;
2548 PCRE2_SPTR alternative;
2549 sljit_sw private_srcw[2];
2550 sljit_sw shared_srcw[3];
2551 sljit_sw kept_shared_srcw[2];
2552 int private_count, shared_count, kept_shared_count;
2553 int from_sp, base_reg, offset, i;
2554 BOOL setsom_found = FALSE;
2555 BOOL setmark_found = FALSE;
2556 BOOL capture_last_found = FALSE;
2557 BOOL control_head_found = FALSE;
2558 
2559 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2560 SLJIT_ASSERT(common->control_head_ptr != 0);
2561 control_head_found = TRUE;
2562 #endif
2563 
2564 switch (type)
2565   {
2566   case recurse_copy_from_global:
2567   from_sp = TRUE;
2568   base_reg = STACK_TOP;
2569   break;
2570 
2571   case recurse_copy_private_to_global:
2572   case recurse_copy_shared_to_global:
2573   case recurse_copy_kept_shared_to_global:
2574   from_sp = FALSE;
2575   base_reg = STACK_TOP;
2576   break;
2577 
2578   default:
2579   SLJIT_ASSERT(type == recurse_swap_global);
2580   from_sp = FALSE;
2581   base_reg = TMP2;
2582   break;
2583   }
2584 
2585 stackptr = STACK(stackptr);
2586 stacktop = STACK(stacktop);
2587 
2588 status.tmp_regs[0] = TMP1;
2589 status.saved_tmp_regs[0] = TMP1;
2590 
2591 if (base_reg != TMP2)
2592   {
2593   status.tmp_regs[1] = TMP2;
2594   status.saved_tmp_regs[1] = TMP2;
2595   }
2596 else
2597   {
2598   status.saved_tmp_regs[1] = RETURN_ADDR;
2599   if (HAS_VIRTUAL_REGISTERS)
2600     status.tmp_regs[1] = STR_PTR;
2601   else
2602     status.tmp_regs[1] = RETURN_ADDR;
2603   }
2604 
2605 status.saved_tmp_regs[2] = TMP3;
2606 if (HAS_VIRTUAL_REGISTERS)
2607   status.tmp_regs[2] = STR_END;
2608 else
2609   status.tmp_regs[2] = TMP3;
2610 
2611 delayed_mem_copy_init(&status, common);
2612 
2613 if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2614   {
2615   SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2616 
2617   if (!from_sp)
2618     delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);
2619 
2620   if (from_sp || type == recurse_swap_global)
2621     delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
2622   }
2623 
2624 stackptr += sizeof(sljit_sw);
2625 
2626 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2627 if (type != recurse_copy_shared_to_global)
2628   {
2629   if (!from_sp)
2630     delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);
2631 
2632   if (from_sp || type == recurse_swap_global)
2633     delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
2634   }
2635 
2636 stackptr += sizeof(sljit_sw);
2637 #endif
2638 
2639 while (cc < ccend)
2640   {
2641   private_count = 0;
2642   shared_count = 0;
2643   kept_shared_count = 0;
2644 
2645   switch(*cc)
2646     {
2647     case OP_SET_SOM:
2648     SLJIT_ASSERT(common->has_set_som);
2649     if (has_quit && !setsom_found)
2650       {
2651       kept_shared_srcw[0] = OVECTOR(0);
2652       kept_shared_count = 1;
2653       setsom_found = TRUE;
2654       }
2655     cc += 1;
2656     break;
2657 
2658     case OP_RECURSE:
2659     if (has_quit)
2660       {
2661       if (common->has_set_som && !setsom_found)
2662         {
2663         kept_shared_srcw[0] = OVECTOR(0);
2664         kept_shared_count = 1;
2665         setsom_found = TRUE;
2666         }
2667       if (common->mark_ptr != 0 && !setmark_found)
2668         {
2669         kept_shared_srcw[kept_shared_count] = common->mark_ptr;
2670         kept_shared_count++;
2671         setmark_found = TRUE;
2672         }
2673       }
2674     if (common->capture_last_ptr != 0 && !capture_last_found)
2675       {
2676       shared_srcw[0] = common->capture_last_ptr;
2677       shared_count = 1;
2678       capture_last_found = TRUE;
2679       }
2680     cc += 1 + LINK_SIZE;
2681     break;
2682 
2683     case OP_KET:
2684     if (PRIVATE_DATA(cc) != 0)
2685       {
2686       private_count = 1;
2687       private_srcw[0] = PRIVATE_DATA(cc);
2688       SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2689       cc += PRIVATE_DATA(cc + 1);
2690       }
2691     cc += 1 + LINK_SIZE;
2692     break;
2693 
2694     case OP_ASSERT:
2695     case OP_ASSERT_NOT:
2696     case OP_ASSERTBACK:
2697     case OP_ASSERTBACK_NOT:
2698     case OP_ASSERT_NA:
2699     case OP_ASSERTBACK_NA:
2700     case OP_ONCE:
2701     case OP_SCRIPT_RUN:
2702     case OP_BRAPOS:
2703     case OP_SBRA:
2704     case OP_SBRAPOS:
2705     case OP_SCOND:
2706     private_count = 1;
2707     private_srcw[0] = PRIVATE_DATA(cc);
2708     cc += 1 + LINK_SIZE;
2709     break;
2710 
2711     case OP_CBRA:
2712     case OP_SCBRA:
2713     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2714     shared_srcw[0] = OVECTOR(offset);
2715     shared_srcw[1] = OVECTOR(offset + 1);
2716     shared_count = 2;
2717 
2718     if (common->capture_last_ptr != 0 && !capture_last_found)
2719       {
2720       shared_srcw[2] = common->capture_last_ptr;
2721       shared_count = 3;
2722       capture_last_found = TRUE;
2723       }
2724 
2725     if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
2726       {
2727       private_count = 1;
2728       private_srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
2729       }
2730     cc += 1 + LINK_SIZE + IMM2_SIZE;
2731     break;
2732 
2733     case OP_CBRAPOS:
2734     case OP_SCBRAPOS:
2735     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2736     shared_srcw[0] = OVECTOR(offset);
2737     shared_srcw[1] = OVECTOR(offset + 1);
2738     shared_count = 2;
2739 
2740     if (common->capture_last_ptr != 0 && !capture_last_found)
2741       {
2742       shared_srcw[2] = common->capture_last_ptr;
2743       shared_count = 3;
2744       capture_last_found = TRUE;
2745       }
2746 
2747     private_count = 2;
2748     private_srcw[0] = PRIVATE_DATA(cc);
2749     private_srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
2750     cc += 1 + LINK_SIZE + IMM2_SIZE;
2751     break;
2752 
2753     case OP_COND:
2754     /* Might be a hidden SCOND. */
2755     alternative = cc + GET(cc, 1);
2756     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
2757       {
2758       private_count = 1;
2759       private_srcw[0] = PRIVATE_DATA(cc);
2760       }
2761     cc += 1 + LINK_SIZE;
2762     break;
2763 
2764     CASE_ITERATOR_PRIVATE_DATA_1
2765     if (PRIVATE_DATA(cc))
2766       {
2767       private_count = 1;
2768       private_srcw[0] = PRIVATE_DATA(cc);
2769       }
2770     cc += 2;
2771 #ifdef SUPPORT_UNICODE
2772     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2773 #endif
2774     break;
2775 
2776     CASE_ITERATOR_PRIVATE_DATA_2A
2777     if (PRIVATE_DATA(cc))
2778       {
2779       private_count = 2;
2780       private_srcw[0] = PRIVATE_DATA(cc);
2781       private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2782       }
2783     cc += 2;
2784 #ifdef SUPPORT_UNICODE
2785     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2786 #endif
2787     break;
2788 
2789     CASE_ITERATOR_PRIVATE_DATA_2B
2790     if (PRIVATE_DATA(cc))
2791       {
2792       private_count = 2;
2793       private_srcw[0] = PRIVATE_DATA(cc);
2794       private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2795       }
2796     cc += 2 + IMM2_SIZE;
2797 #ifdef SUPPORT_UNICODE
2798     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2799 #endif
2800     break;
2801 
2802     CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2803     if (PRIVATE_DATA(cc))
2804       {
2805       private_count = 1;
2806       private_srcw[0] = PRIVATE_DATA(cc);
2807       }
2808     cc += 1;
2809     break;
2810 
2811     CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2812     if (PRIVATE_DATA(cc))
2813       {
2814       private_count = 2;
2815       private_srcw[0] = PRIVATE_DATA(cc);
2816       private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2817       }
2818     cc += 1;
2819     break;
2820 
2821     CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2822     if (PRIVATE_DATA(cc))
2823       {
2824       private_count = 2;
2825       private_srcw[0] = PRIVATE_DATA(cc);
2826       private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2827       }
2828     cc += 1 + IMM2_SIZE;
2829     break;
2830 
2831     case OP_CLASS:
2832     case OP_NCLASS:
2833 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
2834     case OP_XCLASS:
2835     i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2836 #else
2837     i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2838 #endif
2839     if (PRIVATE_DATA(cc) != 0)
2840       switch(get_class_iterator_size(cc + i))
2841         {
2842         case 1:
2843         private_count = 1;
2844         private_srcw[0] = PRIVATE_DATA(cc);
2845         break;
2846 
2847         case 2:
2848         private_count = 2;
2849         private_srcw[0] = PRIVATE_DATA(cc);
2850         private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2851         break;
2852 
2853         default:
2854         SLJIT_UNREACHABLE();
2855         break;
2856         }
2857     cc += i;
2858     break;
2859 
2860     case OP_MARK:
2861     case OP_COMMIT_ARG:
2862     case OP_PRUNE_ARG:
2863     case OP_THEN_ARG:
2864     SLJIT_ASSERT(common->mark_ptr != 0);
2865     if (has_quit && !setmark_found)
2866       {
2867       kept_shared_srcw[0] = common->mark_ptr;
2868       kept_shared_count = 1;
2869       setmark_found = TRUE;
2870       }
2871     if (common->control_head_ptr != 0 && !control_head_found)
2872       {
2873       private_srcw[0] = common->control_head_ptr;
2874       private_count = 1;
2875       control_head_found = TRUE;
2876       }
2877     cc += 1 + 2 + cc[1];
2878     break;
2879 
2880     case OP_THEN:
2881     SLJIT_ASSERT(common->control_head_ptr != 0);
2882     if (!control_head_found)
2883       {
2884       private_srcw[0] = common->control_head_ptr;
2885       private_count = 1;
2886       control_head_found = TRUE;
2887       }
2888     cc++;
2889     break;
2890 
2891     default:
2892     cc = next_opcode(common, cc);
2893     SLJIT_ASSERT(cc != NULL);
2894     break;
2895     }
2896 
2897   if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2898     {
2899     SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2900 
2901     for (i = 0; i < private_count; i++)
2902       {
2903       SLJIT_ASSERT(private_srcw[i] != 0);
2904 
2905       if (!from_sp)
2906         delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);
2907 
2908       if (from_sp || type == recurse_swap_global)
2909         delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);
2910 
2911       stackptr += sizeof(sljit_sw);
2912       }
2913     }
2914   else
2915     stackptr += sizeof(sljit_sw) * private_count;
2916 
2917   if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
2918     {
2919     SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);
2920 
2921     for (i = 0; i < shared_count; i++)
2922       {
2923       SLJIT_ASSERT(shared_srcw[i] != 0);
2924 
2925       if (!from_sp)
2926         delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);
2927 
2928       if (from_sp || type == recurse_swap_global)
2929         delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);
2930 
2931       stackptr += sizeof(sljit_sw);
2932       }
2933     }
2934   else
2935     stackptr += sizeof(sljit_sw) * shared_count;
2936 
2937   if (type != recurse_copy_private_to_global && type != recurse_swap_global)
2938     {
2939     SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);
2940 
2941     for (i = 0; i < kept_shared_count; i++)
2942       {
2943       SLJIT_ASSERT(kept_shared_srcw[i] != 0);
2944 
2945       if (!from_sp)
2946         delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);
2947 
2948       if (from_sp || type == recurse_swap_global)
2949         delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);
2950 
2951       stackptr += sizeof(sljit_sw);
2952       }
2953     }
2954   else
2955     stackptr += sizeof(sljit_sw) * kept_shared_count;
2956   }
2957 
2958 SLJIT_ASSERT(cc == ccend && stackptr == stacktop);
2959 
2960 delayed_mem_copy_finish(&status);
2961 }
2962 
set_then_offsets(compiler_common * common,PCRE2_SPTR cc,sljit_u8 * current_offset)2963 static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
2964 {
2965 PCRE2_SPTR end = bracketend(cc);
2966 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2967 
2968 /* Assert captures then. */
2969 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA)
2970   current_offset = NULL;
2971 /* Conditional block does not. */
2972 if (*cc == OP_COND || *cc == OP_SCOND)
2973   has_alternatives = FALSE;
2974 
2975 cc = next_opcode(common, cc);
2976 if (has_alternatives)
2977   current_offset = common->then_offsets + (cc - common->start);
2978 
2979 while (cc < end)
2980   {
2981   if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2982     cc = set_then_offsets(common, cc, current_offset);
2983   else
2984     {
2985     if (*cc == OP_ALT && has_alternatives)
2986       current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2987     if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2988       *current_offset = 1;
2989     cc = next_opcode(common, cc);
2990     }
2991   }
2992 
2993 return end;
2994 }
2995 
2996 #undef CASE_ITERATOR_PRIVATE_DATA_1
2997 #undef CASE_ITERATOR_PRIVATE_DATA_2A
2998 #undef CASE_ITERATOR_PRIVATE_DATA_2B
2999 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
3000 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
3001 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3002 
is_powerof2(unsigned int value)3003 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
3004 {
3005 return (value & (value - 1)) == 0;
3006 }
3007 
set_jumps(jump_list * list,struct sljit_label * label)3008 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
3009 {
3010 while (list)
3011   {
3012   /* sljit_set_label is clever enough to do nothing
3013   if either the jump or the label is NULL. */
3014   SET_LABEL(list->jump, label);
3015   list = list->next;
3016   }
3017 }
3018 
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)3019 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
3020 {
3021 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
3022 if (list_item)
3023   {
3024   list_item->next = *list;
3025   list_item->jump = jump;
3026   *list = list_item;
3027   }
3028 }
3029 
/* Registers a stub on common->stubs: start is the jump that enters the stub,
and a label emitted at the current position is recorded as the place where
the stub continues afterwards (see flush_stubs). Nothing is recorded, and no
label is emitted, if the list node cannot be allocated. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (stub == NULL)
  return;

stub->start = start;
stub->quit = LABEL();
stub->next = common->stubs;
common->stubs = stub;
}
3043 
/* Emits the body of every registered stub and empties the stub list. Each
stub starts where its entry jump lands, performs a fast call that is
collected on common->stackalloc, and then jumps back to the label recorded
when the stub was registered. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
3058 
count_match(compiler_common * common)3059 static SLJIT_INLINE void count_match(compiler_common *common)
3060 {
3061 DEFINE_COMPILER;
3062 
3063 OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
3064 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
3065 }
3066 
allocate_stack(compiler_common * common,int size)3067 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
3068 {
3069 /* May destroy all locals and registers except TMP2. */
3070 DEFINE_COMPILER;
3071 
3072 SLJIT_ASSERT(size > 0);
3073 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
3074 #ifdef DESTROY_REGISTERS
3075 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
3076 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3077 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3078 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
3079 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
3080 #endif
3081 add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
3082 }
3083 
free_stack(compiler_common * common,int size)3084 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
3085 {
3086 DEFINE_COMPILER;
3087 
3088 SLJIT_ASSERT(size > 0);
3089 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
3090 }
3091 
/* Allocates size bytes of read-only data that is tracked by common.

The chunk is allocated with one extra leading sljit_uw; that word stores the
previous head of common->read_only_data_head, so all chunks form a singly
linked list. Returns a pointer to the usable area behind the link word, or
NULL if the compiler is already in an error state or the allocation fails
(in the latter case the compiler memory error is set). */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
DEFINE_COMPILER;
sljit_uw *chunk;
void **link;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

chunk = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
if (SLJIT_UNLIKELY(chunk == NULL))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

/* Push the new chunk onto the front of the list. */
link = (void **)chunk;
*link = common->read_only_data_head;
common->read_only_data_head = (void *)chunk;

return &chunk[1];
}
3111 
reset_ovector(compiler_common * common,int length)3112 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
3113 {
3114 DEFINE_COMPILER;
3115 struct sljit_label *loop;
3116 sljit_s32 i;
3117 
3118 /* At this point we can freely use all temporary registers. */
3119 SLJIT_ASSERT(length > 1);
3120 /* TMP1 returns with begin - 1. */
3121 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
3122 if (length < 8)
3123   {
3124   for (i = 1; i < length; i++)
3125     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
3126   }
3127 else
3128   {
3129   if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3130     {
3131     GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
3132     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3133     loop = LABEL();
3134     sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
3135     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3136     JUMPTO(SLJIT_NOT_ZERO, loop);
3137     }
3138   else
3139     {
3140     GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
3141     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3142     loop = LABEL();
3143     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
3144     OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
3145     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3146     JUMPTO(SLJIT_NOT_ZERO, loop);
3147     }
3148   }
3149 }
3150 
reset_early_fail(compiler_common * common)3151 static SLJIT_INLINE void reset_early_fail(compiler_common *common)
3152 {
3153 DEFINE_COMPILER;
3154 sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
3155 sljit_u32 uncleared_size;
3156 sljit_s32 src = SLJIT_IMM;
3157 sljit_s32 i;
3158 struct sljit_label *loop;
3159 
3160 SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);
3161 
3162 if (size == sizeof(sljit_sw))
3163   {
3164   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
3165   return;
3166   }
3167 
3168 if (sljit_get_register_index(TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
3169   {
3170   OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
3171   src = TMP3;
3172   }
3173 
3174 if (size <= 6 * sizeof(sljit_sw))
3175   {
3176   for (i = common->early_fail_start_ptr; i < common->early_fail_end_ptr; i += sizeof(sljit_sw))
3177     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, src, 0);
3178   return;
3179   }
3180 
3181 GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);
3182 
3183 uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);
3184 
3185 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);
3186 
3187 loop = LABEL();
3188 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3189 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3190 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * (sljit_sw)sizeof(sljit_sw), src, 0);
3191 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * (sljit_sw)sizeof(sljit_sw), src, 0);
3192 CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);
3193 
3194 if (uncleared_size >= sizeof(sljit_sw))
3195   OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3196 
3197 if (uncleared_size >= 2 * sizeof(sljit_sw))
3198   OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0);
3199 }
3200 
do_reset_match(compiler_common * common,int length)3201 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
3202 {
3203 DEFINE_COMPILER;
3204 struct sljit_label *loop;
3205 int i;
3206 
3207 SLJIT_ASSERT(length > 1);
3208 /* OVECTOR(1) contains the "string begin - 1" constant. */
3209 if (length > 2)
3210   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3211 if (length < 8)
3212   {
3213   for (i = 2; i < length; i++)
3214     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
3215   }
3216 else
3217   {
3218   if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3219     {
3220     GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
3221     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3222     loop = LABEL();
3223     sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
3224     OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3225     JUMPTO(SLJIT_NOT_ZERO, loop);
3226     }
3227   else
3228     {
3229     GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
3230     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3231     loop = LABEL();
3232     OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
3233     OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
3234     OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3235     JUMPTO(SLJIT_NOT_ZERO, loop);
3236     }
3237   }
3238 
3239 if (!HAS_VIRTUAL_REGISTERS)
3240   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));
3241 else
3242   OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
3243 
3244 if (common->mark_ptr != 0)
3245   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
3246 if (common->control_head_ptr != 0)
3247   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
3248 if (HAS_VIRTUAL_REGISTERS)
3249   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
3250 
3251 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3252 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
3253 }
3254 
do_search_mark(sljit_sw * current,PCRE2_SPTR skip_arg)3255 static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
3256 {
3257 while (current != NULL)
3258   {
3259   switch (current[1])
3260     {
3261     case type_then_trap:
3262     break;
3263 
3264     case type_mark:
3265     if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
3266       return current[3];
3267     break;
3268 
3269     default:
3270     SLJIT_UNREACHABLE();
3271     break;
3272     }
3273   SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
3274   current = (sljit_sw*)current[0];
3275   }
3276 return 0;
3277 }
3278 
/* Emits the code that publishes a successful match to the caller:
  - the start_ptr slot is stored into jit_arguments.startchar_ptr and, when
    a mark slot exists, the mark is stored into jit_arguments.mark_ptr;
  - OVECTOR(1) is replaced by STR_PTR (its previous value is kept in
    SLJIT_S2 for the return value calculation below);
  - oveccount entries of the local ovector are converted from pointers to
    code unit offsets relative to jit_arguments.begin and written to the
    ovector of the pcre2_match_data block;
  - SLJIT_RETURN_REG receives the return value computed from topbracket. */
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
DEFINE_COMPILER;
struct sljit_label *loop;
BOOL has_pre;

/* At this point we can freely use all registers. */
OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);

/* Both variants below leave the entry count in SLJIT_R1 and, in SLJIT_R2,
the address one PCRE2_SIZE before the output ovector (the copy loop
increments the pointer before each store). They differ only in how the
arguments structure is addressed. */
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
    SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
  OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  }

/* SLJIT_MEM_SUPP: only ask whether a load with pre-update addressing is
available; the result selects the loop body emitted below. */
has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;

/* SLJIT_S0 = read pointer into the local ovector (one word early for the
pre-update form), SLJIT_R0 = subject begin. */
GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

/* Copy loop: one local ovector word per iteration, SLJIT_R1 counts down. */
loop = LABEL();

if (has_pre)
  sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
else
  {
  OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
  OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
  }

/* Advance the output pointer and turn the pointer into an offset from the
subject begin. */
OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
/* Copy the integer value to the output buffer */
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
/* The difference is shifted right by UCHAR_SHIFT for the wider code units. */
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif

SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);

OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);

/* Calculate the return value, which is the maximum ovector value. */
if (topbracket > 1)
  {
  /* Scan the pair start entries downwards from the highest bracket,
  decrementing SLJIT_R1 (initially topbracket + 1) for every entry read;
  the scan stops at the first entry that differs from SLJIT_S2, the value
  OVECTOR(1) had on entry. Again two variants, depending on whether a
  pre-update load is available. */
  if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
    {
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

    /* OVECTOR(0) is never equal to SLJIT_S2. */
    loop = LABEL();
    sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
    }
  else
    {
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

    /* OVECTOR(0) is never equal to SLJIT_S2. */
    loop = LABEL();
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
    OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
    }
  }
else
  /* topbracket <= 1: the return value is the constant 1. */
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
}
3375 
return_with_partial_match(compiler_common * common,struct sljit_label * quit)3376 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
3377 {
3378 DEFINE_COMPILER;
3379 sljit_s32 mov_opcode;
3380 sljit_s32 arguments_reg = !HAS_VIRTUAL_REGISTERS ? ARGUMENTS : SLJIT_R1;
3381 
3382 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
3383 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
3384   && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));
3385 
3386 if (arguments_reg != ARGUMENTS)
3387   OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0);
3388 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
3389   common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
3390 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);
3391 
3392 /* Store match begin and end. */
3393 OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin));
3394 OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
3395 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data));
3396 
3397 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
3398 
3399 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
3400 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3401 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
3402 #endif
3403 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);
3404 
3405 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
3406 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3407 OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
3408 #endif
3409 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);
3410 
3411 JUMPTO(SLJIT_JUMP, quit);
3412 }
3413 
check_start_used_ptr(compiler_common * common)3414 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
3415 {
3416 /* May destroy TMP1. */
3417 DEFINE_COMPILER;
3418 struct sljit_jump *jump;
3419 
3420 if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3421   {
3422   /* The value of -1 must be kept for start_used_ptr! */
3423   OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
3424   /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
3425   is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
3426   jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
3427   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3428   JUMPHERE(jump);
3429   }
3430 else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
3431   {
3432   jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3433   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3434   JUMPHERE(jump);
3435   }
3436 }
3437 
char_has_othercase(compiler_common * common,PCRE2_SPTR cc)3438 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
3439 {
3440 /* Detects if the character has an othercase. */
3441 unsigned int c;
3442 
3443 #ifdef SUPPORT_UNICODE
3444 if (common->utf || common->ucp)
3445   {
3446   if (common->utf)
3447     {
3448     GETCHAR(c, cc);
3449     }
3450   else
3451     c = *cc;
3452 
3453   if (c > 127)
3454     return c != UCD_OTHERCASE(c);
3455 
3456   return common->fcc[c] != c;
3457   }
3458 else
3459 #endif
3460   c = *cc;
3461 return MAX_255(c) ? common->fcc[c] != c : FALSE;
3462 }
3463 
char_othercase(compiler_common * common,unsigned int c)3464 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
3465 {
3466 /* Returns with the othercase. */
3467 #ifdef SUPPORT_UNICODE
3468 if ((common->utf || common->ucp) && c > 127)
3469   return UCD_OTHERCASE(c);
3470 #endif
3471 return TABLE_GET(c, common->fcc, c);
3472 }
3473 
/* Determines whether the character at cc and its other case differ in
exactly one bit. The character must have an other case (asserted).

Returns 0 when more than one bit differs. Otherwise the result is
(index << 8) | mask, where mask is the differing bit reduced to a value
below 256 and index says where that bit lives:
  - 8-bit code units: index is the position of the affected byte within the
    (possibly multi-byte UTF-8) character;
  - 16/32-bit code units: index 0/1 selects the low/high 8 bits of the
    first code unit, index 2/3 those of the second code unit (used for
    characters above 0xffff in UTF mode).
NOTE(review): the 16-bit and 32-bit builds share the c > 65535 handling;
confirm that callers built for 32-bit code units treat indexes 2/3 as
intended. */
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
/* Detects if the character and its othercase has only 1 bit difference. */
unsigned int c, oc, bit;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
int n;
#endif

/* Fetch the character (c) and its other case (oc). */
#ifdef SUPPORT_UNICODE
if (common->utf || common->ucp)
  {
  if (common->utf)
    {
    GETCHAR(c, cc);
    }
  else
    c = *cc;

  if (c <= 127)
    oc = common->fcc[c];
  else
    oc = UCD_OTHERCASE(c);
  }
else
  {
  c = *cc;
  oc = TABLE_GET(c, common->fcc, c);
  }
#else
c = *cc;
oc = TABLE_GET(c, common->fcc, c);
#endif

SLJIT_ASSERT(c != oc);

bit = c ^ oc;
/* Optimized for English alphabet. */
if (c <= 127 && bit == 0x20)
  return (0 << 8) | 0x20;

/* Since c != oc, they must have at least 1 bit difference. */
if (!is_powerof2(bit))
  return 0;

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
if (common->utf && c > 127)
  {
  /* n starts as the number of extra bytes of the character, i.e. the index
  of its last byte. Every group of six zero bits at the bottom of the
  difference moves the affected byte one position towards the front. */
  n = GET_EXTRALEN(*cc);
  while ((bit & 0x3f) == 0)
    {
    n--;
    bit >>= 6;
    }
  return (n << 8) | bit;
  }
#endif /* SUPPORT_UNICODE */
return (0 << 8) | bit;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#ifdef SUPPORT_UNICODE
if (common->utf && c > 65535)
  {
  /* A difference at bit 10 or above is shifted down by 10 and then
  reported with index 0/1 by the common return below; a difference in
  the low 10 bits is reported with index 2/3. */
  if (bit >= (1u << 10))
    bit >>= 10;
  else
    return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
  }
#endif /* SUPPORT_UNICODE */
return (bit < 256) ? ((0u << 8) | bit) : ((1u << 8) | (bit >> 8));

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
3549 
/* Emits the check for a partial match at the current position; nothing is
emitted in PCRE2_JIT_COMPLETE mode. Does not modify registers.

Unless force is set or empty partial matches are allowed, the action is
skipped when start_used_ptr >= STR_PTR. Otherwise, in soft mode, it is
skipped when start_used_ptr is -1. The action itself is: soft mode stores
0 into the hit_start slot, hard mode jumps to the partial match exit (the
known label, or the common->partialmatch jump list). */
static void check_partial(compiler_common *common, BOOL force)
{
DEFINE_COMPILER;
struct sljit_jump *skip = NULL;
const BOOL soft_mode = (common->mode == PCRE2_JIT_PARTIAL_SOFT);

SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

if (common->mode == PCRE2_JIT_COMPLETE)
  return;

if (force || common->allow_empty_partial)
  {
  if (soft_mode)
    skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  }
else
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

if (soft_mode)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel == NULL)
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
else
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);

if (skip != NULL)
  JUMPHERE(skip);
}
3579 
/* Emits the end-of-subject check: when STR_PTR >= STR_END the generated
code leaves through the end_reached jump list or, in hard partial mode
with start_used_ptr < STR_PTR, through the partial match exit. Soft
partial mode additionally stores 0 into the hit_start slot in that case.
Does not affect registers. Usually used in a tight spot. */
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

/* Both partial modes first leave through end_reached when
start_used_ptr >= STR_PTR. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode != PCRE2_JIT_PARTIAL_SOFT)
  {
  if (common->partialmatchlabel == NULL)
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  else
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }

JUMPHERE(not_at_end);
}
3609 
/* Emits the end-of-subject check used before reading a character. In
complete mode reaching STR_END simply adds a jump to backtracks. In the
partial modes the end of the subject either backtracks (when
start_used_ptr >= STR_PTR and empty partial matches are not allowed, or
in soft mode when start_used_ptr is -1 and they are allowed) or signals
a partial match: soft mode stores 0 into the hit_start slot and then
backtracks, hard mode jumps to the partial match exit. */
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
DEFINE_COMPILER;
struct sljit_jump *not_at_end;
const BOOL soft_mode = (common->mode == PCRE2_JIT_PARTIAL_SOFT);

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

if (common->allow_empty_partial)
  {
  if (soft_mode)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));
  }
else
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (!soft_mode)
  {
  if (common->partialmatchlabel == NULL)
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  else
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }

JUMPHERE(not_at_end);
}
3642 
/* Emits the partial match handling for a position already known to be at
the end of the subject. Soft mode stores 0 into the hit_start slot when
start_used_ptr < STR_PTR; hard mode jumps to the partial match exit under
the same condition. Emits nothing in any other mode. */
static void process_partial_match(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;

/* Partial matching mode. */
if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  JUMPHERE(jump);
  return;
  }

if (common->mode != PCRE2_JIT_PARTIAL_HARD)
  return;

if (common->partialmatchlabel == NULL)
  {
  /* The exit label does not exist yet: queue the jump for later. */
  jump = CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  add_jump(compiler, &common->partialmatch, jump);
  }
else
  CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
}
3663 
/* Emits a jump to label that is taken while STR_PTR < STR_END; the code
that follows (reached at the end of the subject) is the partial match
handling emitted by process_partial_match(). */
static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
{
DEFINE_COMPILER;

/* Not at the end of the subject yet: continue at label. */
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);
process_partial_match(common);
}
3671 
/* Emits code that reads the character at STR_PTR into TMP1 without
advancing STR_PTR.

  max         when max is below the first value that needs more than the
              plain code unit load (128 for UTF-8, 0xd800 for UTF-16 and
              for 32-bit invalid-UTF checking), only the load is emitted
  dst, dstw   operand in which STR_PTR is saved while a decoding helper is
              called (8-bit and 16-bit UTF paths)
  backtracks  when not NULL and invalid UTF checking is active, receives a
              jump that is taken for an invalid character; when NULL in
              the 32-bit path, TMP1 is set to INVALID_UTF_CHAR instead */
static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
{
/* Reads the character into TMP1, keeps STR_PTR.
Does not check STR_END. TMP2, dst, RETURN_ADDR Destroyed. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  /* Code units below 0x80 are complete characters. Otherwise save STR_PTR
  in dst, step past the first code unit, call the decoding helper and
  restore STR_PTR afterwards. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, common->invalid_utf ? &common->utfreadchar_invalid : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
  if (backtracks && common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (common->invalid_utf)
    {
    /* Anything outside 0xd800..0xdfff needs no further work; surrogates
    are handed to the validating helper with STR_PTR saved in dst. */
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
    if (backtracks && common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    {
    /* TMP2 contains the high surrogate. */
    /* Combine it inline with the following code unit:
    TMP1 = (TMP2 << 10) + next - 0xdc00 + 0x10000. */
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }

  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  if (max < 0xd800) return;

  /* Invalid values are those >= 0x110000 and the range 0xd800..0xdfff. */
  if (backtracks != NULL)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
    }
  else
    {
    /* No backtrack list: replace an invalid value by INVALID_UTF_CHAR
    using conditional moves. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
    CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3755 
/* Emits code that reads the character before STR_PTR into TMP1 without
moving STR_PTR.

  max         when max is below the first value that needs more than the
              plain code unit load (128 for UTF-8, 0xd800 for UTF-16),
              only the load is emitted
  backtracks  when not NULL and invalid UTF checking is active, receives a
              jump that is taken for an invalid character; when NULL, the
              invalid character is reported as INVALID_UTF_CHAR in TMP1
              (all code unit widths) */
static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
{
/* Reads one character back without moving STR_PTR. TMP2 must
contain the start of the subject buffer. Affects TMP1, TMP2, and RETURN_ADDR. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  /* Code units below 0x80 are complete characters; everything else is
  decoded by a helper. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  if (common->invalid_utf)
    {
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  if (common->invalid_utf)
    {
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
    /* TMP2 contains the low surrogate. */
    /* Combine it inline with the preceding code unit:
    TMP1 = ((prev - 0xd800) << 10) + TMP2 + 0x10000. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  /* Invalid values are those >= 0x110000 and the range 0xd800..0xdfff. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (backtracks != NULL)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
    }
  else
    {
    /* No backtrack list was supplied: as in the 8-bit and 16-bit paths,
    report the problem through TMP1 instead of handing a NULL list to
    add_jump(). Same conditional move sequence as in peek_char(). */
    OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
    CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3823 
/* Bits for the options argument of read_char(). The descriptions are
based on the 8-bit UTF path of read_char(). */

/* STR_PTR is moved past the whole character using the length looked up
from its first code unit, even when the value is not fully decoded; the
max < 128 shortcut is not taken. */
#define READ_CHAR_UPDATE_STR_PTR 0x1
/* With invalid UTF checking, the utfreadnewline_invalid helper is called
instead of utfreadchar_invalid. */
#define READ_CHAR_UTF8_NEWLINE 0x2
/* Both of the above. */
#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
/* Use the non-validating decoder even when common->invalid_utf is set;
presumably the caller knows the input is valid here - confirm at the
call sites. */
#define READ_CHAR_VALID_UTF 0x4
3828 
read_char(compiler_common * common,sljit_u32 min,sljit_u32 max,jump_list ** backtracks,sljit_u32 options)3829 static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
3830   jump_list **backtracks, sljit_u32 options)
3831 {
3832 /* Reads the precise value of a character into TMP1, if the character is
3833 between min and max (c >= min && c <= max). Otherwise it returns with a value
3834 outside the range. Does not check STR_END. */
3835 DEFINE_COMPILER;
3836 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3837 struct sljit_jump *jump;
3838 #endif
3839 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
3840 struct sljit_jump *jump2;
3841 #endif
3842 
3843 SLJIT_UNUSED_ARG(min);
3844 SLJIT_UNUSED_ARG(max);
3845 SLJIT_UNUSED_ARG(backtracks);
3846 SLJIT_UNUSED_ARG(options);
3847 SLJIT_ASSERT(min <= max);
3848 
3849 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3850 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3851 
3852 #ifdef SUPPORT_UNICODE
3853 #if PCRE2_CODE_UNIT_WIDTH == 8
3854 if (common->utf)
3855   {
3856   if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
3857 
3858   if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
3859     {
3860     jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
3861 
3862     if (options & READ_CHAR_UTF8_NEWLINE)
3863       add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
3864     else
3865       add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
3866 
3867     if (backtracks != NULL)
3868       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3869     JUMPHERE(jump);
3870     return;
3871     }
3872 
3873   jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3874   if (min >= 0x10000)
3875     {
3876     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
3877     if (options & READ_CHAR_UPDATE_STR_PTR)
3878       OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3879     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3880     jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
3881     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3882     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3883     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3884     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3885     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3886     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3887     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3888     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
3889     if (!(options & READ_CHAR_UPDATE_STR_PTR))
3890       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
3891     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3892     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3893     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3894     JUMPHERE(jump2);
3895     if (options & READ_CHAR_UPDATE_STR_PTR)
3896       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3897     }
3898   else if (min >= 0x800 && max <= 0xffff)
3899     {
3900     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
3901     if (options & READ_CHAR_UPDATE_STR_PTR)
3902       OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3903     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3904     jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
3905     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3906     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3907     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3908     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3909     if (!(options & READ_CHAR_UPDATE_STR_PTR))
3910       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3911     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3912     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3913     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3914     JUMPHERE(jump2);
3915     if (options & READ_CHAR_UPDATE_STR_PTR)
3916       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3917     }
3918   else if (max >= 0x800)
3919     {
3920     add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
3921     }
3922   else if (max < 128)
3923     {
3924     OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3925     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3926     }
3927   else
3928     {
3929     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3930     if (!(options & READ_CHAR_UPDATE_STR_PTR))
3931       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3932     else
3933       OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3934     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3935     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3936     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3937     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3938     if (options & READ_CHAR_UPDATE_STR_PTR)
3939       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3940     }
3941   JUMPHERE(jump);
3942   }
3943 #elif PCRE2_CODE_UNIT_WIDTH == 16
3944 if (common->utf)
3945   {
3946   if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
3947 
3948   if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
3949     {
3950     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3951     jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
3952 
3953     if (options & READ_CHAR_UTF8_NEWLINE)
3954       add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
3955     else
3956       add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
3957 
3958     if (backtracks != NULL)
3959       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3960     JUMPHERE(jump);
3961     return;
3962     }
3963 
3964   if (max >= 0x10000)
3965     {
3966     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3967     jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
3968     /* TMP2 contains the high surrogate. */
3969     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3970     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
3971     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3972     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
3973     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3974     JUMPHERE(jump);
3975     return;
3976     }
3977 
3978   /* Skip low surrogate if necessary. */
3979   OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3980 
3981   if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
3982     {
3983     if (options & READ_CHAR_UPDATE_STR_PTR)
3984       OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3985     OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
3986     if (options & READ_CHAR_UPDATE_STR_PTR)
3987       CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
3988     if (max >= 0xd800)
3989       CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000);
3990     }
3991   else
3992     {
3993     jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
3994     if (options & READ_CHAR_UPDATE_STR_PTR)
3995       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3996     if (max >= 0xd800)
3997       OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
3998     JUMPHERE(jump);
3999     }
4000   }
4001 #elif PCRE2_CODE_UNIT_WIDTH == 32
4002 if (common->invalid_utf)
4003   {
4004   if (backtracks != NULL)
4005     {
4006     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4007     add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
4008     add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
4009     }
4010   else
4011     {
4012     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4013     OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
4014     CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4015     OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4016     CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4017     }
4018   }
4019 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
4020 #endif /* SUPPORT_UNICODE */
4021 }
4022 
4023 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4024 
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Tells whether the character codes below 128 are enough to determine a
match. This is the case when the second half of the bitset (bytes 16..31)
is uniform: every byte is 0xff when nclass is set, and 0 otherwise.

Arguments:
  bitset    pointer to a bitset which is at least 32 bytes long
  nclass    selects the byte value expected in the second half

Returns:    TRUE if all of bytes 16..31 hold the expected value */
const sljit_u8 expected = nclass ? 0xff : 0;
int idx;

for (idx = 16; idx < 32; idx++)
  {
  if (bitset[idx] != expected)
    return FALSE;
  }
return TRUE;
}
4041 
static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the precise character type of a character into TMP1, if the character
is less than 128. Otherwise it returns with zero. Does not check STR_END.

The code unit at STR_PTR is loaded into TMP2 and STR_PTR is advanced by one
code unit. When negated is FALSE nothing else is emitted. When negated is TRUE
and the code unit is 0x80 or above, the rest of the character is consumed as
well: in invalid_utf mode it is decoded by the utfreadchar_invalid helper and a
jump is added to backtracks if the helper returns INVALID_UTF_CHAR; otherwise
the value found in utf8_table4 for the first byte (presumably the number of
additional bytes - confirm against the table definition) is added to STR_PTR.

Arguments:
  common      compiler state; common->utf must be set
  backtracks  receives the jump taken for invalid characters; it is passed to
              add_jump only when both negated and common->invalid_utf are set
  negated     TRUE if characters of 0x80 and above must be fully consumed */
DEFINE_COMPILER;
struct sljit_jump *jump;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* All values > 127 are zero in ctypes. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (negated)
  {
  /* Single byte characters need no further processing. */
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (common->invalid_utf)
    {
    /* The helper takes the first byte of the character in TMP1. At this
    point TMP1 holds the ctypes entry, so the byte is copied from TMP2
    first; otherwise the helper would decode the type value instead of
    the character. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    /* The decoded value is not needed, only the zero type is returned. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    }
  else
    {
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  JUMPHERE(jump);
  }
}
4076 
4077 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4078 
static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END
before the first code unit is read.

The first code unit is loaded into TMP2 and STR_PTR is advanced by one code
unit. The ctypes array is only indexed with values up to 255; the type of any
other character is zero.

UTF-8: code units below 0x80 are looked up directly. Otherwise, when negated
is FALSE only a two byte sequence is decoded; in invalid_utf mode jumps are
added to backtracks if the subject ends, the first byte is outside 0xc2..0xdf
or the second byte is not in 0x80..0xbf. When negated is TRUE the whole
character is consumed, either by the utfreadchar_invalid helper (invalid_utf
mode, backtracks on INVALID_UTF_CHAR) or by the utfreadtype8 helper.

UTF-16 with negated set: the second code unit of a surrogate pair is skipped;
in invalid_utf mode jumps are added to backtracks for malformed pairs.

32 bit with invalid_utf and negated set: a jump is added to backtracks for
values of 0x110000 and above.

Arguments:
  common      compiler state
  backtracks  receives the jumps taken for invalid characters
  negated     TRUE if characters without a ctypes entry must be fully consumed */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(negated);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The result of this read may be unused, but saves an "else" part. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (!negated)
    {
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

    /* TMP1 receives the second byte. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
    /* Only 0xc2..0xdf can start a valid two byte sequence. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));

    /* If TMP1 is in the 0x80-0xbf range, adding it unmodified also adds
    the (0x2 << 6) which was removed by subtracting 0xc2 instead of 0xc0,
    so TMP2 becomes the character value. */
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
    /* The continuation byte check must use TMP1 (second byte - 0x80).
    TMP2 holds the combined value here, which is at least 0x80 for every
    well formed sequence, so testing it would reject all of them. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40));

    /* The ctypes array contains only 256 values. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else if (common->invalid_utf)
    {
    /* The helper takes the first byte of the character in TMP1. At this
    point TMP1 holds the ctypes entry, so the byte is copied from TMP2
    first. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    /* Keep the decoded character in TMP2 for the table lookup below. */
    OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));

    /* The ctypes array contains only 256 values. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && negated)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */

#if PCRE2_CODE_UNIT_WIDTH != 8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && negated)
  {
  /* Skip low surrogate if necessary. */
  if (!common->invalid_utf)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);

    /* TMP2 < 0x400 means the first code unit was in 0xd800..0xdbff. */
    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
      {
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
      CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
      }
    else
      {
      jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      JUMPHERE(jump);
      }
    return;
    }

  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  /* Code units outside 0xd800..0xdfff are complete characters. */
  jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  /* A character cannot start with a code unit in 0xdc00..0xdfff, and the
  second code unit of a pair must be present in the subject. */
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  /* The second code unit must be in 0xdc00..0xdfff. */
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
4197 
static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
{
/* Goes one character back. Affects STR_PTR and TMP1. If must_be_valid is TRUE,
TMP2 is not used. Otherwise TMP2 must contain the start of the subject buffer,
and it is destroyed. Does not modify STR_PTR for invalid character sequences.

Arguments:
  common         compiler state; utf and invalid_utf select the variant
  backtracks     if not NULL, receives the jump taken when the previous
                 character is rejected: the utfmoveback_invalid helper left
                 zero in TMP1 (8 and 16 bit), or the previous code unit is
                 0x110000 or above (32 bit)
  must_be_valid  when TRUE, the invalid_utf specific code is not emitted

When no UTF variant applies, the emitted code only subtracts one code unit
from STR_PTR. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_label *label;

if (common->utf)
  {
  if (!must_be_valid && common->invalid_utf)
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    /* A byte below 0x80 is a complete character, the helper is skipped. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(jump);
    return;
    }

  /* Step back one byte at a time while the byte just passed is a
  continuation byte (10xxxxxx). */
  label = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (!must_be_valid && common->invalid_utf)
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    /* Code units outside 0xd800..0xdfff are complete characters, the
    helper is skipped. */
    jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(jump);
    return;
    }

  /* Skip low surrogate if necessary. */
  /* TMP1 becomes 1 when the code unit is in 0xdc00..0xdfff and 0 otherwise;
  it is then scaled by UCHAR_SHIFT and subtracted from STR_PTR, which steps
  back one more code unit without a branch. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && !must_be_valid)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  if (backtracks != NULL)
    {
    /* The jump is added before the subtraction, so STR_PTR is unchanged
    when the backtrack path is taken. */
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    return;
    }

  /* No backtrack list: TMP1 becomes 1 if the code unit is below 0x110000
  and 0 otherwise, so STR_PTR only moves for the former. */
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
  OP2(SLJIT_SHL,  TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(must_be_valid);

/* Fixed width case: a character is exactly one code unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4284 
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed.

Arguments:
  common       compiler state
  nltype       NLTYPE_ANY, NLTYPE_ANYCRLF or NLTYPE_FIXED
  backtracks   receives the emitted jumps
  jumpifmatch  if TRUE the jumps are taken when the character is a newline,
               otherwise they are taken when it is not a newline */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The anynewline helper reports its result in the zero flag. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  if (jumpifmatch)
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  else
    add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  /* A single, fixed newline character. */
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  if (jumpifmatch)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF: the newline is either CR or NL. */
if (jumpifmatch)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  return;
  }

/* A CR skips the NL test, anything else which is not NL takes the jump. */
is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
JUMPHERE(is_cr);
}
4317 
4318 #ifdef SUPPORT_UNICODE
4319 
4320 #if PCRE2_CODE_UNIT_WIDTH == 8
static void do_utfreadchar(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1.

Emits a fast-call helper. The remaining bytes are read from STR_PTR at
offsets 0, 1 and 2, and STR_PTR is advanced by the number of bytes read.
TMP2 is used as scratch. No validity or STR_END checks are emitted here. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Append the low six bits of the second byte to the first byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. */
/* 0x800 is bit 0x20 of the first byte after the shift. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
/* 0x3000 is 0xc0 << 6: removes the marker bits of the first byte. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Append the low six bits of the third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* 0x10000 is bit 0x10 of the first byte after two shifts. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence. */
/* 0xe0000 is 0xe0 << 12: removes the marker bits of the first byte. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four byte sequence. */
JUMPHERE(jump);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
/* 0xf0000 is 0xf0 << 12: the marker bits are removed before the last
shift. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4365 
static void do_utfreadtype8(compiler_common *common)
{
/* Fast decoding a UTF-8 character type. TMP2 contains the first byte
of the character (>= 0xc0). Return value in TMP1.

Emits a fast-call helper. The returned value is the ctypes entry when the
character is a two byte sequence with a value below 256, and zero otherwise.
STR_PTR is advanced past the remaining bytes of the character. No validity
or STR_END checks are emitted here. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *compare;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Bit 0x20 of the first byte is set for sequences longer than two bytes. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point. */
/* A value above 3 gives a character of 0x100 or above after the shift. */
compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two byte sequence without a ctypes entry: the type is zero. */
JUMPHERE(compare);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* We only have types for characters less than 256. */
JUMPHERE(jump);
/* The utf8_table4 entry of the first byte is added to STR_PTR; presumably
this is the number of additional bytes - confirm against the table. */
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4401 
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Slow decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1. STR_PTR is
undefined for invalid characters.

Emits a fast-call helper. On entry STR_PTR points after the first byte; the
following bytes are only read when they are before STR_END. TMP2 is used as
scratch. INVALID_UTF_CHAR is returned in TMP1 for rejected sequences. Two code
paths are emitted: a main path used when at least three more bytes are
available, and a second path (buffer_end_close) for the last bytes of the
subject, where a four byte sequence cannot fit.

When conditional moves are available, several checks replace TMP1 by a value
which fails a later test instead of jumping; the matching exit_invalid[]
entries are set to NULL in that case. */
DEFINE_COMPILER;
sljit_s32 i;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *jump;
struct sljit_jump *buffer_end_close;
struct sljit_label *three_byte_entry;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[11];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);

/* Usually more than 3 characters remained in the subject buffer. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));

/* Not a valid start of a multi-byte sequence, no more bytes read. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);

buffer_end_close = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* STR_PTR was advanced by three, so the second byte is at offset -3. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* Bit 0x800 is set when the first byte is 0xe0 or above. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);

/* Two-byte sequence: only one of the three bytes was consumed. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  /* 0x20000 is the three byte marker with a zero payload; it is rejected
  by the range checks after three_byte_entry. */
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000);
  exit_invalid[2] = NULL;
  }
else
  exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* Bit 0x10000 is set when the first byte is 0xf0 or above. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);

three_byte_entry = LABEL();

/* Removes the 0x20000 marker and 0xd800: values of 0xd800..0xdfff become
0..0x7ff and are rejected. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800);
  exit_invalid[3] = NULL;
  }
else
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Values below 0x800 do not need three bytes and are rejected. */
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  exit_invalid[4] = NULL;
  }
else
  exit_invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);

/* Four-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  /* Zero fails the range check below after 0xc10000 is subtracted. */
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0);
  exit_invalid[5] = NULL;
  }
else
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* Removes the 0xc00000 marker and 0x10000: the remaining value must be
below 0x100000, which limits the character to 0x10000..0x10ffff. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  exit_invalid[6] = NULL;
  }
else
  exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than three bytes follow the first byte. STR_PTR is moved back so
it points after the second byte, which must still be inside the subject. */
JUMPHERE(buffer_end_close);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
exit_invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three-byte sequence. */
JUMPHERE(jump);
exit_invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  exit_invalid[10] = NULL;
  }
else
  exit_invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* One will be subtracted from STR_PTR later. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Four byte sequences are not possible. */
/* Values below 0x30000 continue with the common three byte checks; the
rest falls through to the invalid exit. */
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_entry);

exit_invalid_label = LABEL();
/* NOTE(review): entries set to NULL on the has_cmov paths are passed to
sljit_set_label as well; presumably it ignores a NULL jump - confirm. */
for (i = 0; i < 11; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4560 
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Slow decoding a UTF-8 character, specialized for newlines.
TMP1 contains the first byte of the character (>= 0xc0). Return
char value in TMP1.

Emits a fast-call helper. On entry STR_PTR points after the first byte. Only
the sequences which can encode a multi-byte newline are decoded, and only
when nltype is NLTYPE_ANY: 0xc2 0x85 returns 0x85, and 0xe2 0x80 followed by
a byte in 0x80..0xbf returns 0x2000 | (third byte - 0x80). In every other
case the bytes of the form 10xxxxxx which follow are skipped and
INVALID_UTF_CHAR is returned. TMP2 is used as scratch. */
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_label *skip_start;
struct sljit_label *three_byte_exit;
struct sljit_jump *jump[5];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

if (common->nltype != NLTYPE_ANY)
  {
  SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);

  /* All newlines are ascii, just skip intermediate octets. */
  /* NOTE(review): STR_END is compared only once, before the loop label, so
  the loop itself keeps reading while it finds 10xxxxxx bytes; the second
  skip loop below re-checks STR_END on every iteration. Confirm that this
  is intended. */
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  loop = LABEL();
  /* The first call only asks whether a post-increment load is supported,
  the second one emits it. */
  if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
    sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  else
    {
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }

  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
  /* The last byte read is not an intermediate octet: step back to it. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  JUMPHERE(jump[0]);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  return;
  }

/* TMP2 receives the second byte, if there is one. */
jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Only these two first bytes can start a multi-byte newline. */
jump[1] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
jump[2] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);

/* Not a newline: TMP2 holds the last byte read. */
skip_start = LABEL();
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
jump[3] = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);

/* Skip intermediate octets. */
loop = LABEL();
jump[4] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);

/* The last byte read is not an intermediate octet: step back to it. */
JUMPHERE(jump[3]);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Common exit for everything which is not a newline. */
three_byte_exit = LABEL();
JUMPHERE(jump[0]);
JUMPHERE(jump[4]);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two byte long newline: 0x85. */
JUMPHERE(jump[1]);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three byte long newlines: 0x2028 and 0x2029. */
JUMPHERE(jump[2]);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start);
CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The third byte must be in 0x80..0xbf. TMP2 keeps the unmodified byte
for the skip code. */
OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start);

/* 0xe2 0x80 encodes the 0x2000 prefix of the character. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4651 
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Emits the fast-call helper that moves STR_PTR one character back in a
UTF-8 subject that may contain invalid sequences (8-bit code units).

Register usage of the emitted routine, as far as this function shows:
  TMP1    in:  compared against 0xc0 first; presumably the last byte of
               the character, already loaded by the caller (TODO confirm).
          out: 1 when a well-formed multi-byte sequence was stepped over,
               0 when the sequence is invalid.
  TMP2    in:  lower bound for STR_PTR (presumably the start of the
               readable subject; TODO confirm). Not modified.
  STR_PTR in:  E. out: E-1 / E-2 / E-3 for an accepted two / three / four
               byte sequence (i.e. the position of its lead byte), and E+1
               on every invalid exit.

The routine is entered with sljit_emit_fast_enter and always leaves with
SLJIT_FAST_RETURN. */
DEFINE_COMPILER;
sljit_s32 i;
struct sljit_jump *jump;
struct sljit_jump *buffer_start_close;
struct sljit_label *exit_ok_label;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[7];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* STR_PTR = E-3: the furthest position a four-byte lead byte can have. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
/* A value >= 0xc0 in TMP1 cannot be a continuation byte. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);

/* Two-byte sequence. */
/* If E-3 is below the lower bound, the bounds-checked variant further
down must be used instead of the unchecked loads that follow. */
buffer_start_close = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));

/* 0xc0..0xdf (two-byte lead) maps to 0x00..0x1f. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three-byte sequence. */
JUMPHERE(jump);
/* TMP1 still holds (byte - 0xc0). A continuation byte (0x80..0xbf) gives
-0x40..-1; everything else is rejected here.
NOTE(review): relies on SLJIT_LESS being an unsigned comparison. */
exit_invalid[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));

/* 0xe0..0xef (three-byte lead) maps to 0x00..0x0f. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four-byte sequence. */
JUMPHERE(jump);
/* Rebase TMP1 to (byte - 0x80): the byte at E-2 must be a continuation
byte, i.e. land in 0x00..0x3f. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);

/* The lead byte at E-3 must be in 0xf0..0xf4. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
exit_invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);

/* Shared success exit: STR_PTR already points to the lead byte. */
exit_ok_label = LABEL();
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two-byte sequence. */
/* Bounds-checked variant, used when E-3 is below the lower bound. */
JUMPHERE(buffer_start_close);
/* STR_PTR = E-1. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

exit_invalid[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, exit_ok_label);

/* Three-byte sequence. */
/* STR_PTR = E-2. The byte at E-1 must be a continuation byte and E-2
must not be below the lower bound. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
exit_invalid[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
exit_invalid[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, exit_ok_label);

/* Four-byte sequences are not possible. */

/* Invalid exit for the E-2 position: (E-2) + 3 = E+1. */
exit_invalid_label = LABEL();
sljit_set_label(exit_invalid[5], exit_invalid_label);
sljit_set_label(exit_invalid[6], exit_invalid_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(exit_invalid[4]);
/* -2 + 4 = 2 */
/* STR_PTR is E-1 here; subtracting 2 lets this path share the "+4" exit
below, which expects STR_PTR == E-3. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Invalid exit for the E-3 position: (E-3) + 4 = E+1. */
exit_invalid_label = LABEL();
for (i = 0; i < 4; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4747 
static void do_utfpeakcharback(compiler_common *common)
{
/* Peek a character back. Does not modify STR_PTR.

Emits the fast-call helper that decodes the multi-byte UTF-8 character
ending just before STR_PTR. No validity or bounds checks are emitted, so
the subject is presumably known to be valid UTF-8 here (TODO confirm
against the callers). The incoming value of TMP1 is not used; the decoded
character is returned in TMP1 and TMP2 is clobbered.

The three length cases share code by falling through: the four-byte case
combines its first two bytes and continues in the three-byte code, which in
turn continues in the two-byte code. Each stage shifts the accumulated value
left by 6 and ORs in the payload of the next continuation byte. */
DEFINE_COMPILER;
struct sljit_jump *jump[2];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Byte at -2 is a two-byte lead (0xc0..0xdf)? TMP1 = its payload. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);

/* Byte at -3 is a three-byte lead (0xe0..0xef)? TMP1 = its payload. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
jump[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);

/* Four-byte case: TMP1 is rebased to (byte[-3] - 0x80) and combined with
the payload of the lead byte at -4. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Append the payload of the continuation byte at -2. */
JUMPHERE(jump[1]);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Append the payload of the continuation byte at -1. */
JUMPHERE(jump[0]);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4784 
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Peek a character back. Does not modify STR_PTR.

Emits the fast-call helper that decodes the UTF-8 character ending just
before STR_PTR in a subject that may contain invalid sequences (8-bit code
units).

Register usage of the emitted routine, as far as this function shows:
  TMP1 in:  compared against 0xc0 first; presumably the byte at
            STR_PTR[-1], already loaded by the caller (TODO confirm).
       out: the decoded character, or INVALID_UTF_CHAR.
  TMP2 in:  lower bound for reads before STR_PTR (presumably the start of
            the readable subject; TODO confirm). Clobbered.

Three variants of the decoder are emitted, selected by how many code units
lie between the lower bound and STR_PTR: the unrestricted one (up to four
bytes), one limited to three bytes and one limited to two bytes. The limited
variants jump back into the unrestricted code (two_byte_entry,
three_byte_entry) once the lead byte has been found.

When the CPU has conditional moves (SLJIT_HAS_CMOV) some range checks are
emitted as compare + CMOV; the matching exit_invalid[] slot is then NULL.
NOTE(review): the final loop passes those NULL entries to sljit_set_label;
this relies on sljit_set_label ignoring a NULL jump. */
DEFINE_COMPILER;
sljit_s32 i;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *jump[2];
struct sljit_label *two_byte_entry;
struct sljit_label *three_byte_entry;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[8];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* TMP2 = lower bound + 3. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
/* A value >= 0xc0 in TMP1 cannot be a continuation byte. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
/* Fewer than four code units are available: use the limited variants. */
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
/* Lead bytes 0xc2..0xdf map to 0x00..0x1d (0xc0 and 0xc1 would be
overlong and are not accepted as two-byte leads). */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);

two_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
/* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump[1]);
/* Not a two-byte lead: the byte at -2 must be a continuation byte. TMP2 is
rebased to (byte - 0x80) and TMP1 is reduced to the payload of the last
byte; both are merged into TMP1. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);

three_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Reject 0xd800..0xdfff: with cmov the value is forced to -0xd800 so that
it becomes 0 after the addition below and fails the next check. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800);
  exit_invalid[2] = NULL;
  }
else
  exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);

/* Reject values below 0x800 (not a minimal three-byte encoding). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  exit_invalid[3] = NULL;
  }
else
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump[1]);
/* Not a three-byte lead: the byte at -3 must be a continuation byte. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Four-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
/* ADD is used instead of OR because of the SUB 0x10000 above. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

/* TMP1 holds (character - 0x10000); only 0..0xfffff is accepted. The
0x10000 bias is restored right after the check, which is why the cmov
variant loads INVALID_UTF_CHAR - 0x10000. */
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  exit_invalid[5] = NULL;
  }
else
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Variant limited to three bytes. TMP2 = lower bound + 2. */
JUMPHERE(jump[0]);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);

OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);

/* A four-byte sequence does not fit: invalid. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Variant limited to two bytes. TMP2 is still lower bound + 2, so
TMP2 > STR_PTR means fewer than two code units are available. */
JUMPHERE(jump[0]);
exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);

/* Common invalid exit; also reached by falling through from above. */
exit_invalid_label = LABEL();
for (i = 0; i < 8; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4916 
4917 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
4918 
4919 #if PCRE2_CODE_UNIT_WIDTH == 16
4920 
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Slow decoding a UTF-16 character. TMP1 contains the first half
of the character (>= 0xd800). Return char value in TMP1. STR_PTR is
undefined for invalid characters.

Emits a fast-call helper. On the success path STR_PTR is advanced by one
code unit (past the second half) and TMP2 is clobbered. On any invalid exit
TMP1 is set to INVALID_UTF_CHAR. */
DEFINE_COMPILER;
struct sljit_jump *exit_invalid[3];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* TMP2 contains the high surrogate. */
/* NOTE(review): the arithmetic below (TMP2 << 10, + 0x10000, + low payload)
only yields the code point if TMP2 holds the high surrogate minus 0xd800 on
entry; presumably the caller prepares it that way - confirm. */
/* A first unit >= 0xdc00 is not a high surrogate. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
/* There must be a second code unit to read. */
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The second unit must be a low surrogate (0xdc00..0xdfff). */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Common invalid exit. */
JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
JUMPHERE(exit_invalid[2]);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4952 
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Slow decoding a UTF-16 character, specialized for newlines.
TMP1 contains the first half of the character (>= 0xd800). Return
char value in TMP1.

Emits a fast-call helper. The character is not actually decoded: when a
second code unit is available and the first unit is below 0xdc00, TMP1 is
set to the constant 0x10000 and STR_PTR is advanced by one code unit only
if the next unit is a low surrogate (0xdc00..0xdfff). Otherwise TMP1 is set
to INVALID_UTF_CHAR and STR_PTR is left unchanged. TMP2 is clobbered; its
incoming value is not used. */

DEFINE_COMPILER;
struct sljit_jump *exit_invalid[2];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* There must be a second code unit to read. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
/* A first unit >= 0xdc00 is not a high surrogate. */
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);

/* TMP2 = 1 if the next unit is a low surrogate, 0 otherwise; scaled to a
byte offset and added to STR_PTR without branching. */
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Common invalid exit. */
JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4984 
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Goes one character back.

Emits the fast-call helper for UTF-16 subjects that may contain invalid
sequences.

Register usage of the emitted routine, as far as this function shows:
  TMP1    in:  values below 0x400 are rejected. Presumably the last code
               unit minus 0xd800, so that this rejects a high surrogate in
               the trailing position (TODO confirm against the caller).
          out: 1 when a surrogate pair was stepped over, 0 when invalid.
  TMP2    in:  lower bound for STR_PTR (presumably the start of the
               readable subject; TODO confirm). Not modified.
  STR_PTR out: decremented by one code unit on success, incremented by one
               code unit on every invalid exit. */
DEFINE_COMPILER;
struct sljit_jump *exit_invalid[3];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
/* There must be a code unit before STR_PTR. */
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* The preceding unit must be a high surrogate (0xd800..0xdbff). */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Common invalid exit. */
JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
JUMPHERE(exit_invalid[2]);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5012 
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Peek a character back. Does not modify STR_PTR.

Emits the fast-call helper for UTF-16 subjects that may contain invalid
sequences.

Register usage of the emitted routine, as far as this function shows:
  TMP1 in:  presumably the code unit at STR_PTR[-1], already known to be
            >= 0xd800 (TODO confirm against the caller).
       out: the decoded character, or INVALID_UTF_CHAR.
  TMP2 in:  lower bound for reads before STR_PTR (presumably the start of
            the readable subject; TODO confirm). Clobbered unless TMP1 is
            >= 0xe000 on entry. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *exit_invalid[3];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Values >= 0xe000 are not surrogates: return TMP1 unchanged. */
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
/* The last unit must be a low surrogate, and a second unit must exist
before it. */
exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* TMP1 = 0x10000 + low payload; TMP2 = high payload, which must be below
0x400 (i.e. the unit at -2 is a high surrogate). */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

JUMPHERE(jump);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Common invalid exit. */
JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
JUMPHERE(exit_invalid[2]);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5044 
5045 #endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
5046 
/* UCD_BLOCK_SIZE must be 128 (see the assert below). The mask selects the
offset of a character inside its 128-entry block and the shift selects the
block number; they must stay in sync: UCD_BLOCK_MASK == (1 << UCD_BLOCK_SHIFT) - 1. */
#define UCD_BLOCK_MASK 127
#define UCD_BLOCK_SHIFT 7
5050 
static void do_getucd(compiler_common *common)
{
/* Search the UCD record for the character comes in TMP1.
Returns the value read from the stage 2 table (the index of the UCD
record) in TMP2. TMP1 is clobbered: on return it holds the stage 2 table
index, not a character type.

Emits a fast-call helper that performs the two-stage table lookup
  ucd_stage2[ucd_stage1[c >> UCD_BLOCK_SHIFT] * 128 + (c & UCD_BLOCK_MASK)]
where both tables are read as unsigned 16 bit values. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *jump;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Without UTF checking a 32 bit code unit may exceed the Unicode range;
  such values are looked up as UNASSIGNED_UTF_CHAR instead. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(jump);
  }
#endif

/* Stage 1: TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT]. The shift left by
one converts the index into a byte offset of a 16 bit entry. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Stage 2: TMP1 = block * 128 + offset inside the block, then
TMP2 = ucd_stage2[TMP1] (the index register is scaled by 2). */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5090 
static void do_getucdtype(compiler_common *common)
{
/* Search the UCD record for the character comes in TMP1.
Returns chartype in TMP1. TMP2 is clobbered: on return it holds the UCD
record index multiplied by four (an intermediate of the address
computation), not a byte offset of the record.

Emits a fast-call helper that performs the same two-stage table lookup as
do_getucd and then loads the chartype field of the selected record. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *jump;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Without UTF checking a 32 bit code unit may exceed the Unicode range;
  such values are looked up as UNASSIGNED_UTF_CHAR instead. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(jump);
  }
#endif

/* Stage 1: TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT]. The shift left by
one converts the index into a byte offset of a 16 bit entry. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Stage 2: TMP1 = block * 128 + offset inside the block, then
TMP2 = ucd_stage2[TMP1] (the index register is scaled by 2). */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

/* TMP2 is multiplied by 12. Same as (TMP2 << 2) + ((TMP2 << 2) << 1). */
/* TMP1 = &chartype of record 0, plus 4 * index; the final load adds
(4 * index) << 1 through the scaled index register, giving 12 * index
in total (sizeof(ucd_record) is asserted to be 12 above). */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5137 
5138 #endif /* SUPPORT_UNICODE */
5139 
mainloop_entry(compiler_common * common)5140 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
5141 {
5142 DEFINE_COMPILER;
5143 struct sljit_label *mainloop;
5144 struct sljit_label *newlinelabel = NULL;
5145 struct sljit_jump *start;
5146 struct sljit_jump *end = NULL;
5147 struct sljit_jump *end2 = NULL;
5148 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5149 struct sljit_label *loop;
5150 struct sljit_jump *jump;
5151 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5152 jump_list *newline = NULL;
5153 sljit_u32 overall_options = common->re->overall_options;
5154 BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
5155 BOOL newlinecheck = FALSE;
5156 BOOL readuchar = FALSE;
5157 
5158 if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
5159     && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
5160   newlinecheck = TRUE;
5161 
5162 SLJIT_ASSERT(common->abort_label == NULL);
5163 
5164 if ((overall_options & PCRE2_FIRSTLINE) != 0)
5165   {
5166   /* Search for the end of the first line. */
5167   SLJIT_ASSERT(common->match_end_ptr != 0);
5168   OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5169 
5170   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5171     {
5172     mainloop = LABEL();
5173     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5174     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5175     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5176     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5177     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
5178     CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
5179     JUMPHERE(end);
5180     OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5181     }
5182   else
5183     {
5184     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5185     mainloop = LABEL();
5186     /* Continual stores does not cause data dependency. */
5187     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5188     read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
5189     check_newlinechar(common, common->nltype, &newline, TRUE);
5190     CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
5191     JUMPHERE(end);
5192     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5193     set_jumps(newline, LABEL());
5194     }
5195 
5196   OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5197   }
5198 else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
5199   {
5200   /* Check whether offset limit is set and valid. */
5201   SLJIT_ASSERT(common->match_end_ptr != 0);
5202 
5203   if (HAS_VIRTUAL_REGISTERS)
5204     {
5205     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5206     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5207     }
5208   else
5209     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5210 
5211   OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5212   end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
5213   if (HAS_VIRTUAL_REGISTERS)
5214     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5215   else
5216     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
5217 
5218 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5219   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5220 #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5221   if (HAS_VIRTUAL_REGISTERS)
5222     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5223 
5224   OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
5225   end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5226   OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5227   JUMPHERE(end2);
5228   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
5229   add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
5230   JUMPHERE(end);
5231   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
5232   }
5233 
5234 start = JUMP(SLJIT_JUMP);
5235 
5236 if (newlinecheck)
5237   {
5238   newlinelabel = LABEL();
5239   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5240   end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5241   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5242   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
5243   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
5244 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5245   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5246 #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5247   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5248   end2 = JUMP(SLJIT_JUMP);
5249   }
5250 
5251 mainloop = LABEL();
5252 
5253 /* Increasing the STR_PTR here requires one less jump in the most common case. */
5254 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5255 if (common->utf && !common->invalid_utf) readuchar = TRUE;
5256 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5257 if (newlinecheck) readuchar = TRUE;
5258 
5259 if (readuchar)
5260   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5261 
5262 if (newlinecheck)
5263   CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
5264 
5265 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5266 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5267 #if PCRE2_CODE_UNIT_WIDTH == 8
5268 if (common->invalid_utf)
5269   {
5270   /* Skip continuation code units. */
5271   loop = LABEL();
5272   jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5273   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5274   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5275   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
5276   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
5277   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5278   JUMPHERE(jump);
5279   }
5280 else if (common->utf)
5281   {
5282   jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5283   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5284   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5285   JUMPHERE(jump);
5286   }
5287 #elif PCRE2_CODE_UNIT_WIDTH == 16
5288 if (common->invalid_utf)
5289   {
5290   /* Skip continuation code units. */
5291   loop = LABEL();
5292   jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5293   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5294   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5295   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
5296   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
5297   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5298   JUMPHERE(jump);
5299   }
5300 else if (common->utf)
5301   {
5302   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5303 
5304   if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
5305     {
5306     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5307     OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
5308     CMOV(SLJIT_LESS, STR_PTR, TMP2, 0);
5309     }
5310   else
5311     {
5312     OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
5313     OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
5314     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5315     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5316     }
5317   }
5318 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
5319 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5320 JUMPHERE(start);
5321 
5322 if (newlinecheck)
5323   {
5324   JUMPHERE(end);
5325   JUMPHERE(end2);
5326   }
5327 
5328 return mainloop;
5329 }
5330 
5331 
add_prefix_char(PCRE2_UCHAR chr,fast_forward_char_data * chars,BOOL last)5332 static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
5333 {
5334 sljit_u32 i, count = chars->count;
5335 
5336 if (count == 255)
5337   return;
5338 
5339 if (count == 0)
5340   {
5341   chars->count = 1;
5342   chars->chars[0] = chr;
5343 
5344   if (last)
5345     chars->last_count = 1;
5346   return;
5347   }
5348 
5349 for (i = 0; i < count; i++)
5350   if (chars->chars[i] == chr)
5351     return;
5352 
5353 if (count >= MAX_DIFF_CHARS)
5354   {
5355   chars->count = 255;
5356   return;
5357   }
5358 
5359 chars->chars[count] = chr;
5360 chars->count = count + 1;
5361 
5362 if (last)
5363   chars->last_count++;
5364 }
5365 
scan_prefix(compiler_common * common,PCRE2_SPTR cc,fast_forward_char_data * chars,int max_chars,sljit_u32 * rec_count)5366 static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
5367 {
5368 /* Recursive function, which scans prefix literals. */
5369 BOOL last, any, class, caseless;
5370 int len, repeat, len_save, consumed = 0;
5371 sljit_u32 chr; /* Any unicode character. */
5372 sljit_u8 *bytes, *bytes_end, byte;
5373 PCRE2_SPTR alternative, cc_save, oc;
5374 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5375 PCRE2_UCHAR othercase[4];
5376 #elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
5377 PCRE2_UCHAR othercase[2];
5378 #else
5379 PCRE2_UCHAR othercase[1];
5380 #endif
5381 
5382 repeat = 1;
5383 while (TRUE)
5384   {
5385   if (*rec_count == 0)
5386     return 0;
5387   (*rec_count)--;
5388 
5389   last = TRUE;
5390   any = FALSE;
5391   class = FALSE;
5392   caseless = FALSE;
5393 
5394   switch (*cc)
5395     {
5396     case OP_CHARI:
5397     caseless = TRUE;
5398     /* Fall through */
5399     case OP_CHAR:
5400     last = FALSE;
5401     cc++;
5402     break;
5403 
5404     case OP_SOD:
5405     case OP_SOM:
5406     case OP_SET_SOM:
5407     case OP_NOT_WORD_BOUNDARY:
5408     case OP_WORD_BOUNDARY:
5409     case OP_EODN:
5410     case OP_EOD:
5411     case OP_CIRC:
5412     case OP_CIRCM:
5413     case OP_DOLL:
5414     case OP_DOLLM:
5415     /* Zero width assertions. */
5416     cc++;
5417     continue;
5418 
5419     case OP_ASSERT:
5420     case OP_ASSERT_NOT:
5421     case OP_ASSERTBACK:
5422     case OP_ASSERTBACK_NOT:
5423     case OP_ASSERT_NA:
5424     case OP_ASSERTBACK_NA:
5425     cc = bracketend(cc);
5426     continue;
5427 
5428     case OP_PLUSI:
5429     case OP_MINPLUSI:
5430     case OP_POSPLUSI:
5431     caseless = TRUE;
5432     /* Fall through */
5433     case OP_PLUS:
5434     case OP_MINPLUS:
5435     case OP_POSPLUS:
5436     cc++;
5437     break;
5438 
5439     case OP_EXACTI:
5440     caseless = TRUE;
5441     /* Fall through */
5442     case OP_EXACT:
5443     repeat = GET2(cc, 1);
5444     last = FALSE;
5445     cc += 1 + IMM2_SIZE;
5446     break;
5447 
5448     case OP_QUERYI:
5449     case OP_MINQUERYI:
5450     case OP_POSQUERYI:
5451     caseless = TRUE;
5452     /* Fall through */
5453     case OP_QUERY:
5454     case OP_MINQUERY:
5455     case OP_POSQUERY:
5456     len = 1;
5457     cc++;
5458 #ifdef SUPPORT_UNICODE
5459     if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
5460 #endif
5461     max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
5462     if (max_chars == 0)
5463       return consumed;
5464     last = FALSE;
5465     break;
5466 
5467     case OP_KET:
5468     cc += 1 + LINK_SIZE;
5469     continue;
5470 
5471     case OP_ALT:
5472     cc += GET(cc, 1);
5473     continue;
5474 
5475     case OP_ONCE:
5476     case OP_BRA:
5477     case OP_BRAPOS:
5478     case OP_CBRA:
5479     case OP_CBRAPOS:
5480     alternative = cc + GET(cc, 1);
5481     while (*alternative == OP_ALT)
5482       {
5483       max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
5484       if (max_chars == 0)
5485         return consumed;
5486       alternative += GET(alternative, 1);
5487       }
5488 
5489     if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
5490       cc += IMM2_SIZE;
5491     cc += 1 + LINK_SIZE;
5492     continue;
5493 
5494     case OP_CLASS:
5495 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5496     if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
5497       return consumed;
5498 #endif
5499     class = TRUE;
5500     break;
5501 
5502     case OP_NCLASS:
5503 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5504     if (common->utf) return consumed;
5505 #endif
5506     class = TRUE;
5507     break;
5508 
5509 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
5510     case OP_XCLASS:
5511 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5512     if (common->utf) return consumed;
5513 #endif
5514     any = TRUE;
5515     cc += GET(cc, 1);
5516     break;
5517 #endif
5518 
5519     case OP_DIGIT:
5520 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5521     if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
5522       return consumed;
5523 #endif
5524     any = TRUE;
5525     cc++;
5526     break;
5527 
5528     case OP_WHITESPACE:
5529 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5530     if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
5531       return consumed;
5532 #endif
5533     any = TRUE;
5534     cc++;
5535     break;
5536 
5537     case OP_WORDCHAR:
5538 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5539     if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
5540       return consumed;
5541 #endif
5542     any = TRUE;
5543     cc++;
5544     break;
5545 
5546     case OP_NOT:
5547     case OP_NOTI:
5548     cc++;
5549     /* Fall through. */
5550     case OP_NOT_DIGIT:
5551     case OP_NOT_WHITESPACE:
5552     case OP_NOT_WORDCHAR:
5553     case OP_ANY:
5554     case OP_ALLANY:
5555 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5556     if (common->utf) return consumed;
5557 #endif
5558     any = TRUE;
5559     cc++;
5560     break;
5561 
5562 #ifdef SUPPORT_UNICODE
5563     case OP_NOTPROP:
5564     case OP_PROP:
5565 #if PCRE2_CODE_UNIT_WIDTH != 32
5566     if (common->utf) return consumed;
5567 #endif
5568     any = TRUE;
5569     cc += 1 + 2;
5570     break;
5571 #endif
5572 
5573     case OP_TYPEEXACT:
5574     repeat = GET2(cc, 1);
5575     cc += 1 + IMM2_SIZE;
5576     continue;
5577 
5578     case OP_NOTEXACT:
5579     case OP_NOTEXACTI:
5580 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5581     if (common->utf) return consumed;
5582 #endif
5583     any = TRUE;
5584     repeat = GET2(cc, 1);
5585     cc += 1 + IMM2_SIZE + 1;
5586     break;
5587 
5588     default:
5589     return consumed;
5590     }
5591 
5592   if (any)
5593     {
5594     do
5595       {
5596       chars->count = 255;
5597 
5598       consumed++;
5599       if (--max_chars == 0)
5600         return consumed;
5601       chars++;
5602       }
5603     while (--repeat > 0);
5604 
5605     repeat = 1;
5606     continue;
5607     }
5608 
5609   if (class)
5610     {
5611     bytes = (sljit_u8*) (cc + 1);
5612     cc += 1 + 32 / sizeof(PCRE2_UCHAR);
5613 
5614     switch (*cc)
5615       {
5616       case OP_CRSTAR:
5617       case OP_CRMINSTAR:
5618       case OP_CRPOSSTAR:
5619       case OP_CRQUERY:
5620       case OP_CRMINQUERY:
5621       case OP_CRPOSQUERY:
5622       max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
5623       if (max_chars == 0)
5624         return consumed;
5625       break;
5626 
5627       default:
5628       case OP_CRPLUS:
5629       case OP_CRMINPLUS:
5630       case OP_CRPOSPLUS:
5631       break;
5632 
5633       case OP_CRRANGE:
5634       case OP_CRMINRANGE:
5635       case OP_CRPOSRANGE:
5636       repeat = GET2(cc, 1);
5637       if (repeat <= 0)
5638         return consumed;
5639       break;
5640       }
5641 
5642     do
5643       {
5644       if (bytes[31] & 0x80)
5645         chars->count = 255;
5646       else if (chars->count != 255)
5647         {
5648         bytes_end = bytes + 32;
5649         chr = 0;
5650         do
5651           {
5652           byte = *bytes++;
5653           SLJIT_ASSERT((chr & 0x7) == 0);
5654           if (byte == 0)
5655             chr += 8;
5656           else
5657             {
5658             do
5659               {
5660               if ((byte & 0x1) != 0)
5661                 add_prefix_char(chr, chars, TRUE);
5662               byte >>= 1;
5663               chr++;
5664               }
5665             while (byte != 0);
5666             chr = (chr + 7) & ~7;
5667             }
5668           }
5669         while (chars->count != 255 && bytes < bytes_end);
5670         bytes = bytes_end - 32;
5671         }
5672 
5673       consumed++;
5674       if (--max_chars == 0)
5675         return consumed;
5676       chars++;
5677       }
5678     while (--repeat > 0);
5679 
5680     switch (*cc)
5681       {
5682       case OP_CRSTAR:
5683       case OP_CRMINSTAR:
5684       case OP_CRPOSSTAR:
5685       return consumed;
5686 
5687       case OP_CRQUERY:
5688       case OP_CRMINQUERY:
5689       case OP_CRPOSQUERY:
5690       cc++;
5691       break;
5692 
5693       case OP_CRRANGE:
5694       case OP_CRMINRANGE:
5695       case OP_CRPOSRANGE:
5696       if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
5697         return consumed;
5698       cc += 1 + 2 * IMM2_SIZE;
5699       break;
5700       }
5701 
5702     repeat = 1;
5703     continue;
5704     }
5705 
5706   len = 1;
5707 #ifdef SUPPORT_UNICODE
5708   if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
5709 #endif
5710 
5711   if (caseless && char_has_othercase(common, cc))
5712     {
5713 #ifdef SUPPORT_UNICODE
5714     if (common->utf)
5715       {
5716       GETCHAR(chr, cc);
5717       if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
5718         return consumed;
5719       }
5720     else
5721 #endif
5722       {
5723       chr = *cc;
5724 #ifdef SUPPORT_UNICODE
5725       if (common->ucp && chr > 127)
5726         othercase[0] = UCD_OTHERCASE(chr);
5727       else
5728 #endif
5729         othercase[0] = TABLE_GET(chr, common->fcc, chr);
5730       }
5731     }
5732   else
5733     {
5734     caseless = FALSE;
5735     othercase[0] = 0; /* Stops compiler warning - PH */
5736     }
5737 
5738   len_save = len;
5739   cc_save = cc;
5740   while (TRUE)
5741     {
5742     oc = othercase;
5743     do
5744       {
5745       len--;
5746       consumed++;
5747 
5748       chr = *cc;
5749       add_prefix_char(*cc, chars, len == 0);
5750 
5751       if (caseless)
5752         add_prefix_char(*oc, chars, len == 0);
5753 
5754       if (--max_chars == 0)
5755         return consumed;
5756       chars++;
5757       cc++;
5758       oc++;
5759       }
5760     while (len > 0);
5761 
5762     if (--repeat == 0)
5763       break;
5764 
5765     len = len_save;
5766     cc = cc_save;
5767     }
5768 
5769   repeat = 1;
5770   if (last)
5771     return consumed;
5772   }
5773 }
5774 
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
/* Emits a test of the code unit held in "reg" and a jump to "label" when that
code unit cannot begin a character: a UTF-8 trailing byte in the 8-bit
library, or the second half of a surrogate pair in the 16-bit library.

  compiler  the sljit compiler that receives the instructions
  reg       register holding the code unit; it is overwritten with the
            masked value
  label     jump target taken when the code unit is not a character start */
static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
/* Trailing bytes of a UTF-8 sequence have the bit pattern 10xxxxxx. */
const int unit_class_mask = 0xc0;
const int trailing_unit_class = 0x80;
#elif PCRE2_CODE_UNIT_WIDTH == 16
/* The second unit of a UTF-16 surrogate pair lies in 0xdc00..0xdfff. */
const int unit_class_mask = 0xfc00;
const int trailing_unit_class = 0xdc00;
#else
#error "Unknown code width"
#endif

OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, unit_class_mask);
CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, trailing_unit_class, label);
}
#endif
5789 
5790 #include "pcre2_jit_simd_inc.h"
5791 
5792 #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
5793 
check_fast_forward_char_pair_simd(compiler_common * common,fast_forward_char_data * chars,int max)5794 static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)
5795 {
5796   sljit_s32 i, j, max_i = 0, max_j = 0;
5797   sljit_u32 max_pri = 0;
5798   PCRE2_UCHAR a1, a2, a_pri, b1, b2, b_pri;
5799 
5800   for (i = max - 1; i >= 1; i--)
5801     {
5802     if (chars[i].last_count > 2)
5803       {
5804       a1 = chars[i].chars[0];
5805       a2 = chars[i].chars[1];
5806       a_pri = chars[i].last_count;
5807 
5808       j = i - max_fast_forward_char_pair_offset();
5809       if (j < 0)
5810         j = 0;
5811 
5812       while (j < i)
5813         {
5814         b_pri = chars[j].last_count;
5815         if (b_pri > 2 && a_pri + b_pri >= max_pri)
5816           {
5817           b1 = chars[j].chars[0];
5818           b2 = chars[j].chars[1];
5819 
5820           if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)
5821             {
5822             max_pri = a_pri + b_pri;
5823             max_i = i;
5824             max_j = j;
5825             }
5826           }
5827         j++;
5828         }
5829       }
5830     }
5831 
5832 if (max_pri == 0)
5833   return FALSE;
5834 
5835 fast_forward_char_pair_simd(common, max_i, chars[max_i].chars[0], chars[max_i].chars[1], max_j, chars[max_j].chars[0], chars[max_j].chars[1]);
5836 return TRUE;
5837 }
5838 
5839 #endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
5840 
/* Emits code that moves STR_PTR forward until the code unit "offset" code
units ahead of it equals char1 or char2. Pass the same value twice to search
for a single code unit.

  common  compiler state; common->match_end_ptr, common->mode and (in UTF
          builds) common->utf select what is emitted
  char1   first accepted code unit
  char2   second accepted code unit
  offset  distance in code units from the candidate start position to the
          code unit that is compared; asserted to be 0 unless the mode is
          PCRE2_JIT_COMPLETE

The emitted code uses TMP1 as scratch. When common->match_end_ptr is non-zero
STR_END is temporarily lowered and its original value is kept in TMP3. */
static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *match;
struct sljit_jump *partial_quit;
PCRE2_UCHAR mask;
BOOL has_match_end = (common->match_end_ptr != 0);

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);

/* TMP1 = value stored in the match_end_ptr stack slot. */
if (has_match_end)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

/* Let STR_PTR address the code unit that is actually compared. */
if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (has_match_end)
  {
  /* Keep the real STR_END in TMP3; it is restored before this code ends. */
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  /* STR_END = min(STR_END, TMP1 + (offset + 1) code units). */
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD

if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
  {
  /* The SIMD helper performs the search; only the offset adjustment and the
  STR_END restore remain to be emitted here. */
  fast_forward_char_simd(common, char1, char2, offset);

  if (offset > 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

  if (has_match_end)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

/* Scalar search loop. */
start = LABEL();

/* Out of input: a complete-mode match fails; in the other modes the jump
lands after the final STR_PTR adjustment below. */
partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit);

/* Read one code unit and step past it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (char1 == char2)
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, start);
else
  {
  mask = char1 ^ char2;
  if (is_powerof2(mask))
    {
    /* The two values differ in a single bit: force that bit on and compare
    only once. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask, start);
    }
  else
    {
    /* Two independent comparisons. */
    match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, start);
    JUMPHERE(match);
    }
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  /* The candidate start position is (offset + 1) code units behind STR_PTR;
  keep searching if it is not the start of a character. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
  jumpto_if_not_utf_char_start(compiler, TMP1, start);
  }
#endif

/* Undo the read increment and the initial offset adjustment. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(partial_quit);

if (has_match_end)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
5926 
/* Emits a fast-forward search driven by the code units that scan_prefix()
reports for the first few positions of a match.

Three strategies are tried in this order:
  1. a SIMD search for a pair of positions (when the platform provides it);
  2. a byte-indexed skip table built from the longest run of at least four
     consecutive positions whose character sets are known, optionally
     followed by a check of one further position;
  3. a plain search for the one or two code units of the best single
     position, through fast_forward_first_char2().

Returns FALSE when the prefix offers nothing usable and no code was emitted.
Returns TRUE otherwise; note that TRUE is also returned, without emitting the
table search, when allocating the skip table fails. */
static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *match;
fast_forward_char_data chars[MAX_N_CHARS];
sljit_s32 offset;
PCRE2_UCHAR mask;
PCRE2_UCHAR *char_set, *char_set_end;
int i, max, from;
int range_right = -1, range_len;
sljit_u8 *update_table = NULL;
BOOL in_range;
sljit_u32 rec_count;

for (i = 0; i < MAX_N_CHARS; i++)
  {
  chars[i].count = 0;
  chars[i].last_count = 0;
  }

/* rec_count is handed to scan_prefix() by address; presumably it is a work
budget that bounds the scan - confirm against scan_prefix(). */
rec_count = 10000;
max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);

if (max < 1)
  return FALSE;

/* Convert last_count to priority. */
/* Resulting priorities: 7/5 for a single code unit, 6/4 for two code units
that differ in one bit, 3/2 for two unrelated code units, 1 for a larger known
set and 0 when count is 255. The higher value of each pair is chosen when
last_count equals count. NOTE(review): last_count presumably counts the
entries recorded as the final code unit of a character - confirm against
add_prefix_char(). */
for (i = 0; i < max; i++)
  {
  SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count);

  if (chars[i].count == 1)
    {
    chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
    /* Simplifies algorithms later. */
    chars[i].chars[1] = chars[i].chars[0];
    }
  else if (chars[i].count == 2)
    {
    SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);

    if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
      chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
    else
      chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
    }
  else
    chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))
  return TRUE;
#endif

/* Find the longest run of consecutive positions with count < 255. A run is
accepted only when it is longer than the initial range_len of 3. The loop
goes one step past the last position so that a run reaching the end of the
prefix is recorded as well. */
in_range = FALSE;
/* Prevent compiler "uninitialized" warning */
from = 0;
range_len = 4 /* minimum length */ - 1;
for (i = 0; i <= max; i++)
  {
  if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
    {
    range_len = i - from;
    range_right = i - 1;
    }

  if (i < max && chars[i].count < 255)
    {
    SLJIT_ASSERT(chars[i].count > 0);
    if (!in_range)
      {
      in_range = TRUE;
      from = i;
      }
    }
  else
    in_range = FALSE;
  }

if (range_right >= 0)
  {
  /* Build the skip table, indexed by the low byte of a code unit. Each entry
  is a byte distance: the full run length by default, or the smallest
  distance from range_right at which a code unit with that low byte may
  appear inside the run. */
  update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
  if (update_table == NULL)
    return TRUE;
  memset(update_table, IN_UCHARS(range_len), 256);

  for (i = 0; i < range_len; i++)
    {
    SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);

    char_set = chars[range_right - i].chars;
    char_set_end = char_set + chars[range_right - i].count;
    do
      {
      if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
        update_table[(*char_set) & 0xff] = IN_UCHARS(i);
      char_set++;
      }
    while (char_set < char_set_end);
    }
  }

/* Choose one extra position to verify: the first position with priority of at
least 2, replaced by any later position with a strictly higher priority. The
table position itself is excluded. */
offset = -1;
/* Scan forward. */
for (i = 0; i < max; i++)
  {
  if (range_right == i)
    continue;

  if (offset == -1)
    {
    if (chars[i].last_count >= 2)
      offset = i;
    }
  else if (chars[offset].last_count < chars[i].last_count)
    offset = i;
  }

SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));

if (range_right < 0)
  {
  /* No skip table: fall back to the one/two code unit search, if possible. */
  if (offset < 0)
    return FALSE;
  /* Works regardless the value is 1 or 2. */
  /* (chars[1] was made a copy of chars[0] above when count is 1.) */
  fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
  return TRUE;
  }

SLJIT_ASSERT(range_right != offset);

/* Lower STR_END by max code units for the duration of the search, failing
the match when the subtraction sets the LESS flag. With a match end limit the
original STR_END is kept in TMP3 and the lowered STR_END is additionally
capped at the value loaded from the match_end_ptr slot. */
if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  }
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
  }

SLJIT_ASSERT(range_right >= 0);

/* Without virtual registers the table address is kept in RETURN_ADDR. */
if (!HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);

start = LABEL();
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

/* Load one byte of the code unit at position range_right. The second form
addresses the last byte of that code unit, which is used for wider code units
when SLJIT_LITTLE_ENDIAN is not set. */
#if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
#else
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
#endif

/* TMP1 = update_table[TMP1]. */
if (!HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
else
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);

/* Advance by the table distance; a zero distance ends the skipping. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);

if (offset >= 0)
  {
  /* Verify the extra position. STR_PTR is incremented first so that a
  failed check resumes the search one code unit further on. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (chars[offset].count == 1)
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
  else
    {
    mask = chars[offset].chars[0] ^ chars[offset].chars[1];
    if (is_powerof2(mask))
      {
      /* Single differing bit: force it on and compare once. */
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
      }
    else
      {
      match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
      JUMPHERE(match);
      }
    }
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset != 0)
  {
  /* The candidate position must be the start of a character. When no extra
  position was checked, STR_PTR is incremented here as well so that the retry
  at "start" makes progress; the increment is undone on success. */
  if (offset < 0)
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

  jumpto_if_not_utf_char_start(compiler, TMP1, start);

  if (offset < 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }
#endif

/* Undo the increment made for the extra position check. */
if (offset >= 0)
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Restore STR_END. */
if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
else
  OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
return TRUE;
}
6148 
fast_forward_first_char(compiler_common * common)6149 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
6150 {
6151 PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
6152 PCRE2_UCHAR oc;
6153 
6154 oc = first_char;
6155 if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
6156   {
6157   oc = TABLE_GET(first_char, common->fcc, first_char);
6158 #if defined SUPPORT_UNICODE
6159   if (first_char > 127 && (common->utf || common->ucp))
6160     oc = UCD_OTHERCASE(first_char);
6161 #endif
6162   }
6163 
6164 fast_forward_first_char2(common, first_char, oc, 0);
6165 }
6166 
/* Emits code that moves STR_PTR forward to the position that follows the
next newline. Nothing is skipped when STR_PTR is not beyond the "str" field of
jit_arguments.

Which code is emitted depends on common->nltype, on whether common->newline
holds a two code unit sequence (value above 255) and on the availability of
the SIMD helpers. For every path except the two code unit one, the loop label
is also published in common->ff_newline_shortcut.

When common->match_end_ptr is non-zero, STR_END is replaced by the value of
that stack slot during the search and restored from TMP3 afterwards. */
static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *lastchar = NULL;
struct sljit_jump *firstchar;
struct sljit_jump *quit = NULL;
struct sljit_jump *foundcr = NULL;
struct sljit_jump *notfoundnl;
jump_list *newline = NULL;

if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  }

/* Fixed newline made of two code units: the first one is in bits 8..15 of
common->newline, the second one in bits 0..7. */
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  {
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
  if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)
    {
    /* TMP2 = arguments->str, TMP1 = arguments->begin. */
    if (HAS_VIRTUAL_REGISTERS)
      {
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

    /* Step back one code unit, and one more unless that reached "begin". */
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

    /* Search for the pair: second newline unit at offset 1, first newline
    unit at offset 0; then move past both units. */
    fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    }
  else
#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
    {
    lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    /* TMP2 = arguments->str, TMP1 = arguments->begin. */
    if (HAS_VIRTUAL_REGISTERS)
      {
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

    /* Step back one code unit when STR_PTR is at least two code units past
    "begin" (TMP2 becomes 1 or 0, scaled to the code unit size). */
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

    /* Advance until the two code units just before STR_PTR are the newline
    sequence, or until STR_END is reached. */
    loop = LABEL();
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);

    JUMPHERE(quit);
    JUMPHERE(lastchar);
    }

  JUMPHERE(firstchar);

  if (common->match_end_ptr != 0)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

/* All remaining newline types. TMP2 = arguments->str. */
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));

/* Example: match /^/ to \r\n from offset 1. */
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

/* Step back so that the unit/character before the current position is
examined first. */
if (common->nltype == NLTYPE_ANY)
  move_back(common, NULL, FALSE);
else
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

loop = LABEL();
common->ff_newline_shortcut = loop;

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))
  {
  if (common->nltype == NLTYPE_ANYCRLF)
    {
    /* Find CR or LF, read it and step past it. Anything other than CR jumps
    to "quit"; a CR continues with the CR handling emitted further down. */
    fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);
    if (common->mode != PCRE2_JIT_COMPLETE)
      lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    }
   else
    {
    /* Single code unit newline: find it and step past it. In the partial
    modes STR_PTR is clamped to STR_END. */
    fast_forward_char_simd(common, common->newline, common->newline, 0);

    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    if (common->mode != PCRE2_JIT_COMPLETE)
      {
      OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
      CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
      }
    }
  }
else
#endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */
  {
  /* Generic loop: read a character, stop at the end of the subject, branch
  out on CR for the "any" newline types, and send the jumps collected by
  check_newlinechar() in "newline" back to the loop label. */
  read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
  lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
    foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  check_newlinechar(common, common->nltype, &newline, FALSE);
  set_jumps(newline, loop);
  }

if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
  {
  /* "quit" is already set only by the ANYCRLF SIMD path above; in the
  generic path the non-CR case jumps over the CR handling. */
  if (quit == NULL)
    {
    quit = JUMP(SLJIT_JUMP);
    JUMPHERE(foundcr);
    }

  /* After a CR: if the next code unit is NL, skip it too. TMP1 becomes 1 or
  0 (scaled to the code unit size) and is added to STR_PTR. */
  notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(notfoundnl);
  JUMPHERE(quit);
  }

if (lastchar)
  JUMPHERE(lastchar);
JUMPHERE(firstchar);

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
6339 
6340 static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6341 
/* Emits code that moves STR_PTR forward to the next code unit accepted by the
pattern's start bitmap (common->re->start_bitmap).

The membership test is generated by optimize_class() when it can handle the
bitmap; otherwise an explicit bit lookup in the bitmap is emitted.

The emitted code uses TMP1 and TMP2 (plus TMP3 when virtual registers are not
in use). When common->match_end_ptr is non-zero, STR_END is temporarily
lowered and its original value is kept in RETURN_ADDR. */
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
{
DEFINE_COMPILER;
const sljit_u8 *start_bits = common->re->start_bitmap;
struct sljit_label *start;
struct sljit_jump *partial_quit;
#if PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *found = NULL;
#endif
jump_list *matches = NULL;

if (common->match_end_ptr != 0)
  {
  /* STR_END = min(STR_END, value of the match_end_ptr slot + 1 code unit);
  the original STR_END is saved in RETURN_ADDR. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  }

start = LABEL();

/* Out of input: a complete-mode match fails; in the other modes the jump
lands after the STR_PTR adjustment at the end. */
partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit);

/* Read one code unit into TMP1 and step past it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* Code units of 255 and above have no bit of their own: they are accepted
  when the top bit of the bitmap is set and rejected otherwise. */
  if ((start_bits[31] & 0x80) != 0)
    found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
  else
    CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
#elif defined SUPPORT_UNICODE
  /* In UTF mode a bitmap restricted to 7-bit values rejects every code unit
  above 127 without a lookup. */
  if (common->utf && is_char7_bitset(start_bits, FALSE))
    CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
#endif
  /* Bit test: byte index = unit >> 3, bit index = unit & 7. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
  if (!HAS_VIRTUAL_REGISTERS)
    {
    OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP3, 0);
    }
  else
    {
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
    }
  /* Bit clear: not a possible start, keep searching. */
  JUMPTO(SLJIT_ZERO, start);
  }
else
  /* The jumps collected in "matches" continue the search loop. */
  set_jumps(matches, start);

#if PCRE2_CODE_UNIT_WIDTH != 8
if (found != NULL)
  JUMPHERE(found);
#endif

/* Step back onto the accepted code unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(partial_quit);

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
}
6413 
search_requested_char(compiler_common * common,PCRE2_UCHAR req_char,BOOL caseless,BOOL has_firstchar)6414 static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
6415 {
6416 DEFINE_COMPILER;
6417 struct sljit_label *loop;
6418 struct sljit_jump *toolong;
6419 struct sljit_jump *already_found;
6420 struct sljit_jump *found;
6421 struct sljit_jump *found_oc = NULL;
6422 jump_list *not_found = NULL;
6423 sljit_u32 oc, bit;
6424 
6425 SLJIT_ASSERT(common->req_char_ptr != 0);
6426 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
6427 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
6428 toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
6429 already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);
6430 
6431 if (has_firstchar)
6432   OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6433 else
6434   OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
6435 
6436 oc = req_char;
6437 if (caseless)
6438   {
6439   oc = TABLE_GET(req_char, common->fcc, req_char);
6440 #if defined SUPPORT_UNICODE
6441   if (req_char > 127 && (common->utf || common->ucp))
6442     oc = UCD_OTHERCASE(req_char);
6443 #endif
6444   }
6445 
6446 #ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
6447 if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
6448   {
6449   not_found = fast_requested_char_simd(common, req_char, oc);
6450   }
6451 else
6452 #endif
6453   {
6454   loop = LABEL();
6455   add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
6456 
6457   OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
6458 
6459   if (req_char == oc)
6460     found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6461   else
6462     {
6463     bit = req_char ^ oc;
6464     if (is_powerof2(bit))
6465       {
6466        OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
6467       found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
6468       }
6469     else
6470       {
6471       found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6472       found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
6473       }
6474     }
6475   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6476   JUMPTO(SLJIT_JUMP, loop);
6477 
6478   JUMPHERE(found);
6479   if (found_oc)
6480     JUMPHERE(found_oc);
6481   }
6482 
6483 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
6484 
6485 JUMPHERE(already_found);
6486 JUMPHERE(toolong);
6487 return not_found;
6488 }
6489 
/* Emits a helper routine, entered with sljit's fast-call mechanism (return
address kept in RETURN_ADDR), that undoes saved entries found just below
STACK_TOP.

Each iteration reads the word at STACK_TOP[-1] into TMP2 and acts on its sign:
  > 0  the value is an offset from the local base: the two words below it on
       the stack are written to that location and the one after it, and
       three words are popped;
  = 0  end marker: the routine returns;
  < 0  the negated value is an offset from the local base: the single word
       below it on the stack is written there, and two words are popped.

TMP1 holds the local base; TMP2 and, without virtual registers, TMP3 are used
as scratch. */
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *mainloop;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
GET_LOCAL_BASE(TMP1, 0, 0);

/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);

/* Positive entry: TMP2 = local base + offset, restore two words. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
  }
else
  {
  /* Same restore through registers. TMP1 is borrowed as a temporary, so the
  local base is reloaded into it before the loop continues. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
  GET_LOCAL_BASE(TMP1, 0, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);

/* Zero or negative entry. Only values <= 0 reach this point, so "not zero"
here means "negative". */
JUMPHERE(jump);
jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
/* End of reverting values. */
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Negative entry: TMP2 = local base + (-value), restore one word. */
JUMPHERE(jump);
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
  }
else
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);
}
6543 
/* Emits the shared word boundary test routine (fast-call entry, the return
address is parked in LOCALS0 on the stack).

The generated code computes a "word character" flag (0 or 1) for the
character before STR_PTR into TMP3 and for the character at STR_PTR into
TMP2. A missing character (STR_PTR at or before the subject begin, or the
skipread_list jumps taken from check_str_end) leaves the corresponding flag
at 0. On the normal exit TMP2 is replaced by TMP2 XOR TMP3 with SLJIT_SET_Z,
so a non-zero result means the two flags differ.

When common->invalid_utf is set two extra exits exist, see the end of the
function. TMP1 is used as scratch and holds the return address on exit. */
static void check_wordboundary(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
#ifdef SUPPORT_UNICODE
struct sljit_label *valid_utf;
jump_list *invalid_utf1 = NULL;
#endif /* SUPPORT_UNICODE */
jump_list *invalid_utf2 = NULL;
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
struct sljit_jump *jump;
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */

/* The ctype table lookups below shift right by 4 to extract this bit. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Get type of the previous char, and put it to TMP3. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
/* No previous character: keep TMP3 == 0 and skip the first test. */
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  /* Failures of this read are collected in invalid_utf1. */
  peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    /* TMP1 (the character just read) and STR_PTR are preserved in
    RETURN_ADDR and TMP2 around move_back / check_start_used_ptr. */
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
    }
  }
else
#endif /* SUPPORT_UNICODE */
  {
  if (common->mode == PCRE2_JIT_COMPLETE)
    peek_char_back(common, READ_CHAR_MAX, NULL);
  else
    {
    /* Step back, emit check_start_used_ptr, then read the character forward
    again with READ_CHAR_UPDATE_STR_PTR. */
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
    }
  }

/* Testing char type. */
#ifdef SUPPORT_UNICODE
if (common->ucp)
  {
  /* TMP2 is preset to 1 for the underscore case, which skips the lookup.
  Otherwise the getucdtype helper is called and the flag is set when the
  value left in TMP1 lies in [ucp_Ll, ucp_Lu] or in [ucp_Nd, ucp_No]. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP3, 0, TMP2, 0);
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* Table based test: characters above 255 skip the lookup and keep the
  zero flag (always for wider code units, only in UTF mode for 8 bit). */
#if PCRE2_CODE_UNIT_WIDTH != 8
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  /* Here TMP3 has already been zeroed. */
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
JUMPHERE(skipread);

/* Now the character at STR_PTR; its flag goes to TMP2 (0 when the
skipread_list jumps are taken). Failures of the read are collected in
invalid_utf2. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);

/* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UNICODE

/* Re-entry point used by the invalid_utf1 handler at the end. */
valid_utf = LABEL();

if (common->ucp)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  }
else
#endif /* SUPPORT_UNICODE */
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
set_jumps(skipread_list, LABEL());

/* Normal exit: reload the return address and combine the two flags. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  /* The read of the previous character failed (TMP3 is still 0 here). If
  the character at STR_PTR is not INVALID_UTF_CHAR, continue with the normal
  test of that character; otherwise return with TMP2 set to -1. */
  set_jumps(invalid_utf1, LABEL());

  peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

  /* The read of the character at STR_PTR failed: return the flag of the
  previous character in TMP2. NOTE(review): this exit and the one above do
  not execute the flag-setting XOR; confirm how the callers test the result. */
  set_jumps(invalid_utf2, LABEL());
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
  }
#endif /* SUPPORT_UNICODE */
}
6704 
/* Tries to emit a compact range based test for a character class bitmap.

The 256 bit map in bits[] is scanned for the positions where membership
changes ("transitions"); these are stored in ranges[]. One more transition
at 256 is appended when the state after bit 255 and nclass disagree
(presumably nclass tells whether characters >= 256 belong to the class -
confirm with the callers). Only maps with at most four transitions are
handled.

Arguments:
  common      the JIT compiler state
  bits        32 byte class bitmap, bit (c & 7) of bits[c >> 3] is character c
  nclass      see above
  invert      when set, the sense of the emitted test is inverted
  backtracks  list receiving the jumps taken when the test fails

Returns TRUE when the test was emitted, FALSE (with nothing emitted) when
the map needs more transitions than supported. The character is expected
in TMP1, which may be destroyed. */
static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
DEFINE_COMPILER;
int ranges[MAX_CLASS_RANGE_SIZE];
sljit_u8 bit, cbit, all;
int i, byte, length = 0;

bit = bits[0] & 0x1;
/* All bits will be zero or one (since bit is zero or one). */
all = -bit;

for (i = 0; i < 256; )
  {
  byte = i >> 3;
  /* Whole bytes that match the current state are skipped in one step. */
  if ((i & 0x7) == 0 && bits[byte] == all)
    i += 8;
  else
    {
    cbit = (bits[byte] >> (i & 0x7)) & 0x1;
    if (cbit != bit)
      {
      /* Membership changes at i: record the transition. */
      if (length >= MAX_CLASS_RANGE_SIZE)
        return FALSE;
      ranges[length] = i;
      length++;
      bit = cbit;
      all = -cbit;
      }
    i++;
    }
  }

/* Here bit is the state of character 255. */
if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
  {
  if (length >= MAX_CLASS_RANGE_SIZE)
    return FALSE;
  ranges[length] = 256;
  length++;
  }

if (length < 0 || length > 4)
  return FALSE;

/* From now on bit is the (possibly inverted) state of character 0, i.e.
whether the interval below ranges[0] is accepted. */
bit = bits[0] & 0x1;
if (invert) bit ^= 0x1;

/* No character is accepted. */
if (length == 0 && bit == 0)
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

switch(length)
  {
  case 0:
  /* When bit != 0, all characters are accepted. */
  return TRUE;

  case 1:
  /* A single threshold: fail on the rejected side of ranges[0]. */
  add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 2:
  /* One interval [ranges[0], ranges[1]) that is either the only accepted or
  the only rejected part. After subtracting ranges[0], one unsigned compare
  covers both ends; a one character interval uses an equality test. */
  if (ranges[0] + 1 != ranges[1])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
    }
  else
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 3:
  if (bit != 0)
    {
    /* Rejected: everything from ranges[2] and [ranges[0], ranges[1]). */
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
    return TRUE;
    }

  /* Rejected: everything below ranges[0] and [ranges[1], ranges[2]). */
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
  return TRUE;

  case 4:
  /* Two intervals of equal length whose start positions differ in exactly
  one bit, and that bit is clear in ranges[0] and ranges[1]: OR that bit
  into the character and test only the second interval. */
  if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
      && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
      && (ranges[1] & (ranges[2] - ranges[0])) == 0
      && is_powerof2(ranges[2] - ranges[0]))
    {
    SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    return TRUE;
    }

  if (bit != 0)
    {
    /* Rejected: [ranges[0], ranges[1]) and [ranges[2], ranges[3]). The
    variable i remembers the bias already subtracted from TMP1 by the first
    test so that the second one can compensate for it. */
    i = 0;
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      i = ranges[0];
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));

    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
    return TRUE;
    }

  /* Accepted: [ranges[0], ranges[1]) and [ranges[2], ranges[3]). First fail
  on everything outside [ranges[0], ranges[3]) with one unsigned compare on
  the biased value, then fail on the gap [ranges[1], ranges[2]). */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
  return TRUE;

  default:
  SLJIT_UNREACHABLE();
  return FALSE;
  }
}
6856 
/* Tries to emit a branch free test for a class bitmap that selects only a
few characters below 256.

The selected characters (the set bits of bits[], or the clear bits when
nclass is set) are gathered into a short list. Two characters that differ
only in bit 0x20 share one list entry, which is marked with 0x100. The
emitted code leaves a non-zero value in TMP2 when the character in TMP1 is
on the list and a single compare of TMP2 against zero decides about the
backtrack (the sense depends on nclass and invert).

Returns FALSE, with nothing emitted, when conditional moves are not
available or the list would not fit into MAX_CLASS_CHARS_SIZE entries.
Otherwise returns TRUE. TMP1 may be destroyed, TMP2 is overwritten. */
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
DEFINE_COMPILER;
uint16_t char_list[MAX_CLASS_CHARS_SIZE];
uint8_t mask;
sljit_s32 cmp_type;
int idx, shift, pos, chr;
int count = 0;
int paired = 0;
int first = 0;

if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  return FALSE;

/* Phase 1: gather the characters. No code is emitted here. */
for (idx = 0; idx < 32; idx++)
  {
  mask = bits[idx];

  if (nclass)
    mask = ~mask;

  for (shift = 0; mask != 0; mask >>= 1, shift++)
    {
    if ((mask & 0x1) == 0)
      continue;

    chr = idx * 8 + shift;

    if ((chr & 0x20) != 0)
      {
      /* Look for the partner with bit 0x20 cleared. */
      for (pos = 0; pos < count; pos++)
        if (char_list[pos] == chr - 0x20)
          break;

      if (pos < count)
        {
        /* Merge: the entry becomes chr with the 0x100 marker. */
        char_list[pos] |= 0x120;
        continue;
        }
      }

    if (count >= MAX_CLASS_CHARS_SIZE)
      return FALSE;

    char_list[count++] = (uint16_t) chr;
    }
  }

if (count == 0) return FALSE;  /* Should never occur, but stops analyzers complaining. */

/* Phase 2: initialize TMP2. Character zero cannot use the conditional move
below (the moved value would be zero), so it sets TMP2 from the zero flag. */
if (char_list[0] != 0)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
else
  {
  first = 1;
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
  }

/* Phase 3: unmerged entries are compared as they are; a hit copies the
(non-zero) character into TMP2. Merged entries are only counted here. */
for (idx = first; idx < count; idx++)
  {
  if ((char_list[idx] & 0x100) != 0)
    {
    paired++;
    continue;
    }

  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[idx]);
  CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
  }

/* Phase 4: merged entries are compared after forcing bit 0x20 in TMP1. */
if (paired != 0)
  {
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);

  for (idx = 0; idx < count; idx++)
    {
    if ((char_list[idx] & 0x100) == 0)
      continue;

    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[idx] & 0xff);
    CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
    }
  }

if (invert)
  nclass = !nclass;

if (nclass)
  cmp_type = SLJIT_NOT_EQUAL;
else
  cmp_type = SLJIT_EQUAL;

add_jump(compiler, backtracks, CMP(cmp_type, TMP2, 0, SLJIT_IMM, 0));
return TRUE;
}
6957 
/* Emits an optimized test for a class bitmap when one of the two strategies
applies: the range based one is tried first, the character list based one
second. All arguments are passed through unchanged. Returns TRUE when code
was emitted by either of them, FALSE otherwise. */
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
return optimize_class_ranges(common, bits, nclass, invert, backtracks)
  ? TRUE
  : optimize_class_chars(common, bits, nclass, invert, backtracks);
}
6965 
/* Emits the shared routine that tests TMP1 against the newline characters
0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 when wide characters are possible
(16/32 bit code units, or UTF mode for 8 bit). Fast-call entry, the return
address is kept in RETURN_ADDR. The result is accumulated in TMP2 and the
last operation is emitted with SLJIT_SET_Z. TMP1 is modified as well. */
static void check_anynewline(compiler_common *common)
{
/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* TMP1 is biased by 0x0a, so 0x0a-0x0d becomes a single unsigned compare. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit maps biased 0x2028 onto biased 0x2029, so one
  compare covers both. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the pending equality test into TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
6992 
/* Emits the shared routine that tests TMP1 against the horizontal space
characters 0x09, 0x20 and 0xa0, plus 0x1680, 0x180e, 0x2000-0x200a, 0x202f,
0x205f and 0x3000 when wide characters are possible (16/32 bit code units,
or UTF mode for 8 bit). Fast-call entry, the return address is kept in
RETURN_ADDR. The result is accumulated in TMP2 and the last operation is
emitted with SLJIT_SET_Z. TMP1 is modified on the wide character path. */
static void check_hspace(compiler_common *common)
{
/* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Each compare sets the flags; the following OP_FLAGS merges it into TMP2. */
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* From here TMP1 is biased by 0x2000: the range 0x2000-0x200a becomes a
  single unsigned compare and the remaining constants are biased as well. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the pending equality test into TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7031 
/* Emits the shared routine that tests TMP1 against the vertical space
characters 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 when wide characters
are possible (16/32 bit code units, or UTF mode for 8 bit). Fast-call entry,
the return address is kept in RETURN_ADDR. The result is accumulated in TMP2
and the last operation is emitted with SLJIT_SET_Z. TMP1 is modified as
well. */
static void check_vspace(compiler_common *common)
{
/* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* TMP1 is biased by 0x0a, so 0x0a-0x0d becomes a single unsigned compare. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit maps biased 0x2028 onto biased 0x2029, so one
  compare covers both. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the pending equality test into TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7059 
/* Emits the shared routine that compares two code unit sequences exactly.
Fast-call entry, the return address is parked in LOCALS0 on the stack.

As the emitted code shows: STR_PTR is first moved back by TMP2, then code
units are loaded pairwise from the addresses in TMP1 and STR_PTR and
compared. TMP2 is decreased by IN_UCHARS(1) per unit; the loop ends at the
first mismatch or when TMP2 reaches zero. The return address is reloaded
into TMP1 for the fast return.

NOTE(review): callers presumably use the state left by the loop (TMP2 / the
flags) to tell a match from a mismatch - confirm at the call sites. */
static void do_casefulcmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;
int char1_reg;
int char2_reg;

/* Registers receiving the two code units. */
if (HAS_VIRTUAL_REGISTERS)
  {
  char1_reg = STR_END;
  char2_reg = STACK_TOP;
  }
else
  {
  char1_reg = TMP3;
  char2_reg = RETURN_ADDR;
  }

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

if (char1_reg == STR_END)
  {
  /* STR_END and STACK_TOP are borrowed: keep their values in TMP3 and
  RETURN_ADDR until the end of the routine. */
  OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
  }

/* The calls with SLJIT_MEM_SUPP are used as capability probes that select
one of three loop variants: post-update loads, pre-update loads, or plain
loads followed by explicit pointer increments. */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  {
  label = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_NOT_ZERO, label);

  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  }
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  {
  /* Both pointers are biased by one unit before the loop and STR_PTR is
  corrected after it. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  label = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_NOT_ZERO, label);

  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }
else
  {
  label = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_NOT_ZERO, label);

  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  }

if (char1_reg == STR_END)
  {
  /* Give the borrowed registers their original values back. */
  OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
  }

OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7139 
/* Emits the shared routine that compares two code unit sequences after
mapping every unit through the common->lcc table. Fast-call entry, the
return address is parked in LOCALS0 on the stack.

As the emitted code shows: STR_PTR is first moved back by TMP2, then code
units are loaded pairwise from the addresses in TMP1 and STR_PTR, translated
(for code unit widths other than 8 only when the value is not above 255)
and compared. TMP2 is decreased by IN_UCHARS(1) per unit; the loop ends at
the first mismatch or when TMP2 reaches zero. The return address is reloaded
into TMP1 for the fast return.

NOTE(review): callers presumably use the state left by the loop (TMP2 / the
flags) to tell a match from a mismatch - confirm at the call sites. */
static void do_caselesscmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;
int char1_reg = STR_END;
int char2_reg;
int lcc_table;
/* 0: plain loads, 1: post-update loads, 2: pre-update loads. */
int opt_type = 0;

if (HAS_VIRTUAL_REGISTERS)
  {
  char2_reg = STACK_TOP;
  lcc_table = STACK_LIMIT;
  }
else
  {
  char2_reg = RETURN_ADDR;
  lcc_table = TMP3;
  }

/* The calls with SLJIT_MEM_SUPP are used as capability probes. */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  opt_type = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  opt_type = 2;

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* STR_END is borrowed as char1_reg: its value is kept in LOCALS1. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);

if (char2_reg == STACK_TOP)
  {
  /* STACK_TOP and STACK_LIMIT are borrowed as well: keep their values in
  TMP3 and RETURN_ADDR. */
  OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
  }

OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);

if (opt_type == 1)
  {
  label = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  }
else if (opt_type == 2)
  {
  /* Both pointers are biased by one unit before the loop and STR_PTR is
  corrected after it. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  label = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  }
else
  {
  /* The increment of STR_PTR is emitted after the table lookups below. */
  label = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  }

/* Translate both units through the table; for wider code units values above
255 skip the lookup and are compared unchanged. */
#if PCRE2_CODE_UNIT_WIDTH != 8
jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif

if (opt_type == 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, label);

JUMPHERE(jump);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

if (opt_type == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (char2_reg == STACK_TOP)
  {
  /* Give the borrowed registers their original values back. */
  OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
  }

OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7237 
/* Emits the comparison of one character of a literal sequence against the
subject. One character is a single code unit, or several code units when
common->utf is set and the first unit announces extra length.

Arguments:
  common      the JIT compiler state
  caseless    when TRUE and the character has an other case, the compare
              ignores the bit reported by char_get_othercase_bit
  cc          points to the first code unit of the character
  context     state shared by the calls that handle one sequence:
                length     remaining size as used in the STR_PTR relative
                           addressing; decreased by IN_UCHARS(1) per unit
                sourcereg  TMP1 or TMP2, alternating between loads; -1
                           requests the initial load
                ucharptr, c, oc
                           units (and their ignored bits) batched for one
                           wide compare when unaligned reads are available
  backtracks  list receiving the jumps taken on mismatch

Returns cc advanced past the compared code units. Uses TMP1 and TMP2. */
static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
    compare_context *context, jump_list **backtracks)
{
DEFINE_COMPILER;
unsigned int othercasebit = 0;
PCRE2_SPTR othercasechar = NULL;
#ifdef SUPPORT_UNICODE
int utflength;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  othercasebit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(othercasebit);
  /* Extracting bit difference info. */
  /* The value is unpacked into othercasechar, the code unit that contains
  the differing bit, and othercasebit, the bit mask within that unit. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  othercasechar = cc + (othercasebit >> 8);
  othercasebit &= 0xff;
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* Note that this code only handles characters in the BMP. If there
  ever are characters outside the BMP whose othercase differs in only one
  bit from itself (there currently are none), this code will need to be
  revised for PCRE2_CODE_UNIT_WIDTH == 32. */
  othercasechar = cc + (othercasebit >> 9);
  if ((othercasebit & 0x100) != 0)
    othercasebit = (othercasebit & 0xff) << 8;
  else
    othercasebit &= 0xff;
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  }

if (context->sourcereg == -1)
  {
  /* First call for this sequence: load the first chunk into TMP1, using the
  widest access the remaining length allows. Later loads are emitted one
  step ahead of the compare that uses them. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 32
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UNICODE
utflength = 1;
if (common->utf && HAS_EXTRALEN(*cc))
  utflength += GET_EXTRALEN(*cc);

do
  {
#endif

  context->length -= IN_UCHARS(1);
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)

  /* Unaligned read is supported. */
  /* Batch the unit: c gets the expected value (with the ignored bit set),
  oc gets the ignored bit or zero. */
  if (othercasebit != 0 && othercasechar == cc)
    {
    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
    context->oc.asuchars[context->ucharptr] = othercasebit;
    }
  else
    {
    context->c.asuchars[context->ucharptr] = *cc;
    context->oc.asuchars[context->ucharptr] = 0;
    }
  context->ucharptr++;

  /* Flush the batch when it fills a 32 bit access, when the sequence ends,
  or (8 bit only) when two units are batched and a single unit remains. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Load the next chunk into the idle register first, then switch to the
    register that holds the data for the current batch. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if PCRE2_CODE_UNIT_WIDTH == 8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

    /* The case labels are unit counts that correspond to 4, 2 and 1 bytes. */
    switch(context->ucharptr)
      {
      case 4 / sizeof(PCRE2_UCHAR):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(PCRE2_UCHAR):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#if PCRE2_CODE_UNIT_WIDTH == 8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    context->ucharptr = 0;
    }

#else

  /* Unaligned read is unsupported or in 32 bit mode. */
  /* One unit per compare: preload the next unit into the idle register,
  then compare the unit held by the other one. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

  if (othercasebit != 0 && othercasechar == cc)
    {
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));

#endif

  cc++;
#ifdef SUPPORT_UNICODE
  utflength--;
  }
while (utflength > 0);
#endif

return cc;
}
7390 
7391 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
7392 
/* Rebases the character type register. Emits an ADD or SUB on typereg by the
difference between the currently recorded typeoffset and the requested one,
then records the new offset, so that typereg keeps holding the loaded type
minus typeoffset (provided that was true before the call). Nothing is emitted
when the offset is unchanged.

The body is wrapped in do { } while (0) so that the macro expands to a single
statement and can safely be used in an unbraced if / else. Note that value is
evaluated more than once, so it must not have side effects. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)
7402 
/* Rebases the character register. Emits an ADD or SUB on TMP1 by the
difference between the currently recorded charoffset and the requested one,
then records the new offset, so that TMP1 keeps holding the character minus
charoffset (provided that was true before the call). Nothing is emitted when
the offset is unchanged.

The body is wrapped in do { } while (0) so that the macro expands to a single
statement and can safely be used in an unbraced if / else. Note that value is
evaluated more than once, so it must not have side effects. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
7412 
7413 static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
7414 
/* Emits the JIT matching path for an extended character class (XCLASS).

Arguments:
  common      the compiler state (common->utf is consulted here)
  cc          points at the XCLASS flag unit (XCL_NOT, XCL_MAP and
              XCL_HASPROP bits). When XCL_MAP is set a 32 byte bitmap
              follows. After that comes a list of XCL_SINGLE, XCL_RANGE,
              XCL_PROP and XCL_NOTPROP items terminated by XCL_END.
  backtracks  receives the jumps which are taken when the subject
              character is rejected by the class

The item list is scanned first to collect the lowest / highest character
and to find out which Unicode record fields are required. Script properties
are then handled in a separate pass, and finally the remaining items are
turned into comparisons. Jumps of accepted characters are collected in a
local list, which is bound to a label placed after the generated code. */

static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
jump_list *found = NULL;
/* Target of the "item matched" jumps: the local found list for a normal
class, the backtrack list for a negated class. */
jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
struct sljit_jump *jump = NULL;
PCRE2_SPTR ccbegin;
int compares, invertcmp, numberofcmps;
#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
/* Not referenced by name below; presumably consumed by GETCHARINCTEST. */
BOOL utf = common->utf;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */

#ifdef SUPPORT_UNICODE
BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
BOOL charsaved = FALSE;
int typereg = TMP1;
const sljit_u32 *other_cases;
sljit_uw typeoffset;
#endif /* SUPPORT_UNICODE */

/* Scanning the necessary info. */
cc++;
ccbegin = cc;
compares = 0;

if (cc[-1] & XCL_MAP)
  {
  /* The bitmap is skipped here; it is processed after the scan. */
  min = 0;
  cc += 32 / sizeof(PCRE2_UCHAR);
  }

while (*cc != XCL_END)
  {
  compares++;
  if (*cc == XCL_SINGLE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
    if (c < min) min = c;
#ifdef SUPPORT_UNICODE
    needschar = TRUE;
#endif /* SUPPORT_UNICODE */
    }
  else if (*cc == XCL_RANGE)
    {
    /* The first character is the start, the second is the end of the range. */
    cc ++;
    GETCHARINCTEST(c, cc);
    if (c < min) min = c;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
#ifdef SUPPORT_UNICODE
    needschar = TRUE;
#endif /* SUPPORT_UNICODE */
    }
#ifdef SUPPORT_UNICODE
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    cc++;
    if (*cc == PT_CLIST)
      {
      /* A caseless set lists its characters explicitly. */
      other_cases = PRIV(ucd_caseless_sets) + cc[1];
      while (*other_cases != NOTACHAR)
        {
        if (*other_cases > max) max = *other_cases;
        if (*other_cases < min) min = *other_cases;
        other_cases++;
        }
      }
    else
      {
      /* Any other property may accept any character. */
      max = READ_CHAR_MAX;
      min = 0;
      }

    switch(*cc)
      {
      case PT_ANY:
      /* Any is either accepted by everything (XCL_PROP) or ignored (XCL_NOTPROP). */
      if (cc[-1] == XCL_PROP)
        {
        compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
        /* A negated class which contains Any can never match. */
        if (list == backtracks)
          add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
        return;
        }
      break;

      case PT_LAMP:
      case PT_GC:
      case PT_PC:
      case PT_ALNUM:
      needstype = TRUE;
      break;

      case PT_SC:
      needsscript = TRUE;
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      case PT_WORD:
      case PT_PXGRAPH:
      case PT_PXPRINT:
      case PT_PXPUNCT:
      needstype = TRUE;
      needschar = TRUE;
      break;

      case PT_CLIST:
      case PT_UCNC:
      needschar = TRUE;
      break;

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    cc += 2;
    }
#endif /* SUPPORT_UNICODE */
  }
SLJIT_ASSERT(compares > 0);

/* We are not necessarily in UTF mode, even in 8 bit mode. */
cc = ccbegin;
if ((cc[-1] & XCL_NOT) != 0)
  read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
else
  {
#ifdef SUPPORT_UNICODE
  read_char(common, min, max, (needstype || needsscript) ? backtracks : NULL, 0);
#else /* !SUPPORT_UNICODE */
  read_char(common, min, max, NULL, 0);
#endif /* SUPPORT_UNICODE */
  }

if ((cc[-1] & XCL_HASPROP) == 0)
  {
  if ((cc[-1] & XCL_MAP) != 0)
    {
    /* Characters above 255 are not covered by the bitmap. */
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
    if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
      {
      /* Generic bitmap test: byte index is (char >> 3), bit index is (char & 7). */
      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
      OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
      add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
      }

    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(jump);

    cc += 32 / sizeof(PCRE2_UCHAR);
    }
  else
    {
    /* Quick range check: characters outside of [min, max] cannot be in the list. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
    add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
    }
  }
else if ((cc[-1] & XCL_MAP) != 0)
  {
  /* The bitmap test destroys TMP1, so the character is saved in RETURN_ADDR. */
  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
#ifdef SUPPORT_UNICODE
  charsaved = TRUE;
#endif /* SUPPORT_UNICODE */
  if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
    {
#if PCRE2_CODE_UNIT_WIDTH == 8
    jump = NULL;
    if (common->utf)
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
      jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);

    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
    add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));

#if PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf)
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
      JUMPHERE(jump);
    }

  OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
  cc += 32 / sizeof(PCRE2_UCHAR);
  }

#ifdef SUPPORT_UNICODE
if (needstype || needsscript)
  {
  /* The record lookup below overwrites TMP1. */
  if (needschar && !charsaved)
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
  if (!common->utf)
    {
    /* Values above MAX_UTF_CODE_POINT are looked up as UNASSIGNED_UTF_CHAR. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
    JUMPHERE(jump);
    }
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */

  /* Two stage table lookup: TMP2 = ucd_stage2[ucd_stage1[char >> UCD_BLOCK_SHIFT]
  * UCD_BLOCK_SIZE + (char & UCD_BLOCK_MASK)], both tables hold 16 bit items. */
  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

  /* Before anything else, we deal with scripts. */
  if (needsscript)
    {
    /* TMP1 = record index * 12 (8 + 4), which is used as the byte offset of
    the record (presumably sizeof(ucd_record)). TMP2 is left as index * 4. */
    OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));

    ccbegin = cc;

    /* Only the PT_SC items emit code in this pass, the others are skipped. */
    while (*cc != XCL_END)
      {
      if (*cc == XCL_SINGLE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        }
      else if (*cc == XCL_RANGE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        GETCHARINCTEST(c, cc);
        }
      else
        {
        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
        cc++;
        if (*cc == PT_SC)
          {
          compares--;
          invertcmp = (compares == 0 && list != backtracks);
          if (cc[-1] == XCL_NOTPROP)
            invertcmp ^= 0x1;
          jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
          add_jump(compiler, compares > 0 ? list : backtracks, jump);
          }
        cc += 2;
        }
      }

    cc = ccbegin;

    if (needstype)
      {
      /* TMP2 has already been shifted by 2 */
      if (!needschar)
        {
        /* TMP1 = TMP2 * 3, the same record offset as above. */
        OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

        OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
        }
      else
        {
        OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
        OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

        /* The character is restored into TMP1, the type is kept in RETURN_ADDR. */
        OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
        OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
        typereg = RETURN_ADDR;
        }
      }
    else if (needschar)
      OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
    }
  else if (needstype)
    {
    /* Record offset is index * 12, computed as (index << 3) + (index << 2). */
    OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);

    if (!needschar)
      {
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
      }
    else
      {
      OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

      /* The character is restored into TMP1, the type is kept in RETURN_ADDR. */
      OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
      typereg = RETURN_ADDR;
      }
    }
  else if (needschar)
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
  }
#endif /* SUPPORT_UNICODE */

/* Generating code. */
charoffset = 0;
numberofcmps = 0;
#ifdef SUPPORT_UNICODE
typeoffset = 0;
#endif /* SUPPORT_UNICODE */

while (*cc != XCL_END)
  {
  compares--;
  /* The last comparison of a non-negated class is inverted: it jumps to the
  backtrack list on mismatch and falls through on match. */
  invertcmp = (compares == 0 && list != backtracks);
  jump = NULL;

  if (*cc == XCL_SINGLE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);

    /* Consecutive single / range items are merged: the results are collected
    in TMP2, and a single conditional jump is emitted for the whole group. */
    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
  else if (*cc == XCL_RANGE)
    {
    /* TMP1 is rebased to the start of the range, so that a single unsigned
    compare against (end - start) checks both bounds. */
    cc ++;
    GETCHARINCTEST(c, cc);
    SET_CHAR_OFFSET(c);
    GETCHARINCTEST(c, cc);

    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
#ifdef SUPPORT_UNICODE
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    if (*cc == XCL_NOTPROP)
      invertcmp ^= 0x1;
    cc++;
    switch(*cc)
      {
      case PT_ANY:
      if (!invertcmp)
        jump = JUMP(SLJIT_JUMP);
      break;

      case PT_LAMP:
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_GC:
      /* ucp_typerange holds a [first, last] type pair for each category. */
      c = PRIV(ucp_typerange)[(int)cc[1] * 2];
      SET_TYPE_OFFSET(c);
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
      break;

      case PT_PC:
      jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
      break;

      case PT_SC:
      /* Already processed (and counted) by the script pass above, so the
      decrement at the top of the loop is reverted. */
      compares++;
      /* Do nothing. */
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      SET_CHAR_OFFSET(9);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      SET_TYPE_OFFSET(ucp_Zl);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_WORD:
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      /* Fall through. */

      case PT_ALNUM:
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
      /* For PT_WORD, TMP2 already holds the result of the underscore test. */
      OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      SET_TYPE_OFFSET(ucp_Nd);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_CLIST:
      other_cases = PRIV(ucd_caseless_sets) + cc[1];

      /* At least three characters are required.
         Otherwise this case would be handled by the normal code path. */
      SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
      SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);

      /* Optimizing character pairs, if their difference is power of 2.
         The differing bit is set in a copy of the character, and the result
         is compared to the greater character of the pair, which has that
         bit set. TMP1 holds the character minus charoffset, so the offset
         is added back before the bit operation. */
      if (is_powerof2(other_cases[1] ^ other_cases[0]))
        {
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
        else
          {
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
          }
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
        other_cases += 2;
        }
      else if (is_powerof2(other_cases[2] ^ other_cases[1]))
        {
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
        else
          {
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          /* The mask must be the bit which distinguishes the second and third
             characters, exactly as in the charoffset == 0 case above. */
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
          }
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);

        /* The first character is not part of the pair, it is checked separately. */
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
        OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);

        other_cases += 3;
        }
      else
        {
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
        }

      /* The remaining characters are checked one by one. Only the last
         OP_FLAGS sets the zero flag, which is used by the jump below. */
      while (*other_cases != NOTACHAR)
        {
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
        }
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_UCNC:
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      /* Range 0xa0 - 0xd7ff, and everything from 0xe000. */
      SET_CHAR_OFFSET(0xa0);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      SET_CHAR_OFFSET(0);
      OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_PXGRAPH:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);

      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      SET_CHAR_OFFSET(0x2066);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      JUMPHERE(jump);
      /* TMP2 is non-zero when the character is rejected, so the test is on zero. */
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPRINT:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);

      /* Unlike PT_PXGRAPH, the ucp_Zs type is not rejected. */
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
      OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);

      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      SET_CHAR_OFFSET(0x2066);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      JUMPHERE(jump);
      /* TMP2 is non-zero when the character is rejected, so the test is on zero. */
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPUNCT:
      /* Types ucp_Sc - ucp_So are accepted only for characters up to 0x7f. */
      SET_TYPE_OFFSET(ucp_Sc);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      SET_CHAR_OFFSET(0);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
      OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);

      SET_TYPE_OFFSET(ucp_Pc);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    cc += 2;
    }
#endif /* SUPPORT_UNICODE */

  /* Every jump goes to list, except the jump of the last item, which
  always goes to the backtrack list. */
  if (jump != NULL)
    add_jump(compiler, compares > 0 ? list : backtracks, jump);
  }

if (found != NULL)
  set_jumps(found, LABEL());
}
8007 
8008 #undef SET_TYPE_OFFSET
8009 #undef SET_CHAR_OFFSET
8010 
8011 #endif
8012 
compile_simple_assertion_matchingpath(compiler_common * common,PCRE2_UCHAR type,PCRE2_SPTR cc,jump_list ** backtracks)8013 static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
8014 {
8015 DEFINE_COMPILER;
8016 int length;
8017 struct sljit_jump *jump[4];
8018 #ifdef SUPPORT_UNICODE
8019 struct sljit_label *label;
8020 #endif /* SUPPORT_UNICODE */
8021 
8022 switch(type)
8023   {
8024   case OP_SOD:
8025   if (HAS_VIRTUAL_REGISTERS)
8026     {
8027     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8028     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8029     }
8030   else
8031     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8032   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
8033   return cc;
8034 
8035   case OP_SOM:
8036   if (HAS_VIRTUAL_REGISTERS)
8037     {
8038     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8039     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
8040     }
8041   else
8042     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
8043   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
8044   return cc;
8045 
8046   case OP_NOT_WORD_BOUNDARY:
8047   case OP_WORD_BOUNDARY:
8048   add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
8049 #ifdef SUPPORT_UNICODE
8050   if (common->invalid_utf)
8051     {
8052     add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8053     return cc;
8054     }
8055 #endif /* SUPPORT_UNICODE */
8056   sljit_set_current_flags(compiler, SLJIT_SET_Z);
8057   add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8058   return cc;
8059 
8060   case OP_EODN:
8061   /* Requires rather complex checks. */
8062   jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
8063   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8064     {
8065     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8066     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8067     if (common->mode == PCRE2_JIT_COMPLETE)
8068       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
8069     else
8070       {
8071       jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
8072       OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
8073       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
8074       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
8075       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
8076       add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
8077       check_partial(common, TRUE);
8078       add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8079       JUMPHERE(jump[1]);
8080       }
8081     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8082     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8083     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8084     }
8085   else if (common->nltype == NLTYPE_FIXED)
8086     {
8087     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8088     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8089     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
8090     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
8091     }
8092   else
8093     {
8094     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8095     jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
8096     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8097     OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
8098     jump[2] = JUMP(SLJIT_GREATER);
8099     add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
8100     /* Equal. */
8101     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8102     jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
8103     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8104 
8105     JUMPHERE(jump[1]);
8106     if (common->nltype == NLTYPE_ANYCRLF)
8107       {
8108       OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8109       add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
8110       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
8111       }
8112     else
8113       {
8114       OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
8115       read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
8116       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
8117       add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
8118       sljit_set_current_flags(compiler, SLJIT_SET_Z);
8119       add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
8120       OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
8121       }
8122     JUMPHERE(jump[2]);
8123     JUMPHERE(jump[3]);
8124     }
8125   JUMPHERE(jump[0]);
8126   if (common->mode != PCRE2_JIT_COMPLETE)
8127     check_partial(common, TRUE);
8128   return cc;
8129 
8130   case OP_EOD:
8131   add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
8132   if (common->mode != PCRE2_JIT_COMPLETE)
8133     check_partial(common, TRUE);
8134   return cc;
8135 
8136   case OP_DOLL:
8137   if (HAS_VIRTUAL_REGISTERS)
8138     {
8139     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8140     OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8141     }
8142   else
8143     OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8144   add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8145 
8146   if (!common->endonly)
8147     compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
8148   else
8149     {
8150     add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
8151     check_partial(common, FALSE);
8152     }
8153   return cc;
8154 
8155   case OP_DOLLM:
8156   jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
8157   if (HAS_VIRTUAL_REGISTERS)
8158     {
8159     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8160     OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8161     }
8162   else
8163     OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8164   add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8165   check_partial(common, FALSE);
8166   jump[0] = JUMP(SLJIT_JUMP);
8167   JUMPHERE(jump[1]);
8168 
8169   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8170     {
8171     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8172     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8173     if (common->mode == PCRE2_JIT_COMPLETE)
8174       add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
8175     else
8176       {
8177       jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
8178       /* STR_PTR = STR_END - IN_UCHARS(1) */
8179       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8180       check_partial(common, TRUE);
8181       add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8182       JUMPHERE(jump[1]);
8183       }
8184 
8185     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8186     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8187     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8188     }
8189   else
8190     {
8191     peek_char(common, common->nlmax, TMP3, 0, NULL);
8192     check_newlinechar(common, common->nltype, backtracks, FALSE);
8193     }
8194   JUMPHERE(jump[0]);
8195   return cc;
8196 
8197   case OP_CIRC:
8198   if (HAS_VIRTUAL_REGISTERS)
8199     {
8200     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8201     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
8202     add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
8203     OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8204     add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8205     }
8206   else
8207     {
8208     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8209     add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
8210     OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8211     add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8212     }
8213   return cc;
8214 
8215   case OP_CIRCM:
8216   /* TMP2 might be used by peek_char_back. */
8217   if (HAS_VIRTUAL_REGISTERS)
8218     {
8219     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8220     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8221     jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
8222     OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8223     }
8224   else
8225     {
8226     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8227     jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
8228     OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8229     }
8230   add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8231   jump[0] = JUMP(SLJIT_JUMP);
8232   JUMPHERE(jump[1]);
8233 
8234   if (!common->alt_circumflex)
8235     add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8236 
8237   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8238     {
8239     OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8240     add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
8241     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
8242     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
8243     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8244     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8245     }
8246   else
8247     {
8248     peek_char_back(common, common->nlmax, backtracks);
8249     check_newlinechar(common, common->nltype, backtracks, FALSE);
8250     }
8251   JUMPHERE(jump[0]);
8252   return cc;
8253 
8254   case OP_REVERSE:
8255   length = GET(cc, 0);
8256   if (length == 0)
8257     return cc + LINK_SIZE;
8258   if (HAS_VIRTUAL_REGISTERS)
8259     {
8260     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8261     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8262     }
8263   else
8264     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8265 #ifdef SUPPORT_UNICODE
8266   if (common->utf)
8267     {
8268     OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, length);
8269     label = LABEL();
8270     add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
8271     move_back(common, backtracks, FALSE);
8272     OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
8273     JUMPTO(SLJIT_NOT_ZERO, label);
8274     }
8275   else
8276 #endif
8277     {
8278     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
8279     add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
8280     }
8281   check_start_used_ptr(common);
8282   return cc + LINK_SIZE;
8283   }
8284 SLJIT_UNREACHABLE();
8285 return cc;
8286 }
8287 
8288 #ifdef SUPPORT_UNICODE
8289 
8290 #if PCRE2_CODE_UNIT_WIDTH != 32
8291 
do_extuni_utf(jit_arguments * args,PCRE2_SPTR cc)8292 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
8293 {
8294 PCRE2_SPTR start_subject = args->begin;
8295 PCRE2_SPTR end_subject = args->end;
8296 int lgb, rgb, ricount;
8297 PCRE2_SPTR prevcc, endcc, bptr;
8298 BOOL first = TRUE;
8299 uint32_t c;
8300 
8301 prevcc = cc;
8302 endcc = NULL;
8303 do
8304   {
8305   GETCHARINC(c, cc);
8306   rgb = UCD_GRAPHBREAK(c);
8307 
8308   if (first)
8309     {
8310     lgb = rgb;
8311     endcc = cc;
8312     first = FALSE;
8313     continue;
8314     }
8315 
8316   if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8317     break;
8318 
8319   /* Not breaking between Regional Indicators is allowed only if there
8320   are an even number of preceding RIs. */
8321 
8322   if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8323     {
8324     ricount = 0;
8325     bptr = prevcc;
8326 
8327     /* bptr is pointing to the left-hand character */
8328     while (bptr > start_subject)
8329       {
8330       bptr--;
8331       BACKCHAR(bptr);
8332       GETCHAR(c, bptr);
8333 
8334       if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
8335         break;
8336 
8337       ricount++;
8338       }
8339 
8340     if ((ricount & 1) != 0) break;  /* Grapheme break required */
8341     }
8342 
8343   /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8344   allows any number of them before a following Extended_Pictographic. */
8345 
8346   if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8347        lgb != ucp_gbExtended_Pictographic)
8348     lgb = rgb;
8349 
8350   prevcc = endcc;
8351   endcc = cc;
8352   }
8353 while (cc < end_subject);
8354 
8355 return endcc;
8356 }
8357 
8358 #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
8359 
do_extuni_utf_invalid(jit_arguments * args,PCRE2_SPTR cc)8360 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
8361 {
8362 PCRE2_SPTR start_subject = args->begin;
8363 PCRE2_SPTR end_subject = args->end;
8364 int lgb, rgb, ricount;
8365 PCRE2_SPTR prevcc, endcc, bptr;
8366 BOOL first = TRUE;
8367 uint32_t c;
8368 
8369 prevcc = cc;
8370 endcc = NULL;
8371 do
8372   {
8373   GETCHARINC_INVALID(c, cc, end_subject, break);
8374   rgb = UCD_GRAPHBREAK(c);
8375 
8376   if (first)
8377     {
8378     lgb = rgb;
8379     endcc = cc;
8380     first = FALSE;
8381     continue;
8382     }
8383 
8384   if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8385     break;
8386 
8387   /* Not breaking between Regional Indicators is allowed only if there
8388   are an even number of preceding RIs. */
8389 
8390   if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8391     {
8392     ricount = 0;
8393     bptr = prevcc;
8394 
8395     /* bptr is pointing to the left-hand character */
8396     while (bptr > start_subject)
8397       {
8398       GETCHARBACK_INVALID(c, bptr, start_subject, break);
8399 
8400       if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
8401         break;
8402 
8403       ricount++;
8404       }
8405 
8406     if ((ricount & 1) != 0)
8407       break;  /* Grapheme break required */
8408     }
8409 
8410   /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8411   allows any number of them before a following Extended_Pictographic. */
8412 
8413   if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8414        lgb != ucp_gbExtended_Pictographic)
8415     lgb = rgb;
8416 
8417   prevcc = endcc;
8418   endcc = cc;
8419   }
8420 while (cc < end_subject);
8421 
8422 return endcc;
8423 }
8424 
do_extuni_no_utf(jit_arguments * args,PCRE2_SPTR cc)8425 static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
8426 {
8427 PCRE2_SPTR start_subject = args->begin;
8428 PCRE2_SPTR end_subject = args->end;
8429 int lgb, rgb, ricount;
8430 PCRE2_SPTR bptr;
8431 uint32_t c;
8432 
8433 /* Patch by PH */
8434 /* GETCHARINC(c, cc); */
8435 c = *cc++;
8436 
8437 #if PCRE2_CODE_UNIT_WIDTH == 32
8438 if (c >= 0x110000)
8439   return NULL;
8440 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8441 lgb = UCD_GRAPHBREAK(c);
8442 
8443 while (cc < end_subject)
8444   {
8445   c = *cc;
8446 #if PCRE2_CODE_UNIT_WIDTH == 32
8447   if (c >= 0x110000)
8448     break;
8449 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8450   rgb = UCD_GRAPHBREAK(c);
8451 
8452   if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8453     break;
8454 
8455   /* Not breaking between Regional Indicators is allowed only if there
8456   are an even number of preceding RIs. */
8457 
8458   if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8459     {
8460     ricount = 0;
8461     bptr = cc - 1;
8462 
8463     /* bptr is pointing to the left-hand character */
8464     while (bptr > start_subject)
8465       {
8466       bptr--;
8467       c = *bptr;
8468 #if PCRE2_CODE_UNIT_WIDTH == 32
8469       if (c >= 0x110000)
8470         break;
8471 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8472 
8473       if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
8474 
8475       ricount++;
8476       }
8477 
8478     if ((ricount & 1) != 0)
8479       break;  /* Grapheme break required */
8480     }
8481 
8482   /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8483   allows any number of them before a following Extended_Pictographic. */
8484 
8485   if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8486        lgb != ucp_gbExtended_Pictographic)
8487     lgb = rgb;
8488 
8489   cc++;
8490   }
8491 
8492 return cc;
8493 }
8494 
8495 #endif /* SUPPORT_UNICODE */
8496 
/* Emits the matching path for one single-character opcode.

Arguments:
  common         the JIT compiler state
  type           the opcode to compile
  cc             points at the data that follows the opcode
  backtracks     list that receives every emitted "no match" jump
  check_str_ptr  when TRUE, detect_partial_match() (or, for literals in
                 complete mode, an explicit STR_END comparison) is emitted
                 before the character is consumed

Returns the address that follows the data consumed by the opcode. */

static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
{
DEFINE_COMPILER;
int units;
unsigned int chr, alt, aux;
BOOL negated;
compare_context lit_ctx;
struct sljit_jump *jmp[3];
jump_list *at_end;
#ifdef SUPPORT_UNICODE
PCRE2_UCHAR prop_class[5];
sljit_sw extuni_helper;
#endif /* SUPPORT_UNICODE */

switch(type)
  {
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  /* Digits are usually 0-9, so the 7-bit type reader is used whenever
  is_char7_bitset() accepts the digit bitmap. */
  negated = (type == OP_NOT_DIGIT);
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
    read_char7_type(common, backtracks, negated);
  else
#endif
    read_char8_type(common, backtracks, negated);
  /* TMP1 is tested against the digit flag. */
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
  add_jump(compiler, backtracks, JUMP(negated ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  negated = (type == OP_NOT_WHITESPACE);
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
    read_char7_type(common, backtracks, negated);
  else
#endif
    read_char8_type(common, backtracks, negated);
  /* TMP1 is tested against the space flag. */
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
  add_jump(compiler, backtracks, JUMP(negated ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  negated = (type == OP_NOT_WORDCHAR);
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
    read_char7_type(common, backtracks, negated);
  else
#endif
    read_char8_type(common, backtracks, negated);
  /* TMP1 is tested against the word flag. */
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
  add_jump(compiler, backtracks, JUMP(negated ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_ANY:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
  if (common->nltype != NLTYPE_FIXED || common->newline <= 255)
    {
    check_newlinechar(common, common->nltype, backtracks, TRUE);
    return cc;
    }

  /* Fixed two-unit newline: fail only when the character just read equals
  the high byte of the newline value and the next code unit equals its low
  byte. */
  jmp[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
  at_end = NULL;
  if (common->mode == PCRE2_JIT_PARTIAL_HARD)
    check_str_end(common, &at_end);
  else
    add_jump(compiler, &at_end, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
  set_jumps(at_end, LABEL());
  JUMPHERE(jmp[0]);
  return cc;

  case OP_ALLANY:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
    if (common->invalid_utf)
      {
      read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
      return cc;
      }

#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
    /* Step over the first code unit, then over any further units that
    belong to the same character. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if PCRE2_CODE_UNIT_WIDTH == 8
    /* For 0xc0 and above, the extra advance is looked up in utf8_table4. */
    jmp[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif PCRE2_CODE_UNIT_WIDTH == 16
    /* STR_PTR advances by a further 2 when (unit & 0xfc00) == 0xd800. */
    jmp[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
    JUMPHERE(jmp[0]);
    return cc;
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
    }
#endif /* SUPPORT_UNICODE */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

  case OP_ANYBYTE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

#ifdef SUPPORT_UNICODE
  case OP_NOTPROP:
  case OP_PROP:
  /* The property test is wrapped in a small extended class and compiled by
  the xclass compiler. */
  prop_class[0] = XCL_HASPROP;
  if (type == OP_NOTPROP)
    prop_class[1] = XCL_NOTPROP;
  else
    prop_class[1] = XCL_PROP;
  prop_class[2] = cc[0];
  prop_class[3] = cc[1];
  prop_class[4] = XCL_END;
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  compile_xclass_matchingpath(common, prop_class, backtracks);
  return cc + 2;
#endif

  case OP_ANYNL:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
  jmp[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  /* CR was read: also step over a directly following NL. The soft partial
  matching case does not need to be handled here. */
  at_end = NULL;
  if (common->mode == PCRE2_JIT_PARTIAL_HARD)
    check_str_end(common, &at_end);
  else
    add_jump(compiler, &at_end, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  jmp[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  jmp[2] = JUMP(SLJIT_JUMP);
  /* Anything other than CR goes through the newline check. */
  JUMPHERE(jmp[0]);
  check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
  set_jumps(at_end, LABEL());
  JUMPHERE(jmp[1]);
  JUMPHERE(jmp[2]);
  return cc;

  case OP_NOT_HSPACE:
  case OP_HSPACE:
  negated = (type == OP_NOT_HSPACE);
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  read_char(common, 0x9, 0x3000, negated ? backtracks : NULL, negated ? READ_CHAR_UPDATE_STR_PTR : 0);

  /* The shared hspace routine reports its result in the zero flag. */
  add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(negated ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_NOT_VSPACE:
  case OP_VSPACE:
  negated = (type == OP_NOT_VSPACE);
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  read_char(common, 0xa, 0x2029, negated ? backtracks : NULL, negated ? READ_CHAR_UPDATE_STR_PTR : 0);

  /* The shared vspace routine reports its result in the zero flag. */
  add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(negated ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

#ifdef SUPPORT_UNICODE
  case OP_EXTUNI:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  /* The grapheme scan is done by a C helper that receives ARGUMENTS and
  STR_PTR and returns the new subject position. */
  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);

#if PCRE2_CODE_UNIT_WIDTH != 32
  if (!common->utf)
    extuni_helper = SLJIT_FUNC_OFFSET(do_extuni_no_utf);
  else if (common->invalid_utf)
    extuni_helper = SLJIT_FUNC_OFFSET(do_extuni_utf_invalid);
  else
    extuni_helper = SLJIT_FUNC_OFFSET(do_extuni_utf);

  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, extuni_helper);
  /* A zero result from the helper means no match. */
  if (common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
#else
  if (common->invalid_utf)
    extuni_helper = SLJIT_FUNC_OFFSET(do_extuni_utf_invalid);
  else
    extuni_helper = SLJIT_FUNC_OFFSET(do_extuni_no_utf);

  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, extuni_helper);
  /* A zero result from the helper means no match. */
  if (!common->utf || common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
#endif

  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

  if (common->mode == PCRE2_JIT_PARTIAL_HARD)
    {
    jmp[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
    /* Since we successfully read a char above, partial matching must occur. */
    check_partial(common, TRUE);
    JUMPHERE(jmp[0]);
    }
  return cc;
#endif

  case OP_CHAR:
  case OP_CHARI:
  units = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc))
    units += GET_EXTRALEN(*cc);
#endif

  if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
    detect_partial_match(common, backtracks);

  /* Literals that byte_sequence_compare() can handle directly: caseful ones,
  and caseless ones that have no other case or a non-zero othercase bit. */
  if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
    {
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(units));
    if (units > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

    lit_ctx.length = IN_UCHARS(units);
    lit_ctx.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
    lit_ctx.ucharptr = 0;
#endif
    return byte_sequence_compare(common, type == OP_CHARI, cc, &lit_ctx, backtracks);
    }

  /* Caseless literal whose other case has to be compared separately. */
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
    GETCHAR(chr, cc);
    }
  else
#endif
    chr = *cc;

  SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));

  if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  alt = char_othercase(common, chr);
  read_char(common, chr < alt ? chr : alt, chr > alt ? chr : alt, NULL, 0);

  SLJIT_ASSERT(!is_powerof2(chr ^ alt));

  if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
    {
    jmp[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chr);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, alt));
    JUMPHERE(jmp[0]);
    }
  else
    {
    /* Replace the other case by the pattern character, then compare once. */
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, alt);
    CMOV(SLJIT_EQUAL, TMP1, SLJIT_IMM, chr);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chr));
    }
  return cc + units;

  case OP_NOT:
  case OP_NOTI:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  units = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
#if PCRE2_CODE_UNIT_WIDTH == 8
    chr = *cc;
    if (chr < 128 && !common->invalid_utf)
      {
      /* Only the first subject byte has to be compared with a 7-bit
      pattern character. */
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
      if (type == OP_NOT || !char_has_othercase(common, cc))
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chr));
      else
        {
        /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
        OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, chr | 0x20));
        }
      /* Skip the variable-length character. */
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      jmp[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
      JUMPHERE(jmp[0]);
      return cc + 1;
      }
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
    GETCHARLEN(chr, cc, units);
    }
  else
#endif /* SUPPORT_UNICODE */
    chr = *cc;

  if (type == OP_NOT || !char_has_othercase(common, cc))
    {
    read_char(common, chr, chr, backtracks, READ_CHAR_UPDATE_STR_PTR);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chr));
    return cc + units;
    }

  /* Caseless: both cases of the pattern character must be rejected. */
  alt = char_othercase(common, chr);
  read_char(common, chr < alt ? chr : alt, chr > alt ? chr : alt, backtracks, READ_CHAR_UPDATE_STR_PTR);
  aux = chr ^ alt;
  if (is_powerof2(aux))
    {
    /* The cases differ in one bit: force it on and compare once. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, aux);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chr | aux));
    }
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chr));
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, alt));
    }
  return cc + units;

  case OP_CLASS:
  case OP_NCLASS:
  negated = (type == OP_NCLASS);
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  /* aux is the largest character value the bitmap test has to cover. */
  aux = (common->utf && is_char7_bitset((const sljit_u8 *)cc, negated)) ? 127 : 255;
  read_char(common, 0, aux, negated ? backtracks : NULL, negated ? READ_CHAR_UPDATE_STR_PTR : 0);
#else
  read_char(common, 0, 255, negated ? backtracks : NULL, negated ? READ_CHAR_UPDATE_STR_PTR : 0);
#endif

  if (optimize_class(common, (const sljit_u8 *)cc, negated, FALSE, backtracks))
    return cc + 32 / sizeof(PCRE2_UCHAR);

  /* Characters above the bitmap range fail a positive class at once and
  skip the bitmap test (that is, match) for a negated class. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  jmp[0] = NULL;
  if (common->utf)
    {
    jmp[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, aux);
    if (!negated)
      {
      add_jump(compiler, backtracks, jmp[0]);
      jmp[0] = NULL;
      }
    }
#elif PCRE2_CODE_UNIT_WIDTH != 8
  jmp[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  if (!negated)
    {
    add_jump(compiler, backtracks, jmp[0]);
    jmp[0] = NULL;
    }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

  /* Bitmap test: byte (TMP1 >> 3) of the map, bit (TMP1 & 7). */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  if (jmp[0] != NULL)
    JUMPHERE(jmp[0]);
#endif
  return cc + 32 / sizeof(PCRE2_UCHAR);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  case OP_XCLASS:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
  return cc + GET(cc, 0) - 1;
#endif
  }
SLJIT_UNREACHABLE();
return cc;
}
8901 
compile_charn_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,jump_list ** backtracks)8902 static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
8903 {
8904 /* This function consumes at least one input character. */
8905 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
8906 DEFINE_COMPILER;
8907 PCRE2_SPTR ccbegin = cc;
8908 compare_context context;
8909 int size;
8910 
8911 context.length = 0;
8912 do
8913   {
8914   if (cc >= ccend)
8915     break;
8916 
8917   if (*cc == OP_CHAR)
8918     {
8919     size = 1;
8920 #ifdef SUPPORT_UNICODE
8921     if (common->utf && HAS_EXTRALEN(cc[1]))
8922       size += GET_EXTRALEN(cc[1]);
8923 #endif
8924     }
8925   else if (*cc == OP_CHARI)
8926     {
8927     size = 1;
8928 #ifdef SUPPORT_UNICODE
8929     if (common->utf)
8930       {
8931       if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
8932         size = 0;
8933       else if (HAS_EXTRALEN(cc[1]))
8934         size += GET_EXTRALEN(cc[1]);
8935       }
8936     else
8937 #endif
8938     if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
8939       size = 0;
8940     }
8941   else
8942     size = 0;
8943 
8944   cc += 1 + size;
8945   context.length += IN_UCHARS(size);
8946   }
8947 while (size > 0 && context.length <= 128);
8948 
8949 cc = ccbegin;
8950 if (context.length > 0)
8951   {
8952   /* We have a fixed-length byte sequence. */
8953   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
8954   add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
8955 
8956   context.sourcereg = -1;
8957 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
8958   context.ucharptr = 0;
8959 #endif
8960   do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
8961   return cc;
8962   }
8963 
8964 /* A non-fixed length character will be checked if length == 0. */
8965 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
8966 }
8967 
8968 /* Forward definitions. */
8969 static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
8970 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
8971 
8972 #define PUSH_BACKTRACK(size, ccstart, error) \
8973   do \
8974     { \
8975     backtrack = sljit_alloc_memory(compiler, (size)); \
8976     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
8977       return error; \
8978     memset(backtrack, 0, size); \
8979     backtrack->prev = parent->top; \
8980     backtrack->cc = (ccstart); \
8981     parent->top = backtrack; \
8982     } \
8983   while (0)
8984 
8985 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
8986   do \
8987     { \
8988     backtrack = sljit_alloc_memory(compiler, (size)); \
8989     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
8990       return; \
8991     memset(backtrack, 0, size); \
8992     backtrack->prev = parent->top; \
8993     backtrack->cc = (ccstart); \
8994     parent->top = backtrack; \
8995     } \
8996   while (0)
8997 
8998 #define BACKTRACK_AS(type) ((type *)backtrack)
8999 
/* Emits code that selects the capture group to use for a reference by
duplicate name (OP_DNREF / OP_DNREFI).

Arguments:
  common       the JIT compiler state
  cc           points at the OP_DNREF or OP_DNREFI opcode
  backtracks   list for the "no match" jump, or NULL to omit that jump

The candidate groups are tried in name-table order. The generated code
leaves the address of the chosen group's OVECTOR slot in TMP2, stopping at
the first candidate whose slot differs from OVECTOR(1). */

static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
/* The OVECTOR offset goes to TMP2. */
DEFINE_COMPILER;
PCRE2_SPTR entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
int remaining;
unsigned int offset;
jump_list *found = NULL;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

/* TMP1 holds the OVECTOR(1) value every candidate slot is compared with. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* Every candidate except the last one: leave the search as soon as its slot
differs from TMP1. */
remaining = GET2(cc, 1 + IMM2_SIZE);
for (remaining--; remaining > 0; remaining--)
  {
  offset = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
  add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
  entry += common->name_entry_size;
  }

/* The last candidate is selected unconditionally. If its slot equals TMP1
too, a backtrack is emitted unless the caller passed no list or
common->unset_backref is set. */
offset = GET2(entry, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
if (backtracks != NULL && !common->unset_backref)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));

set_jumps(found, LABEL());
}
9029 
/* Emits the matching path of a single back reference whose opcode is at cc.

For OP_REF / OP_REFI the group number is read from the pattern and the
captured text is taken from OVECTOR(offset) / OVECTOR(offset + 1). For every
other opcode handled here (the DNREF forms) the caller must already have
loaded TMP2 with the address of the selected ovector pair; its start is read
from [TMP2] and its end from [TMP2 + sizeof(sljit_sw)].

Arguments:
  common      compiler state
  cc          points at the back reference opcode
  backtracks  list receiving every jump taken when the reference fails
  withchecks  when TRUE, emit the unset-group test (numbered references
              only, and only if common->unset_backref is clear) and the
              empty-reference test
  emptyfail   when TRUE an empty reference is treated as a failure,
              otherwise matching continues right after it

The case-sensitive opcodes (OP_REF and OP_DNREF) are compared with the
casefulcmp helper, the others with caselesscmp, except OP_REFI in UTF mode
which is compared character by character using the Unicode case tables. */
compile_ref_matchingpath(compiler_common * common,PCRE2_SPTR cc,jump_list ** backtracks,BOOL withchecks,BOOL emptyfail)9030 static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
9031 {
9032 DEFINE_COMPILER;
9033 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9034 int offset = 0;
9035 struct sljit_jump *jump = NULL;
9036 struct sljit_jump *partial;
9037 struct sljit_jump *nopartial;
9038 #if defined SUPPORT_UNICODE
9039 struct sljit_label *loop;
9040 struct sljit_label *caseless_loop;
9041 jump_list *no_match = NULL;
     /* Registers borrowed by the UTF caseless loop. Their previous contents
     are parked in three words at common->iref_ptr and restored on every
     exit from that loop. */
9042 int source_reg = COUNT_MATCH;
9043 int source_end_reg = ARGUMENTS;
9044 int char1_reg = STACK_LIMIT;
9045 #endif /* SUPPORT_UNICODE */
9046 
     /* TMP1 = start of the referenced text. */
9047 if (ref)
9048   {
9049   offset = GET2(cc, 1) << 1;
9050   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9051   /* OVECTOR(1) contains the "string begin - 1" constant. */
9052   if (withchecks && !common->unset_backref)
9053     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9054   }
9055 else
9056   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9057 
9058 #if defined SUPPORT_UNICODE
     /* Character-by-character caseless comparison using the Unicode tables.
     NOTE(review): the condition admits OP_REFI only, so `ref` is always TRUE
     inside this block and the `else` arm below is never taken from here;
     other caseless opcodes go through the helper based path further down. */
9059 if (common->utf && *cc == OP_REFI)
9060   {
9061   SLJIT_ASSERT(common->iref_ptr != 0);
9062 
       /* TMP2 = end of the referenced text. */
9063   if (ref)
9064     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9065   else
9066     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9067 
       /* start == end: the reference is empty. */
9068   if (withchecks && emptyfail)
9069     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));
9070 
       /* Save the three borrowed registers. */
9071   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0);
9072   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0);
9073   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0);
9074 
       /* source_reg walks the referenced text, source_end_reg marks its end. */
9075   OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
9076   OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);
9077 
       /* Loop head: `jump` leaves when the whole reference has been consumed,
       `partial` leaves when the subject is exhausted first. */
9078   loop = LABEL();
9079   jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
9080   partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
9081 
9082   /* Read original character. It must be a valid UTF character. */
       /* STR_PTR is pointed at the reference for the read while the subject
       position waits in TMP3; afterwards the two are swapped back and the
       character just read (TMP1) is kept in char1_reg. */
9083   OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
9084   OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);
9085 
9086   read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);
9087 
9088   OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
9089   OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
9090   OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);
9091 
9092   /* Read second character. */
9093   read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);
9094 
       /* Identical characters: go on with the next pair. */
9095   CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
9096 
       /* Keep the subject character in TMP3 across the getucd helper call. */
9097   OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
9098 
9099   add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
9100 
       /* TMP2 = ucd_records + TMP2 * 12 (4x + 8x). Presumably TMP2 is the
       record index returned by the helper and 12 is sizeof(ucd_record) -
       TODO confirm against the structure definition. */
9101   OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
9102   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
9103   OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
9104 
9105   OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));
9106 
       /* TMP1 = subject character + its other_case delta, TMP2 = caseset. */
9107   OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
9108   OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
9109   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
9110   CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
9111 
       /* A caseset of zero leaves no further candidates. Otherwise scan the
       32 bit entries starting at ucd_caseless_sets[caseset]. */
9112   add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9113   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
9114   OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));
9115 
       /* An entry equal to char1_reg is a match; an entry below it means keep
       scanning; anything else falls through to no_match. */
9116   caseless_loop = LABEL();
9117   OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9118   OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
9119   OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, char1_reg, 0);
9120   JUMPTO(SLJIT_EQUAL, loop);
9121   JUMPTO(SLJIT_LESS, caseless_loop);
9122 
       /* Mismatch exit. In complete mode running out of subject is handled
       as a plain mismatch as well. */
9123   set_jumps(no_match, LABEL());
9124   if (common->mode == PCRE2_JIT_COMPLETE)
9125     JUMPHERE(partial);
9126 
9127   OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9128   OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9129   OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9130   add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9131 
       /* Partial matching modes: subject exhausted inside the reference. */
9132   if (common->mode != PCRE2_JIT_COMPLETE)
9133     {
9134     JUMPHERE(partial);
9135     OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9136     OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9137     OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9138 
9139     check_partial(common, FALSE);
9140     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9141     }
9142 
       /* Success exit: the whole reference matched. The function returns
       here, so the emptyfail handling at its end is not used by this path. */
9143   JUMPHERE(jump);
9144   OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9145   OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9146   OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9147   return;
9148   }
9149 else
9150 #endif /* SUPPORT_UNICODE */
9151   {
       /* TMP2 = length of the referenced text (end - start); the zero flag is
       set when it is empty. */
9152   if (ref)
9153     OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
9154   else
9155     OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
9156 
       /* Empty reference: branch away, resolved at the end of the function. */
9157   if (withchecks)
9158     jump = JUMP(SLJIT_ZERO);
9159 
       /* Advance STR_PTR over the candidate text; going beyond STR_END means
       the subject is too short for a full comparison. */
9160   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
9161   partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
9162   if (common->mode == PCRE2_JIT_COMPLETE)
9163     add_jump(compiler, backtracks, partial);
9164 
       /* Both case-sensitive opcodes must use the caseful helper. Testing for
       OP_REF alone sent OP_DNREF to the caseless helper. TMP2 is non-zero
       after the call when the texts differ. */
9165   add_jump(compiler, (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
9166   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9167 
9168   if (common->mode != PCRE2_JIT_COMPLETE)
9169     {
9170     nopartial = JUMP(SLJIT_JUMP);
9171     JUMPHERE(partial);
9172     /* TMP2 -= STR_PTR - STR_END, i.e. drop the part beyond the subject end. */
9173     OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
9174     OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
         /* Nothing left to compare: go straight to the partial check. */
9175     partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
9176     OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
         /* Same helper selection as for the full comparison above. */
9177     add_jump(compiler, (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
9178     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9179     JUMPHERE(partial);
9180     check_partial(common, FALSE);
9181     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9182     JUMPHERE(nopartial);
9183     }
9184   }
9185 
     /* Resolve the empty-reference branch taken above: it either fails or
     continues right here, as requested by emptyfail. */
9186 if (jump != NULL)
9187   {
9188   if (emptyfail)
9189     add_jump(compiler, backtracks, jump);
9190   else
9191     JUMPHERE(jump);
9192   }
9193 }
9194 
/* Emits the matching path for a back reference that is followed by a repeat
operator (OP_CRSTAR through OP_CRMINRANGE).

Arguments:
  common   compiler state
  cc       points at the back reference opcode
  parent   parent backtrack record (not referenced directly in this body;
           presumably consumed by PUSH_BACKTRACK - TODO confirm)

Returns the address of the first code unit after the repeat operator.
PUSH_BACKTRACK is given NULL as its failure value, so NULL is presumably
returned when the backtrack record cannot be created.

Throughout the function max == 0 stands for "no upper limit". */
compile_ref_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9195 static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9196 {
9197 DEFINE_COMPILER;
9198 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9199 backtrack_common *backtrack;
9200 PCRE2_UCHAR type;
9201 int offset = 0;
9202 struct sljit_label *label;
9203 struct sljit_jump *zerolength;
9204 struct sljit_jump *jump = NULL;
9205 PCRE2_SPTR ccbegin = cc;
9206 int min = 0, max = 0;
9207 BOOL minimize;
9208 
9209 PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
9210 
     /* OP_REF / OP_REFI carry the group number right after the opcode. The
     other opcodes are one IMM2_SIZE field longer, so cc is advanced to make
     the repeat opcode sit at the same relative position in both cases. */
9211 if (ref)
9212   offset = GET2(cc, 1) << 1;
9213 else
9214   cc += IMM2_SIZE;
9215 type = cc[1 + IMM2_SIZE];
9216 
     /* The low bit of the repeat opcode selects the minimizing variant; this
     depends on OP_CRSTAR being even, which is checked at compile time. */
9217 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
9218 minimize = (type & 0x1) != 0;
     /* Decode the limits and step cc over the repeat operator. */
9219 switch(type)
9220   {
9221   case OP_CRSTAR:
9222   case OP_CRMINSTAR:
9223   min = 0;
9224   max = 0;
9225   cc += 1 + IMM2_SIZE + 1;
9226   break;
9227   case OP_CRPLUS:
9228   case OP_CRMINPLUS:
9229   min = 1;
9230   max = 0;
9231   cc += 1 + IMM2_SIZE + 1;
9232   break;
9233   case OP_CRQUERY:
9234   case OP_CRMINQUERY:
9235   min = 0;
9236   max = 1;
9237   cc += 1 + IMM2_SIZE + 1;
9238   break;
9239   case OP_CRRANGE:
9240   case OP_CRMINRANGE:
9241   min = GET2(cc, 1 + IMM2_SIZE + 1);
9242   max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
9243   cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
9244   break;
9245   default:
9246   SLJIT_UNREACHABLE();
9247   break;
9248   }
9249 
     /* Greedy repeat. */
9250 if (!minimize)
9251   {
9252   if (min == 0)
9253     {
         /* Two slots: STACK(0) receives the current position, STACK(1) is
         set to zero. */
9254     allocate_stack(common, 2);
9255     if (ref)
9256       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9257     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9258     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9259     /* Temporary release of STR_PTR. */
9260     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9261     /* Handles both invalid and empty cases. Since the minimum repeat,
9262     is zero the invalid case is basically the same as an empty case. */
9263     if (ref)
9264       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9265     else
9266       {
           /* TMP2 is used as the address of the selected ovector pair after
           the search; it is kept in POSSESSIVE1 for the loop below. */
9267       compile_dnref_search(common, ccbegin, NULL);
9268       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9269       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9270       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9271       }
9272     /* Restore if not zero length. */
9273     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9274     }
9275   else
9276     {
         /* At least one match is required: a single slot holding zero, and
         an unset group fails at once. */
9277     allocate_stack(common, 1);
9278     if (ref)
9279       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9280     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9281     if (ref)
9282       {
           /* NOTE(review): unlike the unset test in compile_ref_matchingpath,
           this one is not conditioned on common->unset_backref - confirm
           that this is intended. */
9283       add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9284       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9285       }
9286     else
9287       {
9288       compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
9289       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9290       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9291       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9292       }
9293     }
9294 
       /* POSSESSIVE0 counts the completed iterations. It is only maintained
       when a limit greater than one has to be enforced. */
9295   if (min > 1 || max > 1)
9296     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
9297 
       /* Loop head. For the non-REF opcodes TMP2 is reloaded with the pair
       address saved in POSSESSIVE1. The unset and empty cases were handled
       above, so the single match is emitted without checks. */
9298   label = LABEL();
9299   if (!ref)
9300     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
9301   compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
9302 
9303   if (min > 1 || max > 1)
9304     {
         /* Increment the counter and repeat while it is below min. */
9305     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
9306     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9307     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
9308     if (min > 1)
9309       CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
9310     if (max > 1)
9311       {
           /* Stop at max. Otherwise push the position reached and go for
           another iteration. */
9312       jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
9313       allocate_stack(common, 1);
9314       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9315       JUMPTO(SLJIT_JUMP, label);
9316       JUMPHERE(jump);
9317       }
9318     }
9319 
9320   if (max == 0)
9321     {
9322     /* Includes min > 1 case as well. */
9323     allocate_stack(common, 1);
9324     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9325     JUMPTO(SLJIT_JUMP, label);
9326     }
9327 
       /* The zero length (and, for min == 0, unset) case continues here. */
9328   JUMPHERE(zerolength);
9329   BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9330 
9331   count_match(common);
9332   return cc;
9333   }
9334 
     /* Minimizing repeat. Stack slots: STACK(0) holds zero until a position
     is stored in it, STACK(1) is the iteration counter (not initialised for
     OP_CRMINSTAR), and for the non-REF opcodes STACK(2) keeps the address of
     the selected ovector pair. */
9335 allocate_stack(common, ref ? 2 : 3);
9336 if (ref)
9337   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9338 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9339 if (type != OP_CRMINSTAR)
9340   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9341 
9342 if (min == 0)
9343   {
9344   /* Handles both invalid and empty cases. Since the minimum repeat,
9345   is zero the invalid case is basically the same as an empty case. */
9346   if (ref)
9347     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9348   else
9349     {
9350     compile_dnref_search(common, ccbegin, NULL);
9351     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9352     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9353     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9354     }
9355   /* Length is non-zero, we can match real repeats. */
9356   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
       /* No iteration is required up front: skip the match emitted below. */
9357   jump = JUMP(SLJIT_JUMP);
9358   }
9359 else
9360   {
9361   if (ref)
9362     {
         /* NOTE(review): as in the greedy case, this unset test does not
         look at common->unset_backref - confirm that this is intended. */
9363     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9364     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9365     }
9366   else
9367     {
9368     compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
9369     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9370     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9371     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9372     }
9373   }
9374 
     /* Entry point of one iteration. It is the target of the min loop below;
     presumably the backtracking path also re-enters here to try one more
     iteration - TODO confirm. With an upper limit, fail once the counter
     has reached it. */
9375 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9376 if (max > 0)
9377   add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
9378 
     /* Match the reference once, with checks, and record the new position. */
9379 if (!ref)
9380   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9381 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
9382 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9383 
     /* Update the counter: loop until min is reached, or just count when
     only an upper limit has to be tracked. */
9384 if (min > 1)
9385   {
9386   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9387   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9388   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9389   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
9390   }
9391 else if (max > 0)
9392   OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
9393 
9394 if (jump != NULL)
9395   JUMPHERE(jump);
9396 JUMPHERE(zerolength);
9397 
9398 count_match(common);
9399 return cc;
9400 }
9401 
compile_recurse_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9402 static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9403 {
9404 DEFINE_COMPILER;
9405 backtrack_common *backtrack;
9406 recurse_entry *entry = common->entries;
9407 recurse_entry *prev = NULL;
9408 sljit_sw start = GET(cc, 1);
9409 PCRE2_SPTR start_cc;
9410 BOOL needs_control_head;
9411 
9412 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
9413 
9414 /* Inlining simple patterns. */
9415 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
9416   {
9417   start_cc = common->start + start;
9418   compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
9419   BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
9420   return cc + 1 + LINK_SIZE;
9421   }
9422 
9423 while (entry != NULL)
9424   {
9425   if (entry->start == start)
9426     break;
9427   prev = entry;
9428   entry = entry->next;
9429   }
9430 
9431 if (entry == NULL)
9432   {
9433   entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
9434   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9435     return NULL;
9436   entry->next = NULL;
9437   entry->entry_label = NULL;
9438   entry->backtrack_label = NULL;
9439   entry->entry_calls = NULL;
9440   entry->backtrack_calls = NULL;
9441   entry->start = start;
9442 
9443   if (prev != NULL)
9444     prev->next = entry;
9445   else
9446     common->entries = entry;
9447   }
9448 
9449 BACKTRACK_AS(recurse_backtrack)->entry = entry;
9450 
9451 if (entry->entry_label == NULL)
9452   add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));
9453 else
9454   JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
9455 /* Leave if the match is failed. */
9456 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
9457 BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
9458 return cc + 1 + LINK_SIZE;
9459 }
9460 
do_callout(struct jit_arguments * arguments,pcre2_callout_block * callout_block,PCRE2_SPTR * jit_ovector)9461 static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
9462 {
9463 PCRE2_SPTR begin;
9464 PCRE2_SIZE *ovector;
9465 sljit_u32 oveccount, capture_top;
9466 
9467 if (arguments->callout == NULL)
9468   return 0;
9469 
9470 SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);
9471 
9472 begin = arguments->begin;
9473 ovector = (PCRE2_SIZE*)(callout_block + 1);
9474 oveccount = callout_block->capture_top;
9475 
9476 SLJIT_ASSERT(oveccount >= 1);
9477 
9478 callout_block->version = 2;
9479 callout_block->callout_flags = 0;
9480 
9481 /* Offsets in subject. */
9482 callout_block->subject_length = arguments->end - arguments->begin;
9483 callout_block->start_match = jit_ovector[0] - begin;
9484 callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;
9485 callout_block->subject = begin;
9486 
9487 /* Convert and copy the JIT offset vector to the ovector array. */
9488 callout_block->capture_top = 1;
9489 callout_block->offset_vector = ovector;
9490 
9491 ovector[0] = PCRE2_UNSET;
9492 ovector[1] = PCRE2_UNSET;
9493 ovector += 2;
9494 jit_ovector += 2;
9495 capture_top = 1;
9496 
9497 /* Convert pointers to sizes. */
9498 while (--oveccount != 0)
9499   {
9500   capture_top++;
9501 
9502   ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin);
9503   ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin);
9504 
9505   if (ovector[0] != PCRE2_UNSET)
9506     callout_block->capture_top = capture_top;
9507 
9508   ovector += 2;
9509   jit_ovector += 2;
9510   }
9511 
9512 return (arguments->callout)(callout_block, arguments->callout_data);
9513 }
9514 
/* Byte offset of a field inside pcre2_callout_block. */
9515 #define CALLOUT_ARG_OFFSET(arg) \
9516     SLJIT_OFFSETOF(pcre2_callout_block, arg)
9517 
/* Emits the code for a callout item at cc: a pcre2_callout_block followed by
room for the converted offset vector is reserved on the machine stack, the
block is filled in, do_callout() is called and its return value is tested.

Arguments:
  common   compiler state
  cc       points at the callout opcode
  parent   parent backtrack record (not referenced directly in this body)

Returns the address of the code unit following the callout item. */
compile_callout_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9518 static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9519 {
9520 DEFINE_COMPILER;
9521 backtrack_common *backtrack;
9522 sljit_s32 mov_opcode;
     /* OP_CALLOUT has a fixed length; every other opcode handled here stores
     its own length inside the item. */
9523 unsigned int callout_length = (*cc == OP_CALLOUT)
9524     ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
9525 sljit_sw value1;
9526 sljit_sw value2;
9527 sljit_sw value3;
     /* Bytes needed for one pair of words per group, pair zero included. */
9528 sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * sizeof(sljit_sw);
9529 
9530 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9531 
     /* Add the callout block itself and convert the total to a number of
     sljit_sw sized words, rounding up. */
9532 callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);
9533 
9534 allocate_stack(common, callout_arg_size);
9535 
9536 SLJIT_ASSERT(common->capture_last_ptr != 0);
     /* TMP2 = last captured group, TMP1 = match arguments. */
9537 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
9538 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
     /* Only OP_CALLOUT carries a callout number; zero is used otherwise. */
9539 value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
9540 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
9541 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
9542 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);
9543 
9544 /* These pointer sized fields temporarily store internal variables. */
     /* do_callout() turns this subject pointer into current_position before
     it installs the real offset vector. */
9545 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
9546 
     /* TMP2 is reused for the mark pointer, which is stored further below. */
9547 if (common->mark_ptr != 0)
9548   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
     /* PCRE2_SIZE fields are written with a move that matches their width. */
9549 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
9550 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
9551 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
9552 
     /* String argument: empty for OP_CALLOUT. Otherwise the string pointer,
     its length and its offset are derived from the item itself. */
9553 if (*cc == OP_CALLOUT)
9554   {
9555   value1 = 0;
9556   value2 = 0;
9557   value3 = 0;
9558   }
9559 else
9560   {
9561   value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
9562   value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
9563   value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
9564   }
9565 
9566 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
9567 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
9568 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
     /* mark = TMP2 when marks are in use, otherwise the constant zero. */
9569 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
9570 
     /* TMP1 already holds the first call argument. STR_PTR shares its
     register with the second one, hence the save and restore around the
     call. */
9571 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
9572 
9573 /* Needed to save important temporary registers. */
9574 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
9575 /* SLJIT_R0 = arguments */
     /* SLJIT_R1 = callout block, SLJIT_R2 = address of the JIT ovector. */
9576 OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
9577 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
9578 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
9579 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
9580 free_stack(common, callout_arg_size);
9581 
9582 /* Check return value. */
     /* Greater than zero: backtrack. Any other non-zero value, which can
     only be negative at that point: abort. Zero: carry on. */
9583 OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
9584 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
9585 if (common->abort_label == NULL)
9586   add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
9587 else
9588   JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label);
9589 return cc + callout_length;
9590 }
9591 
/* NOTE(review): no definition of CALLOUT_ARG_SIZE is visible in this part of
the file, so the first #undef looks like a leftover - confirm before
removing it. */
9592 #undef CALLOUT_ARG_SIZE
9593 #undef CALLOUT_ARG_OFFSET
9594 
assert_needs_str_ptr_saving(PCRE2_SPTR cc)9595 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
9596 {
9597 while (TRUE)
9598   {
9599   switch (*cc)
9600     {
9601     case OP_CALLOUT_STR:
9602     cc += GET(cc, 1 + 2*LINK_SIZE);
9603     break;
9604 
9605     case OP_NOT_WORD_BOUNDARY:
9606     case OP_WORD_BOUNDARY:
9607     case OP_CIRC:
9608     case OP_CIRCM:
9609     case OP_DOLL:
9610     case OP_DOLLM:
9611     case OP_CALLOUT:
9612     case OP_ALT:
9613     cc += PRIV(OP_lengths)[*cc];
9614     break;
9615 
9616     case OP_KET:
9617     return FALSE;
9618 
9619     default:
9620     return TRUE;
9621     }
9622   }
9623 }
9624 
compile_assert_matchingpath(compiler_common * common,PCRE2_SPTR cc,assert_backtrack * backtrack,BOOL conditional)9625 static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
9626 {
9627 DEFINE_COMPILER;
9628 int framesize;
9629 int extrasize;
9630 BOOL local_quit_available = FALSE;
9631 BOOL needs_control_head;
9632 int private_data_ptr;
9633 backtrack_common altbacktrack;
9634 PCRE2_SPTR ccbegin;
9635 PCRE2_UCHAR opcode;
9636 PCRE2_UCHAR bra = OP_BRA;
9637 jump_list *tmp = NULL;
9638 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
9639 jump_list **found;
9640 /* Saving previous accept variables. */
9641 BOOL save_local_quit_available = common->local_quit_available;
9642 BOOL save_in_positive_assertion = common->in_positive_assertion;
9643 then_trap_backtrack *save_then_trap = common->then_trap;
9644 struct sljit_label *save_quit_label = common->quit_label;
9645 struct sljit_label *save_accept_label = common->accept_label;
9646 jump_list *save_quit = common->quit;
9647 jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
9648 jump_list *save_accept = common->accept;
9649 struct sljit_jump *jump;
9650 struct sljit_jump *brajump = NULL;
9651 
9652 /* Assert captures then. */
9653 common->then_trap = NULL;
9654 
9655 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
9656   {
9657   SLJIT_ASSERT(!conditional);
9658   bra = *cc;
9659   cc++;
9660   }
9661 private_data_ptr = PRIVATE_DATA(cc);
9662 SLJIT_ASSERT(private_data_ptr != 0);
9663 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
9664 backtrack->framesize = framesize;
9665 backtrack->private_data_ptr = private_data_ptr;
9666 opcode = *cc;
9667 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
9668 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
9669 ccbegin = cc;
9670 cc += GET(cc, 1);
9671 
9672 if (bra == OP_BRAMINZERO)
9673   {
9674   /* This is a braminzero backtrack path. */
9675   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9676   free_stack(common, 1);
9677   brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9678   }
9679 
9680 if (framesize < 0)
9681   {
9682   extrasize = 1;
9683   if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
9684     extrasize = 0;
9685 
9686   if (needs_control_head)
9687     extrasize++;
9688 
9689   if (framesize == no_frame)
9690     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
9691 
9692   if (extrasize > 0)
9693     allocate_stack(common, extrasize);
9694 
9695   if (needs_control_head)
9696     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9697 
9698   if (extrasize > 0)
9699     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9700 
9701   if (needs_control_head)
9702     {
9703     SLJIT_ASSERT(extrasize == 2);
9704     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9705     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9706     }
9707   }
9708 else
9709   {
9710   extrasize = needs_control_head ? 3 : 2;
9711   allocate_stack(common, framesize + extrasize);
9712 
9713   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9714   OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
9715   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
9716   if (needs_control_head)
9717     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9718   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9719 
9720   if (needs_control_head)
9721     {
9722     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
9723     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
9724     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9725     }
9726   else
9727     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9728 
9729   init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
9730   }
9731 
9732 memset(&altbacktrack, 0, sizeof(backtrack_common));
9733 if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
9734   {
9735   /* Control verbs cannot escape from these asserts. */
9736   local_quit_available = TRUE;
9737   common->local_quit_available = TRUE;
9738   common->quit_label = NULL;
9739   common->quit = NULL;
9740   }
9741 
9742 common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
9743 common->positive_assertion_quit = NULL;
9744 
9745 while (1)
9746   {
9747   common->accept_label = NULL;
9748   common->accept = NULL;
9749   altbacktrack.top = NULL;
9750   altbacktrack.topbacktracks = NULL;
9751 
9752   if (*ccbegin == OP_ALT && extrasize > 0)
9753     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9754 
9755   altbacktrack.cc = ccbegin;
9756   compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
9757   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9758     {
9759     if (local_quit_available)
9760       {
9761       common->local_quit_available = save_local_quit_available;
9762       common->quit_label = save_quit_label;
9763       common->quit = save_quit;
9764       }
9765     common->in_positive_assertion = save_in_positive_assertion;
9766     common->then_trap = save_then_trap;
9767     common->accept_label = save_accept_label;
9768     common->positive_assertion_quit = save_positive_assertion_quit;
9769     common->accept = save_accept;
9770     return NULL;
9771     }
9772   common->accept_label = LABEL();
9773   if (common->accept != NULL)
9774     set_jumps(common->accept, common->accept_label);
9775 
9776   /* Reset stack. */
9777   if (framesize < 0)
9778     {
9779     if (framesize == no_frame)
9780       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9781     else if (extrasize > 0)
9782       free_stack(common, extrasize);
9783 
9784     if (needs_control_head)
9785       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
9786     }
9787   else
9788     {
9789     if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
9790       {
9791       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9792       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
9793       if (needs_control_head)
9794         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
9795       }
9796     else
9797       {
9798       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9799       if (needs_control_head)
9800         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
9801       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9802       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
9803       }
9804     }
9805 
9806   if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
9807     {
9808     /* We know that STR_PTR was stored on the top of the stack. */
9809     if (conditional)
9810       {
9811       if (extrasize > 0)
9812         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
9813       }
9814     else if (bra == OP_BRAZERO)
9815       {
9816       if (framesize < 0)
9817         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
9818       else
9819         {
9820         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
9821         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
9822         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9823         }
9824       OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9825       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9826       }
9827     else if (framesize >= 0)
9828       {
9829       /* For OP_BRA and OP_BRAMINZERO. */
9830       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
9831       }
9832     }
9833   add_jump(compiler, found, JUMP(SLJIT_JUMP));
9834 
9835   compile_backtrackingpath(common, altbacktrack.top);
9836   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9837     {
9838     if (local_quit_available)
9839       {
9840       common->local_quit_available = save_local_quit_available;
9841       common->quit_label = save_quit_label;
9842       common->quit = save_quit;
9843       }
9844     common->in_positive_assertion = save_in_positive_assertion;
9845     common->then_trap = save_then_trap;
9846     common->accept_label = save_accept_label;
9847     common->positive_assertion_quit = save_positive_assertion_quit;
9848     common->accept = save_accept;
9849     return NULL;
9850     }
9851   set_jumps(altbacktrack.topbacktracks, LABEL());
9852 
9853   if (*cc != OP_ALT)
9854     break;
9855 
9856   ccbegin = cc;
9857   cc += GET(cc, 1);
9858   }
9859 
9860 if (local_quit_available)
9861   {
9862   SLJIT_ASSERT(common->positive_assertion_quit == NULL);
9863   /* Makes the check less complicated below. */
9864   common->positive_assertion_quit = common->quit;
9865   }
9866 
9867 /* None of them matched. */
9868 if (common->positive_assertion_quit != NULL)
9869   {
9870   jump = JUMP(SLJIT_JUMP);
9871   set_jumps(common->positive_assertion_quit, LABEL());
9872   SLJIT_ASSERT(framesize != no_stack);
9873   if (framesize < 0)
9874     OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
9875   else
9876     {
9877     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9878     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9879     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
9880     }
9881   JUMPHERE(jump);
9882   }
9883 
9884 if (needs_control_head)
9885   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
9886 
9887 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
9888   {
9889   /* Assert is failed. */
9890   if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
9891     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9892 
9893   if (framesize < 0)
9894     {
9895     /* The topmost item should be 0. */
9896     if (bra == OP_BRAZERO)
9897       {
9898       if (extrasize == 2)
9899         free_stack(common, 1);
9900       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9901       }
9902     else if (extrasize > 0)
9903       free_stack(common, extrasize);
9904     }
9905   else
9906     {
9907     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
9908     /* The topmost item should be 0. */
9909     if (bra == OP_BRAZERO)
9910       {
9911       free_stack(common, framesize + extrasize - 1);
9912       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9913       }
9914     else
9915       free_stack(common, framesize + extrasize);
9916     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9917     }
9918   jump = JUMP(SLJIT_JUMP);
9919   if (bra != OP_BRAZERO)
9920     add_jump(compiler, target, jump);
9921 
9922   /* Assert is successful. */
9923   set_jumps(tmp, LABEL());
9924   if (framesize < 0)
9925     {
9926     /* We know that STR_PTR was stored on the top of the stack. */
9927     if (extrasize > 0)
9928       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
9929 
9930     /* Keep the STR_PTR on the top of the stack. */
9931     if (bra == OP_BRAZERO)
9932       {
9933       OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9934       if (extrasize == 2)
9935         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9936       }
9937     else if (bra == OP_BRAMINZERO)
9938       {
9939       OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9940       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9941       }
9942     }
9943   else
9944     {
9945     if (bra == OP_BRA)
9946       {
9947       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9948       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
9949       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
9950       }
9951     else
9952       {
9953       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9954       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
9955       if (extrasize == 2)
9956         {
9957         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9958         if (bra == OP_BRAMINZERO)
9959           OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9960         }
9961       else
9962         {
9963         SLJIT_ASSERT(extrasize == 3);
9964         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
9965         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
9966         }
9967       }
9968     }
9969 
9970   if (bra == OP_BRAZERO)
9971     {
9972     backtrack->matchingpath = LABEL();
9973     SET_LABEL(jump, backtrack->matchingpath);
9974     }
9975   else if (bra == OP_BRAMINZERO)
9976     {
9977     JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
9978     JUMPHERE(brajump);
9979     if (framesize >= 0)
9980       {
9981       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9982       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9983       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
9984       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
9985       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9986       }
9987     set_jumps(backtrack->common.topbacktracks, LABEL());
9988     }
9989   }
9990 else
9991   {
9992   /* AssertNot is successful. */
9993   if (framesize < 0)
9994     {
9995     if (extrasize > 0)
9996       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9997 
9998     if (bra != OP_BRA)
9999       {
10000       if (extrasize == 2)
10001         free_stack(common, 1);
10002       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10003       }
10004     else if (extrasize > 0)
10005       free_stack(common, extrasize);
10006     }
10007   else
10008     {
10009     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10010     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
10011     /* The topmost item should be 0. */
10012     if (bra != OP_BRA)
10013       {
10014       free_stack(common, framesize + extrasize - 1);
10015       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10016       }
10017     else
10018       free_stack(common, framesize + extrasize);
10019     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10020     }
10021 
10022   if (bra == OP_BRAZERO)
10023     backtrack->matchingpath = LABEL();
10024   else if (bra == OP_BRAMINZERO)
10025     {
10026     JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
10027     JUMPHERE(brajump);
10028     }
10029 
10030   if (bra != OP_BRA)
10031     {
10032     SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
10033     set_jumps(backtrack->common.topbacktracks, LABEL());
10034     backtrack->common.topbacktracks = NULL;
10035     }
10036   }
10037 
10038 if (local_quit_available)
10039   {
10040   common->local_quit_available = save_local_quit_available;
10041   common->quit_label = save_quit_label;
10042   common->quit = save_quit;
10043   }
10044 common->in_positive_assertion = save_in_positive_assertion;
10045 common->then_trap = save_then_trap;
10046 common->accept_label = save_accept_label;
10047 common->positive_assertion_quit = save_positive_assertion_quit;
10048 common->accept = save_accept;
10049 return cc + 1 + LINK_SIZE;
10050 }
10051 
match_once_common(compiler_common * common,PCRE2_UCHAR ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)10052 static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
10053 {
10054 DEFINE_COMPILER;
10055 int stacksize;
10056 
10057 if (framesize < 0)
10058   {
10059   if (framesize == no_frame)
10060     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10061   else
10062     {
10063     stacksize = needs_control_head ? 1 : 0;
10064     if (ket != OP_KET || has_alternatives)
10065       stacksize++;
10066 
10067     if (stacksize > 0)
10068       free_stack(common, stacksize);
10069     }
10070 
10071   if (needs_control_head)
10072     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
10073 
10074   /* TMP2 which is set here used by OP_KETRMAX below. */
10075   if (ket == OP_KETRMAX)
10076     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10077   else if (ket == OP_KETRMIN)
10078     {
10079     /* Move the STR_PTR to the private_data_ptr. */
10080     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10081     }
10082   }
10083 else
10084   {
10085   stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
10086   OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
10087   if (needs_control_head)
10088     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10089 
10090   if (ket == OP_KETRMAX)
10091     {
10092     /* TMP2 which is set here used by OP_KETRMAX below. */
10093     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10094     }
10095   }
10096 if (needs_control_head)
10097   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10098 }
10099 
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)10100 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
10101 {
10102 DEFINE_COMPILER;
10103 
10104 if (common->capture_last_ptr != 0)
10105   {
10106   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
10107   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10108   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10109   stacksize++;
10110   }
10111 if (common->optimized_cbracket[offset >> 1] == 0)
10112   {
10113   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
10114   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
10115   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10116   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10117   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10118   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10119   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10120   stacksize += 2;
10121   }
10122 return stacksize;
10123 }
10124 
do_script_run(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10125 static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10126 {
10127   if (PRIV(script_run)(ptr, endptr, FALSE))
10128     return endptr;
10129   return NULL;
10130 }
10131 
10132 #ifdef SUPPORT_UNICODE
10133 
do_script_run_utf(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10134 static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10135 {
10136   if (PRIV(script_run)(ptr, endptr, TRUE))
10137     return endptr;
10138   return NULL;
10139 }
10140 
10141 #endif /* SUPPORT_UNICODE */
10142 
match_script_run_common(compiler_common * common,int private_data_ptr,backtrack_common * parent)10143 static SLJIT_INLINE void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
10144 {
10145 DEFINE_COMPILER;
10146 
10147 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
10148 
10149 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10150 #ifdef SUPPORT_UNICODE
10151 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
10152   common->utf ? SLJIT_FUNC_OFFSET(do_script_run_utf) : SLJIT_FUNC_OFFSET(do_script_run));
10153 #else
10154 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_script_run));
10155 #endif
10156 
10157 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
10158 add_jump(compiler, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
10159 }
10160 
10161 /*
10162   Handling bracketed expressions is probably the most complex part.
10163 
10164   Stack layout naming characters:
10165     S - Push the current STR_PTR
10166     0 - Push a 0 (NULL)
10167     A - Push the current STR_PTR. Needed for restoring the STR_PTR
10168         before the next alternative. Not pushed if there are no alternatives.
10169     M - Any values pushed by the current alternative. Can be empty, or anything.
10170     C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
10171     L - Push the previous local (pointed by localptr) to the stack
   () - optional values stored on the stack
  ()* - optional, can be stored multiple times
10174 
10175   The following list shows the regular expression templates, their PCRE byte codes
10176   and stack layout supported by pcre-sljit.
10177 
10178   (?:)                     OP_BRA     | OP_KET                A M
10179   ()                       OP_CBRA    | OP_KET                C M
10180   (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
10181                            OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
10182   (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
10183                            OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
10184   ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
10185                            OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
10186   ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
10187                            OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
10188   (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
10189   (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
10190   ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
10191   ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
10192   (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
10193            OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
10194   (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
10195            OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
10196   ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
10197            OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
10198   ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
10199            OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*
10200 
10201 
10202   Stack layout naming characters:
10203     A - Push the alternative index (starting from 0) on the stack.
        Not pushed if there are no alternatives.
10205     M - Any values pushed by the current alternative. Can be empty, or anything.
10206 
10207   The next list shows the possible content of a bracket:
10208   (|)     OP_*BRA    | OP_ALT ...         M A
10209   (?()|)  OP_*COND   | OP_ALT             M A
10210   (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
10211                                           Or nothing, if trace is unnecessary
10212 */
10213 
compile_bracket_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)10214 static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10215 {
10216 DEFINE_COMPILER;
10217 backtrack_common *backtrack;
10218 PCRE2_UCHAR opcode;
10219 int private_data_ptr = 0;
10220 int offset = 0;
10221 int i, stacksize;
10222 int repeat_ptr = 0, repeat_length = 0;
10223 int repeat_type = 0, repeat_count = 0;
10224 PCRE2_SPTR ccbegin;
10225 PCRE2_SPTR matchingpath;
10226 PCRE2_SPTR slot;
10227 PCRE2_UCHAR bra = OP_BRA;
10228 PCRE2_UCHAR ket;
10229 assert_backtrack *assert;
10230 BOOL has_alternatives;
10231 BOOL needs_control_head = FALSE;
10232 struct sljit_jump *jump;
10233 struct sljit_jump *skip;
10234 struct sljit_label *rmax_label = NULL;
10235 struct sljit_jump *braminzero = NULL;
10236 
10237 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
10238 
10239 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
10240   {
10241   bra = *cc;
10242   cc++;
10243   opcode = *cc;
10244   }
10245 
10246 opcode = *cc;
10247 ccbegin = cc;
10248 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
10249 ket = *matchingpath;
10250 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
10251   {
10252   repeat_ptr = PRIVATE_DATA(matchingpath);
10253   repeat_length = PRIVATE_DATA(matchingpath + 1);
10254   repeat_type = PRIVATE_DATA(matchingpath + 2);
10255   repeat_count = PRIVATE_DATA(matchingpath + 3);
10256   SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
10257   if (repeat_type == OP_UPTO)
10258     ket = OP_KETRMAX;
10259   if (repeat_type == OP_MINUPTO)
10260     ket = OP_KETRMIN;
10261   }
10262 
10263 matchingpath = ccbegin + 1 + LINK_SIZE;
10264 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
10265 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
10266 cc += GET(cc, 1);
10267 
10268 has_alternatives = *cc == OP_ALT;
10269 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
10270   {
10271   SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
10272     compile_time_checks_must_be_grouped_together);
10273   has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
10274   }
10275 
10276 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
10277   opcode = OP_SCOND;
10278 
10279 if (opcode == OP_CBRA || opcode == OP_SCBRA)
10280   {
10281   /* Capturing brackets has a pre-allocated space. */
10282   offset = GET2(ccbegin, 1 + LINK_SIZE);
10283   if (common->optimized_cbracket[offset] == 0)
10284     {
10285     private_data_ptr = OVECTOR_PRIV(offset);
10286     offset <<= 1;
10287     }
10288   else
10289     {
10290     offset <<= 1;
10291     private_data_ptr = OVECTOR(offset);
10292     }
10293   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10294   matchingpath += IMM2_SIZE;
10295   }
10296 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10297   {
10298   /* Other brackets simply allocate the next entry. */
10299   private_data_ptr = PRIVATE_DATA(ccbegin);
10300   SLJIT_ASSERT(private_data_ptr != 0);
10301   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10302   if (opcode == OP_ONCE)
10303     BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
10304   }
10305 
10306 /* Instructions before the first alternative. */
10307 stacksize = 0;
10308 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10309   stacksize++;
10310 if (bra == OP_BRAZERO)
10311   stacksize++;
10312 
10313 if (stacksize > 0)
10314   allocate_stack(common, stacksize);
10315 
10316 stacksize = 0;
10317 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10318   {
10319   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10320   stacksize++;
10321   }
10322 
10323 if (bra == OP_BRAZERO)
10324   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10325 
10326 if (bra == OP_BRAMINZERO)
10327   {
10328   /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
10329   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10330   if (ket != OP_KETRMIN)
10331     {
10332     free_stack(common, 1);
10333     braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10334     }
10335   else if (opcode == OP_ONCE || opcode >= OP_SBRA)
10336     {
10337     jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10338     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10339     /* Nothing stored during the first run. */
10340     skip = JUMP(SLJIT_JUMP);
10341     JUMPHERE(jump);
10342     /* Checking zero-length iteration. */
10343     if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10344       {
10345       /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
10346       braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10347       }
10348     else
10349       {
10350       /* Except when the whole stack frame must be saved. */
10351       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10352       braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
10353       }
10354     JUMPHERE(skip);
10355     }
10356   else
10357     {
10358     jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10359     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10360     JUMPHERE(jump);
10361     }
10362   }
10363 
10364 if (repeat_type != 0)
10365   {
10366   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
10367   if (repeat_type == OP_EXACT)
10368     rmax_label = LABEL();
10369   }
10370 
10371 if (ket == OP_KETRMIN)
10372   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10373 
10374 if (ket == OP_KETRMAX)
10375   {
10376   rmax_label = LABEL();
10377   if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
10378     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
10379   }
10380 
10381 /* Handling capturing brackets and alternatives. */
10382 if (opcode == OP_ONCE)
10383   {
10384   stacksize = 0;
10385   if (needs_control_head)
10386     {
10387     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10388     stacksize++;
10389     }
10390 
10391   if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10392     {
10393     /* Neither capturing brackets nor recursions are found in the block. */
10394     if (ket == OP_KETRMIN)
10395       {
10396       stacksize += 2;
10397       if (!needs_control_head)
10398         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10399       }
10400     else
10401       {
10402       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10403         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
10404       if (ket == OP_KETRMAX || has_alternatives)
10405         stacksize++;
10406       }
10407 
10408     if (stacksize > 0)
10409       allocate_stack(common, stacksize);
10410 
10411     stacksize = 0;
10412     if (needs_control_head)
10413       {
10414       stacksize++;
10415       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10416       }
10417 
10418     if (ket == OP_KETRMIN)
10419       {
10420       if (needs_control_head)
10421         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10422       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10423       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10424         OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
10425       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10426       }
10427     else if (ket == OP_KETRMAX || has_alternatives)
10428       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10429     }
10430   else
10431     {
10432     if (ket != OP_KET || has_alternatives)
10433       stacksize++;
10434 
10435     stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
10436     allocate_stack(common, stacksize);
10437 
10438     if (needs_control_head)
10439       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10440 
10441     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10442     OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10443 
10444     stacksize = needs_control_head ? 1 : 0;
10445     if (ket != OP_KET || has_alternatives)
10446       {
10447       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10448       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10449       stacksize++;
10450       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10451       }
10452     else
10453       {
10454       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10455       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10456       }
10457     init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
10458     }
10459   }
10460 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
10461   {
10462   /* Saving the previous values. */
10463   if (common->optimized_cbracket[offset >> 1] != 0)
10464     {
10465     SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
10466     allocate_stack(common, 2);
10467     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10468     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
10469     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10470     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
10471     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
10472     }
10473   else
10474     {
10475     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10476     allocate_stack(common, 1);
10477     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10478     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10479     }
10480   }
10481 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10482   {
10483   /* Saving the previous value. */
10484   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10485   allocate_stack(common, 1);
10486   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10487   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10488   }
10489 else if (has_alternatives)
10490   {
10491   /* Pushing the starting string pointer. */
10492   allocate_stack(common, 1);
10493   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10494   }
10495 
10496 /* Generating code for the first alternative. */
10497 if (opcode == OP_COND || opcode == OP_SCOND)
10498   {
10499   if (*matchingpath == OP_CREF)
10500     {
10501     SLJIT_ASSERT(has_alternatives);
10502     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
10503       CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
10504     matchingpath += 1 + IMM2_SIZE;
10505     }
10506   else if (*matchingpath == OP_DNCREF)
10507     {
10508     SLJIT_ASSERT(has_alternatives);
10509 
10510     i = GET2(matchingpath, 1 + IMM2_SIZE);
10511     slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10512     OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
10513     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
10514     OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
10515     slot += common->name_entry_size;
10516     i--;
10517     while (i-- > 0)
10518       {
10519       OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
10520       OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
10521       slot += common->name_entry_size;
10522       }
10523     OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
10524     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
10525     matchingpath += 1 + 2 * IMM2_SIZE;
10526     }
10527   else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
10528     {
10529     /* Never has other case. */
10530     BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
10531     SLJIT_ASSERT(!has_alternatives);
10532 
10533     if (*matchingpath == OP_TRUE)
10534       {
10535       stacksize = 1;
10536       matchingpath++;
10537       }
10538     else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
10539       stacksize = 0;
10540     else if (*matchingpath == OP_RREF)
10541       {
10542       stacksize = GET2(matchingpath, 1);
10543       if (common->currententry == NULL)
10544         stacksize = 0;
10545       else if (stacksize == RREF_ANY)
10546         stacksize = 1;
10547       else if (common->currententry->start == 0)
10548         stacksize = stacksize == 0;
10549       else
10550         stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10551 
10552       if (stacksize != 0)
10553         matchingpath += 1 + IMM2_SIZE;
10554       }
10555     else
10556       {
10557       if (common->currententry == NULL || common->currententry->start == 0)
10558         stacksize = 0;
10559       else
10560         {
10561         stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
10562         slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10563         i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10564         while (stacksize > 0)
10565           {
10566           if ((int)GET2(slot, 0) == i)
10567             break;
10568           slot += common->name_entry_size;
10569           stacksize--;
10570           }
10571         }
10572 
10573       if (stacksize != 0)
10574         matchingpath += 1 + 2 * IMM2_SIZE;
10575       }
10576 
10577       /* The stacksize == 0 is a common "else" case. */
10578       if (stacksize == 0)
10579         {
10580         if (*cc == OP_ALT)
10581           {
10582           matchingpath = cc + 1 + LINK_SIZE;
10583           cc += GET(cc, 1);
10584           }
10585         else
10586           matchingpath = cc;
10587         }
10588     }
10589   else
10590     {
10591     SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
10592     /* Similar code as PUSH_BACKTRACK macro. */
10593     assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
10594     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10595       return NULL;
10596     memset(assert, 0, sizeof(assert_backtrack));
10597     assert->common.cc = matchingpath;
10598     BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
10599     matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
10600     }
10601   }
10602 
10603 compile_matchingpath(common, matchingpath, cc, backtrack);
10604 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10605   return NULL;
10606 
10607 if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
10608   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10609 
10610 if (opcode == OP_ONCE)
10611   match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
10612 
10613 if (opcode == OP_SCRIPT_RUN)
10614   match_script_run_common(common, private_data_ptr, backtrack);
10615 
10616 stacksize = 0;
10617 if (repeat_type == OP_MINUPTO)
10618   {
10619   /* We need to preserve the counter. TMP2 will be used below. */
10620   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10621   stacksize++;
10622   }
10623 if (ket != OP_KET || bra != OP_BRA)
10624   stacksize++;
10625 if (offset != 0)
10626   {
10627   if (common->capture_last_ptr != 0)
10628     stacksize++;
10629   if (common->optimized_cbracket[offset >> 1] == 0)
10630     stacksize += 2;
10631   }
10632 if (has_alternatives && opcode != OP_ONCE)
10633   stacksize++;
10634 
10635 if (stacksize > 0)
10636   allocate_stack(common, stacksize);
10637 
10638 stacksize = 0;
10639 if (repeat_type == OP_MINUPTO)
10640   {
10641   /* TMP2 was set above. */
10642   OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
10643   stacksize++;
10644   }
10645 
10646 if (ket != OP_KET || bra != OP_BRA)
10647   {
10648   if (ket != OP_KET)
10649     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10650   else
10651     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10652   stacksize++;
10653   }
10654 
10655 if (offset != 0)
10656   stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
10657 
10658 /* Skip and count the other alternatives. */
10659 i = 1;
10660 while (*cc == OP_ALT)
10661   {
10662   cc += GET(cc, 1);
10663   i++;
10664   }
10665 
10666 if (has_alternatives)
10667   {
10668   if (opcode != OP_ONCE)
10669     {
10670     if (i <= 3)
10671       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10672     else
10673       BACKTRACK_AS(bracket_backtrack)->u.matching_put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
10674     }
10675   if (ket != OP_KETRMAX)
10676     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10677   }
10678 
10679 /* Must be after the matchingpath label. */
10680 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
10681   {
10682   SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
10683   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10684   }
10685 
10686 if (ket == OP_KETRMAX)
10687   {
10688   if (repeat_type != 0)
10689     {
10690     if (has_alternatives)
10691       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10692     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10693     JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10694     /* Drop STR_PTR for greedy plus quantifier. */
10695     if (opcode != OP_ONCE)
10696       free_stack(common, 1);
10697     }
10698   else if (opcode < OP_BRA || opcode >= OP_SBRA)
10699     {
10700     if (has_alternatives)
10701       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10702 
10703     /* Checking zero-length iteration. */
10704     if (opcode != OP_ONCE)
10705       {
10706       /* This case includes opcodes such as OP_SCRIPT_RUN. */
10707       CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
10708       /* Drop STR_PTR for greedy plus quantifier. */
10709       if (bra != OP_BRAZERO)
10710         free_stack(common, 1);
10711       }
10712     else
10713       /* TMP2 must contain the starting STR_PTR. */
10714       CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
10715     }
10716   else
10717     JUMPTO(SLJIT_JUMP, rmax_label);
10718   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10719   }
10720 
10721 if (repeat_type == OP_EXACT)
10722   {
10723   count_match(common);
10724   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10725   JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10726   }
10727 else if (repeat_type == OP_UPTO)
10728   {
10729   /* We need to preserve the counter. */
10730   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10731   allocate_stack(common, 1);
10732   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10733   }
10734 
10735 if (bra == OP_BRAZERO)
10736   BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
10737 
10738 if (bra == OP_BRAMINZERO)
10739   {
10740   /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
10741   JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
10742   if (braminzero != NULL)
10743     {
10744     JUMPHERE(braminzero);
10745     /* We need to release the end pointer to perform the
10746     backtrack for the zero-length iteration. When
10747     framesize is < 0, OP_ONCE will do the release itself. */
10748     if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
10749       {
10750       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10751       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10752       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
10753       }
10754     else if (ket == OP_KETRMIN && opcode != OP_ONCE)
10755       free_stack(common, 1);
10756     }
10757   /* Continue to the normal backtrack. */
10758   }
10759 
10760 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
10761   count_match(common);
10762 
10763 cc += 1 + LINK_SIZE;
10764 
10765 if (opcode == OP_ONCE)
10766   {
10767   /* We temporarily encode the needs_control_head in the lowest bit.
10768      Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
10769      the same value for small signed numbers (including negative numbers). */
10770   BACKTRACK_AS(bracket_backtrack)->u.framesize = (int)((unsigned)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
10771   }
10772 return cc + repeat_length;
10773 }
10774 
/* Emits the matching path for a bracket whose opcode is OP_BRAPOS,
OP_SBRAPOS, OP_CBRAPOS or OP_SCBRAPOS, optionally preceded by OP_BRAPOSZERO.

The generated code saves the state it needs on the machine stack, then runs a
loop: every alternative of the bracket is compiled with compile_matchingpath(),
and after an alternative matches the code updates the saved state and jumps
back to the loop label. The backtracking path of each alternative is compiled
right after it with compile_backtrackingpath().

Arguments:
  common   compiler state; capture_last_ptr, control_head_ptr and
           optimized_cbracket are read here
  cc       points to the bracket opcode, or to the OP_BRAPOSZERO before it
  parent   not referenced by name in this function body; presumably consumed
           by the PUSH_BACKTRACK macro - TODO confirm

Returns:   the address just after the closing OP_KETRPOS (cc + 1 + LINK_SIZE),
           or NULL when sljit_get_compiler_error() reports an error

NOTE(review): 'compiler' and 'backtrack' are never assigned in the visible
text; presumably DEFINE_COMPILER and PUSH_BACKTRACK set them - TODO confirm. */
compile_bracketpos_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)10775 static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10776 {
10777 DEFINE_COMPILER;
10778 backtrack_common *backtrack;
10779 PCRE2_UCHAR opcode;
10780 int private_data_ptr;
10781 int cbraprivptr = 0;
10782 BOOL needs_control_head;
10783 int framesize;
10784 int stacksize;
10785 int offset = 0;
10786 BOOL zero = FALSE;
10787 PCRE2_SPTR ccbegin = NULL;
10788 int stack; /* Also contains the offset of control head. */
10789 struct sljit_label *loop = NULL;
10790 struct jump_list *emptymatch = NULL;
10791 
10792 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
      /* A leading OP_BRAPOSZERO only sets 'zero'; the bracket opcode follows it. */
10793 if (*cc == OP_BRAPOSZERO)
10794   {
10795   zero = TRUE;
10796   cc++;
10797   }
10798 
10799 opcode = *cc;
10800 private_data_ptr = PRIVATE_DATA(cc);
10801 SLJIT_ASSERT(private_data_ptr != 0);
10802 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
      /* Find the start of the first alternative (ccbegin). The capturing
      variants also decode the capture number that follows the link field. */
10803 switch(opcode)
10804   {
10805   case OP_BRAPOS:
10806   case OP_SBRAPOS:
10807   ccbegin = cc + 1 + LINK_SIZE;
10808   break;
10809 
10810   case OP_CBRAPOS:
10811   case OP_SCBRAPOS:
10812   offset = GET2(cc, 1 + LINK_SIZE);
10813   /* This case cannot be optimized in the same way as
10814   normal capturing brackets. */
10815   SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
10816   cbraprivptr = OVECTOR_PRIV(offset);
        /* From here on 'offset' is twice the number read above; the code uses
        OVECTOR(offset) and OVECTOR(offset + 1) for this capture, and
        offset != 0 doubles as the "is capturing" test. */
10817   offset <<= 1;
10818   ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
10819   break;
10820 
10821   default:
10822   SLJIT_UNREACHABLE();
10823   break;
10824   }
10825 
      /* A negative framesize selects the frameless layout; otherwise a frame
      is written with init_frame() below. */
10826 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
10827 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
10828 if (framesize < 0)
10829   {
        /* Slot count: two ovector values (plus the capture_last value when
        capture_last_ptr is in use) for capturing brackets, otherwise one slot
        for STR_PTR; plus one for the control head and one for the flag that
        is only present when !zero. */
10830   if (offset != 0)
10831     {
10832     stacksize = 2;
10833     if (common->capture_last_ptr != 0)
10834       stacksize++;
10835     }
10836   else
10837     stacksize = 1;
10838 
10839   if (needs_control_head)
10840     stacksize++;
10841   if (!zero)
10842     stacksize++;
10843 
10844   BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
10845   allocate_stack(common, stacksize);
10846   if (framesize == no_frame)
10847     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
10848 
        /* Fill the slots. TMP1 and TMP2 are loaded before they are stored, so
        the order of the emitted moves below matters. */
10849   stack = 0;
10850   if (offset != 0)
10851     {
10852     stack = 2;
10853     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
10854     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
10855     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
10856     if (common->capture_last_ptr != 0)
10857       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
10858     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
10859     if (needs_control_head)
10860       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10861     if (common->capture_last_ptr != 0)
10862       {
10863       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
10864       stack = 3;
10865       }
10866     }
10867   else
10868     {
10869     if (needs_control_head)
10870       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10871     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10872     stack = 1;
10873     }
10874 
        /* The flag (initial value 1) goes into the last slot, after the
        control head slot. 'stack' is stepped forward to store the flag and
        back again, so it ends up as the index of the control head slot. */
10875   if (needs_control_head)
10876     stack++;
10877   if (!zero)
10878     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
10879   if (needs_control_head)
10880     {
10881     stack--;
10882     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
10883     }
10884   }
10885 else
10886   {
        /* Framed layout: the frame itself plus one slot for the previous
        private data value, and optional slots for the flag (!zero), the
        control head and STR_PTR (non-capturing brackets only). */
10887   stacksize = framesize + 1;
10888   if (!zero)
10889     stacksize++;
10890   if (needs_control_head)
10891     stacksize++;
10892   if (offset == 0)
10893     stacksize++;
10894   BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
10895 
10896   allocate_stack(common, stacksize);
        /* TMP1 keeps the previous private data value; the private data slot is
        then set to STACK_TOP plus the size of the allocation. */
10897   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10898   if (needs_control_head)
10899     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10900   OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10901 
        /* Slot order here: flag (initial value 1), control head, STR_PTR,
        previous private data value; each optional as computed above. */
10902   stack = 0;
10903   if (!zero)
10904     {
10905     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
10906     stack = 1;
10907     }
10908   if (needs_control_head)
10909     {
10910     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
10911     stack++;
10912     }
10913   if (offset == 0)
10914     {
10915     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
10916     stack++;
10917     }
10918   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
10919   init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
        /* Step 'stack' back over the slots written after the control head so
        that it indexes the control head slot again (used inside the loop). */
10920   stack -= 1 + (offset == 0);
10921   }
10922 
      /* Capturing brackets keep the current start position in cbraprivptr. */
10923 if (offset != 0)
10924   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
10925 
      /* One pass of this C loop per alternative; the emitted code of every
      alternative jumps back to 'loop' after a successful match. */
10926 loop = LABEL();
10927 while (*cc != OP_KETRPOS)
10928   {
10929   backtrack->top = NULL;
10930   backtrack->topbacktracks = NULL;
        /* Move cc to the end of the current alternative. */
10931   cc += GET(cc, 1);
10932 
10933   compile_matchingpath(common, ccbegin, cc, backtrack);
10934   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10935     return NULL;
10936 
        /* Code emitted for "this alternative matched". */
10937   if (framesize < 0)
10938     {
10939     if (framesize == no_frame)
10940       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10941 
10942     if (offset != 0)
10943       {
            /* TMP1 = position saved in cbraprivptr; it is stored into
            OVECTOR(offset), the current STR_PTR goes into OVECTOR(offset + 1)
            and also replaces the saved position in cbraprivptr. */
10944       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
10945       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10946       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
10947       if (common->capture_last_ptr != 0)
10948         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10949       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10950       }
10951     else
10952       {
            /* The previous position is only loaded into TMP1 when the
            empty-match comparison below needs it (OP_SBRAPOS). */
10953       if (opcode == OP_SBRAPOS)
10954         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10955       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10956       }
10957 
10958     /* Even if the match is empty, we need to reset the control head. */
10959     if (needs_control_head)
10960       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
10961 
          /* For the S variants leave the loop when STR_PTR equals the position
          held in TMP1. */
10962     if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
10963       add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
10964 
          /* Clear the flag stored in the last slot. */
10965     if (!zero)
10966       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
10967     }
10968   else
10969     {
10970     if (offset != 0)
10971       {
            /* STACK_TOP is recomputed from the private data slot, which was
            set to STACK_TOP plus the allocation size above. */
10972       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10973       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
10974       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10975       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
10976       if (common->capture_last_ptr != 0)
10977         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10978       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10979       }
10980     else
10981       {
            /* TMP2 = private data value; the saved STR_PTR slot is addressed
            relative to it as STACK(-framesize - 2). */
10982       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10983       OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10984       if (opcode == OP_SBRAPOS)
10985         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
10986       OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
10987       }
10988 
10989     /* Even if the match is empty, we need to reset the control head. */
10990     if (needs_control_head)
10991       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
10992 
10993     if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
10994       add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
10995 
10996     if (!zero)
10997       {
            /* NOTE(review): this is the else arm of the framesize < 0 test
            above and framesize is not modified in between, so the first arm
            below cannot be taken; only the STACK(0) store is ever emitted. */
10998       if (framesize < 0)
10999         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
11000       else
11001         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
11002       }
11003     }
11004 
11005   JUMPTO(SLJIT_JUMP, loop);
11006   flush_stubs(common);
11007 
        /* Backtracking code of this alternative; its pending jumps land on
        the label created right after it. */
11008   compile_backtrackingpath(common, backtrack->top);
11009   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11010     return NULL;
11011   set_jumps(backtrack->topbacktracks, LABEL());
11012 
        /* The alternative failed: reload STR_PTR from where it was saved. */
11013   if (framesize < 0)
11014     {
11015     if (offset != 0)
11016       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11017     else
11018       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11019     }
11020   else
11021     {
11022     if (offset != 0)
11023       {
11024       /* Last alternative. */
            /* TMP2 is loaded here because the flag test after the loop reads
            memory relative to TMP2. */
11025       if (*cc == OP_KETRPOS)
11026         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11027       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11028       }
11029     else
11030       {
11031       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11032       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
11033       }
11034     }
11035 
11036   if (*cc == OP_KETRPOS)
11037     break;
        /* Next alternative starts after the opcode and link at cc. */
11038   ccbegin = cc + 1 + LINK_SIZE;
11039   }
11040 
11041 /* We don't have to restore the control head in case of a failed match. */
11042 
      /* Without OP_BRAPOSZERO the flag slot is tested: it was initialised to 1
      and is set to 0 after a successful pass, so a non-zero value sends
      control to the backtrack list. */
11043 backtrack->topbacktracks = NULL;
11044 if (!zero)
11045   {
11046   if (framesize < 0)
11047     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
11048   else /* TMP2 is set to [private_data_ptr] above. */
11049     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
11050   }
11051 
11052 /* None of them matched. */
      /* The empty-match exits of the S variants also continue from here. */
11053 set_jumps(emptymatch, LABEL());
11054 count_match(common);
11055 return cc + 1 + LINK_SIZE;
11056 }
11057 
/* Decodes the repeat item at cc into a normalised description.

The different opcode families (plain character, caseless, negated, negated
caseless, character type and character class) are reduced to the OP_STAR ...
OP_POSUPTO family in *opcode, and the kind of item being repeated is returned
in *type. The arithmetic on *opcode relies on the relative order of the opcode
values, which is defined outside this function.

Arguments:
  common   compiler state; used for next_opcode() and, with SUPPORT_UNICODE,
           for the utf flag
  cc       points to the repeat opcode, or to OP_CLASS / OP_NCLASS / OP_XCLASS
  opcode   receives the normalised repeat opcode
  type     receives the repeated item kind: OP_CHAR, OP_CHARI, OP_NOT,
           OP_NOTI, the character type opcode read from the pattern, or the
           class opcode
  max      written only for the UPTO family and for class ranges; left
           untouched otherwise, so the caller must initialise it
  exact    receives the count of mandatory repetitions (0 when there is none)
  end      receives the address just after the whole item

Returns:   pointer to the data of the repeated item: the character, the
           character type opcode's operand, or the class data that follows the
           class opcode */
get_iterator_parameters(compiler_common * common,PCRE2_SPTR cc,PCRE2_UCHAR * opcode,PCRE2_UCHAR * type,sljit_u32 * max,sljit_u32 * exact,PCRE2_SPTR * end)11058 static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
11059 {
11060 int class_len;
11061 
11062 *opcode = *cc;
11063 *exact = 0;
11064 
      /* Each family is rebased onto the OP_STAR family by subtracting the
      distance between the two family bases. */
11065 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
11066   {
11067   cc++;
11068   *type = OP_CHAR;
11069   }
11070 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
11071   {
11072   cc++;
11073   *type = OP_CHARI;
11074   *opcode -= OP_STARI - OP_STAR;
11075   }
11076 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
11077   {
11078   cc++;
11079   *type = OP_NOT;
11080   *opcode -= OP_NOTSTAR - OP_STAR;
11081   }
11082 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
11083   {
11084   cc++;
11085   *type = OP_NOTI;
11086   *opcode -= OP_NOTSTARI - OP_STAR;
11087   }
11088 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
11089   {
11090   cc++;
11091   *opcode -= OP_TYPESTAR - OP_STAR;
        /* OP_END is a temporary marker; the real type is read from the
        pattern at the end of this function. */
11092   *type = OP_END;
11093   }
11094 else
11095   {
        /* Character class: the repeat opcode follows the class data, so this
        branch decodes everything itself and returns early. */
11096   SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
11097   *type = *opcode;
11098   cc++;
        /* class_len is measured from the class opcode: fixed for OP_CLASS and
        OP_NCLASS, read from the pattern for OP_XCLASS. cc has already been
        advanced by one, hence the "- 1" when reading the repeat opcode. */
11099   class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
11100   *opcode = cc[class_len - 1];
11101 
11102   if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
11103     {
11104     *opcode -= OP_CRSTAR - OP_STAR;
11105     *end = cc + class_len;
11106 
          /* A plus is reported as one mandatory repetition followed by the
          matching star opcode. */
11107     if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
11108       {
11109       *exact = 1;
11110       *opcode -= OP_PLUS - OP_STAR;
11111       }
11112     }
11113   else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
11114     {
11115     *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
11116     *end = cc + class_len;
11117 
11118     if (*opcode == OP_POSPLUS)
11119       {
11120       *exact = 1;
11121       *opcode = OP_POSSTAR;
11122       }
11123     }
11124   else
11125     {
          /* Range repeat: two IMM2 values follow the repeat opcode; the first
          is stored in *exact and the second in *max. */
11126     SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
11127     *max = GET2(cc, (class_len + IMM2_SIZE));
11128     *exact = GET2(cc, class_len);
11129 
          /* A stored maximum of zero is mapped to the star family. */
11130     if (*max == 0)
11131       {
11132       if (*opcode == OP_CRPOSRANGE)
11133         *opcode = OP_POSSTAR;
11134       else
11135         *opcode -= OP_CRRANGE - OP_STAR;
11136       }
11137     else
11138       {
            /* *max becomes the number of optional repetitions left after the
            mandatory ones: 0 -> OP_EXACT, 1 -> query family, otherwise the
            upto family. */
11139       *max -= *exact;
11140       if (*max == 0)
11141         *opcode = OP_EXACT;
11142       else if (*max == 1)
11143         {
11144         if (*opcode == OP_CRPOSRANGE)
11145           *opcode = OP_POSQUERY;
11146         else
11147           *opcode -= OP_CRRANGE - OP_QUERY;
11148         }
11149       else
11150         {
11151         if (*opcode == OP_CRPOSRANGE)
11152           *opcode = OP_POSUPTO;
11153         else
11154           *opcode -= OP_CRRANGE - OP_UPTO;
11155         }
11156       }
11157     *end = cc + class_len + 2 * IMM2_SIZE;
11158     }
11159   return cc;
11160   }
11161 
      /* Non-class items: read the count operand where the opcode has one and
      rewrite plus as "one mandatory repetition + star". Opcodes not listed
      need no adjustment. */
11162 switch(*opcode)
11163   {
11164   case OP_EXACT:
11165   *exact = GET2(cc, 0);
11166   cc += IMM2_SIZE;
11167   break;
11168 
11169   case OP_PLUS:
11170   case OP_MINPLUS:
11171   *exact = 1;
11172   *opcode -= OP_PLUS - OP_STAR;
11173   break;
11174 
11175   case OP_POSPLUS:
11176   *exact = 1;
11177   *opcode = OP_POSSTAR;
11178   break;
11179 
11180   case OP_UPTO:
11181   case OP_MINUPTO:
11182   case OP_POSUPTO:
11183   *max = GET2(cc, 0);
11184   cc += IMM2_SIZE;
11185   break;
11186   }
11187 
      /* Character type repeat: the type opcode is at cc; its length is
      obtained from next_opcode() and the returned pointer is past it. */
11188 if (*type == OP_END)
11189   {
11190   *type = *cc;
11191   *end = next_opcode(common, cc);
11192   cc++;
11193   return cc;
11194   }
11195 
      /* Single character: one code unit, plus the extra code units reported
      by GET_EXTRALEN() in UTF mode. */
11196 *end = cc + 1;
11197 #ifdef SUPPORT_UNICODE
11198 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
11199 #endif
11200 return cc;
11201 }
11202 
compile_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11203 static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11204 {
11205 DEFINE_COMPILER;
11206 backtrack_common *backtrack;
11207 PCRE2_UCHAR opcode;
11208 PCRE2_UCHAR type;
11209 sljit_u32 max = 0, exact;
11210 sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
11211 sljit_s32 early_fail_type;
11212 BOOL charpos_enabled;
11213 PCRE2_UCHAR charpos_char;
11214 unsigned int charpos_othercasebit;
11215 PCRE2_SPTR end;
11216 jump_list *no_match = NULL;
11217 jump_list *no_char1_match = NULL;
11218 struct sljit_jump *jump = NULL;
11219 struct sljit_label *label;
11220 int private_data_ptr = PRIVATE_DATA(cc);
11221 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
11222 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
11223 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
11224 int tmp_base, tmp_offset;
11225 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11226 BOOL use_tmp;
11227 #endif
11228 
11229 PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
11230 
11231 early_fail_type = (early_fail_ptr & 0x7);
11232 early_fail_ptr >>= 3;
11233 
11234 /* During recursion, these optimizations are disabled. */
11235 if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL)
11236   {
11237   early_fail_ptr = 0;
11238   early_fail_type = type_skip;
11239   }
11240 
11241 SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
11242   || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));
11243 
11244 if (early_fail_type == type_fail)
11245   add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));
11246 
11247 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
11248 
11249 if (type != OP_EXTUNI)
11250   {
11251   tmp_base = TMP3;
11252   tmp_offset = 0;
11253   }
11254 else
11255   {
11256   tmp_base = SLJIT_MEM1(SLJIT_SP);
11257   tmp_offset = POSSESSIVE0;
11258   }
11259 
11260 /* Handle fixed part first. */
11261 if (exact > 1)
11262   {
11263   SLJIT_ASSERT(early_fail_ptr == 0);
11264 
11265   if (common->mode == PCRE2_JIT_COMPLETE
11266 #ifdef SUPPORT_UNICODE
11267       && !common->utf
11268 #endif
11269       && type != OP_ANYNL && type != OP_EXTUNI)
11270     {
11271     OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
11272     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
11273     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11274     label = LABEL();
11275     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11276     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11277     JUMPTO(SLJIT_NOT_ZERO, label);
11278     }
11279   else
11280     {
11281     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11282     label = LABEL();
11283     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11284     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11285     JUMPTO(SLJIT_NOT_ZERO, label);
11286     }
11287   }
11288 else if (exact == 1)
11289   compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11290 
11291 if (early_fail_type == type_fail_range)
11292   {
11293   /* Range end first, followed by range start. */
11294   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
11295   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw));
11296   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
11297   OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
11298   add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
11299 
11300   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11301   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw), STR_PTR, 0);
11302   }
11303 
11304 switch(opcode)
11305   {
11306   case OP_STAR:
11307   case OP_UPTO:
11308   SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR);
11309 
11310   if (type == OP_ANYNL || type == OP_EXTUNI)
11311     {
11312     SLJIT_ASSERT(private_data_ptr == 0);
11313     SLJIT_ASSERT(early_fail_ptr == 0);
11314 
11315     allocate_stack(common, 2);
11316     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11317     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
11318 
11319     if (opcode == OP_UPTO)
11320       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
11321 
11322     label = LABEL();
11323     compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11324     if (opcode == OP_UPTO)
11325       {
11326       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
11327       OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11328       jump = JUMP(SLJIT_ZERO);
11329       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
11330       }
11331 
11332     /* We cannot use TMP3 because of allocate_stack. */
11333     allocate_stack(common, 1);
11334     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11335     JUMPTO(SLJIT_JUMP, label);
11336     if (jump != NULL)
11337       JUMPHERE(jump);
11338     BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11339     break;
11340     }
11341 #ifdef SUPPORT_UNICODE
11342   else if (type == OP_ALLANY && !common->invalid_utf)
11343 #else
11344   else if (type == OP_ALLANY)
11345 #endif
11346     {
11347     if (opcode == OP_STAR)
11348       {
11349       if (private_data_ptr == 0)
11350         allocate_stack(common, 2);
11351 
11352       OP1(SLJIT_MOV, base, offset0, STR_END, 0);
11353       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11354 
11355       OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11356       process_partial_match(common);
11357 
11358       if (early_fail_ptr != 0)
11359         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11360       BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11361       break;
11362       }
11363 #ifdef SUPPORT_UNICODE
11364     else if (!common->utf)
11365 #else
11366     else
11367 #endif
11368       {
11369       if (private_data_ptr == 0)
11370         allocate_stack(common, 2);
11371 
11372       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11373       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11374 
11375       if (common->mode == PCRE2_JIT_COMPLETE)
11376         {
11377         OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
11378         CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11379         }
11380       else
11381         {
11382         jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11383         process_partial_match(common);
11384         JUMPHERE(jump);
11385         }
11386 
11387       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11388 
11389       if (early_fail_ptr != 0)
11390         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11391       BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11392       break;
11393       }
11394     }
11395 
11396   charpos_enabled = FALSE;
11397   charpos_char = 0;
11398   charpos_othercasebit = 0;
11399 
11400   if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
11401     {
11402 #ifdef SUPPORT_UNICODE
11403     charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
11404 #else
11405     charpos_enabled = TRUE;
11406 #endif
11407     if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
11408       {
11409       charpos_othercasebit = char_get_othercase_bit(common, end + 1);
11410       if (charpos_othercasebit == 0)
11411         charpos_enabled = FALSE;
11412       }
11413 
11414     if (charpos_enabled)
11415       {
11416       charpos_char = end[1];
11417       /* Consume the OP_CHAR opcode. */
11418       end += 2;
11419 #if PCRE2_CODE_UNIT_WIDTH == 8
11420       SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
11421 #elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
11422       SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
11423       if ((charpos_othercasebit & 0x100) != 0)
11424         charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
11425 #endif
11426       if (charpos_othercasebit != 0)
11427         charpos_char |= charpos_othercasebit;
11428 
11429       BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
11430       BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
11431       BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
11432       }
11433     }
11434 
11435   if (charpos_enabled)
11436     {
11437     if (opcode == OP_UPTO)
11438       OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
11439 
11440     /* Search the first instance of charpos_char. */
11441     jump = JUMP(SLJIT_JUMP);
11442     label = LABEL();
11443     if (opcode == OP_UPTO)
11444       {
11445       OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11446       add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
11447       }
11448     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11449     if (early_fail_ptr != 0)
11450       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11451     JUMPHERE(jump);
11452 
11453     detect_partial_match(common, &backtrack->topbacktracks);
11454     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11455     if (charpos_othercasebit != 0)
11456       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11457     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11458 
11459     if (private_data_ptr == 0)
11460       allocate_stack(common, 2);
11461     OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11462     OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11463 
11464     if (opcode == OP_UPTO)
11465       {
11466       OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11467       add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11468       }
11469 
11470     /* Search the last instance of charpos_char. */
11471     label = LABEL();
11472     compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11473     if (early_fail_ptr != 0)
11474       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11475     detect_partial_match(common, &no_match);
11476     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11477     if (charpos_othercasebit != 0)
11478       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11479 
11480     if (opcode == OP_STAR)
11481       {
11482       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11483       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11484       JUMPTO(SLJIT_JUMP, label);
11485       }
11486     else
11487       {
11488       jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
11489       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11490       JUMPHERE(jump);
11491       OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11492       JUMPTO(SLJIT_NOT_ZERO, label);
11493       }
11494 
11495     set_jumps(no_match, LABEL());
11496     OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1));
11497     OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11498     }
11499   else
11500     {
11501     if (private_data_ptr == 0)
11502       allocate_stack(common, 2);
11503 
11504     OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11505 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11506     use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR);
11507     SLJIT_ASSERT(!use_tmp || tmp_base == TMP3);
11508 
11509     if (common->utf)
11510       OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11511 #endif
11512     if (opcode == OP_UPTO)
11513       OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11514 
11515     detect_partial_match(common, &no_match);
11516     label = LABEL();
11517     compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11518 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11519     if (common->utf)
11520       OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11521 #endif
11522 
11523     if (opcode == OP_UPTO)
11524       {
11525       OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11526       add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11527       }
11528 
11529     detect_partial_match_to(common, label);
11530     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11531 
11532     set_jumps(no_char1_match, LABEL());
11533 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11534     if (common->utf)
11535       {
11536       set_jumps(no_match, LABEL());
11537       if (use_tmp)
11538         {
11539         OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
11540         OP1(SLJIT_MOV, base, offset0, TMP3, 0);
11541         }
11542       else
11543         OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11544       }
11545     else
11546 #endif
11547       {
11548       OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11549       set_jumps(no_match, LABEL());
11550       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11551       }
11552 
11553     if (early_fail_ptr != 0)
11554       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11555     }
11556 
11557   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11558   break;
11559 
11560   case OP_MINSTAR:
11561   if (private_data_ptr == 0)
11562     allocate_stack(common, 1);
11563   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11564   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11565   if (early_fail_ptr != 0)
11566     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11567   break;
11568 
11569   case OP_MINUPTO:
11570   SLJIT_ASSERT(early_fail_ptr == 0);
11571   if (private_data_ptr == 0)
11572     allocate_stack(common, 2);
11573   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11574   OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
11575   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11576   break;
11577 
11578   case OP_QUERY:
11579   case OP_MINQUERY:
11580   SLJIT_ASSERT(early_fail_ptr == 0);
11581   if (private_data_ptr == 0)
11582     allocate_stack(common, 1);
11583   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11584   if (opcode == OP_QUERY)
11585     compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11586   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11587   break;
11588 
11589   case OP_EXACT:
11590   break;
11591 
11592   case OP_POSSTAR:
11593 #if defined SUPPORT_UNICODE
11594   if (type == OP_ALLANY && !common->invalid_utf)
11595 #else
11596   if (type == OP_ALLANY)
11597 #endif
11598     {
11599     OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11600     process_partial_match(common);
11601     if (early_fail_ptr != 0)
11602       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11603     break;
11604     }
11605 
11606 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11607   if (common->utf)
11608     {
11609     OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11610     detect_partial_match(common, &no_match);
11611     label = LABEL();
11612     compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11613     OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11614     detect_partial_match_to(common, label);
11615 
11616     set_jumps(no_match, LABEL());
11617     OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11618     if (early_fail_ptr != 0)
11619       {
11620       if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3)
11621         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0);
11622       else
11623         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11624       }
11625     break;
11626     }
11627 #endif
11628 
11629   detect_partial_match(common, &no_match);
11630   label = LABEL();
11631   compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11632   detect_partial_match_to(common, label);
11633   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11634 
11635   set_jumps(no_char1_match, LABEL());
11636   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11637   set_jumps(no_match, LABEL());
11638   if (early_fail_ptr != 0)
11639     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11640   break;
11641 
11642   case OP_POSUPTO:
11643   SLJIT_ASSERT(early_fail_ptr == 0);
11644 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11645   if (common->utf)
11646     {
11647     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11648     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11649 
11650     detect_partial_match(common, &no_match);
11651     label = LABEL();
11652     compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11653     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11654     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11655     add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11656     detect_partial_match_to(common, label);
11657 
11658     set_jumps(no_match, LABEL());
11659     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
11660     break;
11661     }
11662 #endif
11663 
11664   if (type == OP_ALLANY)
11665     {
11666     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11667 
11668     if (common->mode == PCRE2_JIT_COMPLETE)
11669       {
11670       OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
11671       CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11672       }
11673     else
11674       {
11675       jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11676       process_partial_match(common);
11677       JUMPHERE(jump);
11678       }
11679     break;
11680     }
11681 
11682   OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11683 
11684   detect_partial_match(common, &no_match);
11685   label = LABEL();
11686   compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11687   OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11688   add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11689   detect_partial_match_to(common, label);
11690   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11691 
11692   set_jumps(no_char1_match, LABEL());
11693   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11694   set_jumps(no_match, LABEL());
11695   break;
11696 
11697   case OP_POSQUERY:
11698   SLJIT_ASSERT(early_fail_ptr == 0);
11699   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11700   compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11701   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11702   set_jumps(no_match, LABEL());
11703   OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11704   break;
11705 
11706   default:
11707   SLJIT_UNREACHABLE();
11708   break;
11709   }
11710 
11711 count_match(common);
11712 return end;
11713 }
11714 
compile_fail_accept_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11715 static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11716 {
11717 DEFINE_COMPILER;
11718 backtrack_common *backtrack;
11719 
11720 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
11721 
11722 if (*cc == OP_FAIL)
11723   {
11724   add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11725   return cc + 1;
11726   }
11727 
11728 if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
11729   add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
11730 
11731 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
11732   {
11733   /* No need to check notempty conditions. */
11734   if (common->accept_label == NULL)
11735     add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
11736   else
11737     JUMPTO(SLJIT_JUMP, common->accept_label);
11738   return cc + 1;
11739   }
11740 
11741 if (common->accept_label == NULL)
11742   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
11743 else
11744   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
11745 
11746 if (HAS_VIRTUAL_REGISTERS)
11747   {
11748   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11749   OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
11750   }
11751 else
11752   OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options));
11753 
11754 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
11755 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO));
11756 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
11757 if (common->accept_label == NULL)
11758   add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
11759 else
11760   JUMPTO(SLJIT_ZERO, common->accept_label);
11761 
11762 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
11763 if (common->accept_label == NULL)
11764   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
11765 else
11766   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
11767 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11768 return cc + 1;
11769 }
11770 
compile_close_matchingpath(compiler_common * common,PCRE2_SPTR cc)11771 static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
11772 {
11773 DEFINE_COMPILER;
11774 int offset = GET2(cc, 1);
11775 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
11776 
11777 /* Data will be discarded anyway... */
11778 if (common->currententry != NULL)
11779   return cc + 1 + IMM2_SIZE;
11780 
11781 if (!optimized_cbracket)
11782   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
11783 offset <<= 1;
11784 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11785 if (!optimized_cbracket)
11786   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11787 return cc + 1 + IMM2_SIZE;
11788 }
11789 
compile_control_verb_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11790 static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11791 {
11792 DEFINE_COMPILER;
11793 backtrack_common *backtrack;
11794 PCRE2_UCHAR opcode = *cc;
11795 PCRE2_SPTR ccend = cc + 1;
11796 
11797 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
11798     opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
11799   ccend += 2 + cc[1];
11800 
11801 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
11802 
11803 if (opcode == OP_SKIP)
11804   {
11805   allocate_stack(common, 1);
11806   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11807   return ccend;
11808   }
11809 
11810 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
11811   {
11812   if (HAS_VIRTUAL_REGISTERS)
11813     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11814   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
11815   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
11816   OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
11817   }
11818 
11819 return ccend;
11820 }
11821 
/* One-element opcode sequence containing only OP_THEN_TRAP. The then-trap
backtrack records created in compile_then_trap_matchingpath() and
compile_matchingpath() point their common.cc field at this array. */
static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
11823 
compile_then_trap_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)11824 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
11825 {
11826 DEFINE_COMPILER;
11827 backtrack_common *backtrack;
11828 BOOL needs_control_head;
11829 int size;
11830 
11831 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
11832 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
11833 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
11834 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
11835 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
11836 
11837 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
11838 size = 3 + (size < 0 ? 0 : size);
11839 
11840 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11841 allocate_stack(common, size);
11842 if (size > 3)
11843   OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
11844 else
11845   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
11846 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
11847 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
11848 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
11849 
11850 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
11851 if (size >= 0)
11852   init_frame(common, cc, ccend, size - 1, 0);
11853 }
11854 
/* Generates the matching path for the opcode sequence [cc, ccend).

Each opcode is either handled inline or dispatched to its dedicated
compile_*_matchingpath helper. The helpers return the address of the next
opcode to compile; when a helper returns NULL this function returns at once
(NULL is the value the visible helpers pass to PUSH_BACKTRACK as its error
result, so this presumably signals a compiler error).

Where a jump list is passed to a helper, it is the nextbacktracks list of the
most recently pushed backtrack record (parent->top) when such a record exists,
otherwise parent->topbacktracks. The expression is re-evaluated for every
opcode because parent->top changes as records are pushed.

When common->has_then is set and common->then_offsets marks this start
position, a then_trap backtrack record is pushed before the first opcode and
another one after the last opcode.

Arguments:
  common    the compiler state
  cc        first opcode to compile
  ccend     end of the sequence (asserted to be OP_END or in OP_ALT..OP_KETRPOS)
  parent    backtrack record that owns the records pushed here
*/
static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
BOOL has_then_trap = FALSE;
then_trap_backtrack *save_then_trap = NULL;

SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));

if (common->has_then && common->then_offsets[cc - common->start] != 0)
  {
  SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
  has_then_trap = TRUE;
  /* compile_then_trap_matchingpath overwrites common->then_trap; the old
  value is restored at the end of this function. */
  save_then_trap = common->then_trap;
  /* Tail item on backtrack. */
  compile_then_trap_matchingpath(common, cc, ccend, parent);
  }

while (cc < ccend)
  {
  switch(*cc)
    {
    /* Simple assertions. */
    case OP_SOD:
    case OP_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_REVERSE:
    cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    break;

    /* Opcodes compiled by the single character matcher. */
    case OP_NOT_DIGIT:
    case OP_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
    case OP_ANYBYTE:
    case OP_NOTPROP:
    case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE:
    case OP_HSPACE:
    case OP_NOT_VSPACE:
    case OP_VSPACE:
    case OP_EXTUNI:
    case OP_NOT:
    case OP_NOTI:
    cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;

    /* The previous OVECTOR(0) value is kept in one stack slot and
    OVECTOR(0) is replaced by STR_PTR. */
    case OP_SET_SOM:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    cc++;
    break;

    /* Literal characters: compile_charn_matchingpath (which also receives
    ccend) is used only in PCRE2_JIT_COMPLETE mode; other modes compile one
    character at a time. */
    case OP_CHAR:
    case OP_CHARI:
    if (common->mode == PCRE2_JIT_COMPLETE)
      cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;

    /* Repeated single characters and character types. */
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    cc = compile_iterator_matchingpath(common, cc, parent);
    break;

    /* A class is compiled as an iterator when the code unit located
    32 bytes after its first operand is a repeat opcode in the range
    OP_CRSTAR..OP_CRPOSRANGE; otherwise it matches a single character. */
    case OP_CLASS:
    case OP_NCLASS:
    if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
      cc = compile_iterator_matchingpath(common, cc, parent);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    /* Same decision for OP_XCLASS; the opcode to inspect is found at the
    offset returned by GET(cc, 1). */
    case OP_XCLASS:
    if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
      cc = compile_iterator_matchingpath(common, cc, parent);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;
#endif

    /* Back references: repeated ones go to the reference iterator,
    single ones are compiled directly and skipped by hand. */
    case OP_REF:
    case OP_REFI:
    if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    else
      {
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
      cc += 1 + IMM2_SIZE;
      }
    break;

    /* OP_DNREF/OP_DNREFI carry two IMM2 operands; the non-repeated form
    runs compile_dnref_search before the reference match itself. */
    case OP_DNREF:
    case OP_DNREFI:
    if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    else
      {
      compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
      cc += 1 + 2 * IMM2_SIZE;
      }
    break;

    case OP_RECURSE:
    cc = compile_recurse_matchingpath(common, cc, parent);
    break;

    case OP_CALLOUT:
    case OP_CALLOUT_STR:
    cc = compile_callout_matchingpath(common, cc, parent);
    break;

    /* Assertions get an assert_backtrack record that is passed to the
    assert compiler. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
    cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
    break;

    /* OP_BRAMINZERO: no code is emitted for the bracket here; cc moves
    to the end of the bracket returned by bracketend(). If the closing
    opcode is not OP_KETRMIN, STR_PTR is saved in one stack slot;
    otherwise two slots are filled with 0 and STR_PTR. The label
    recorded afterwards is the record's matchingpath entry. */
    case OP_BRAMINZERO:
    PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
    cc = bracketend(cc + 1);
    if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
      {
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else
      {
      allocate_stack(common, 2);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
      }
    BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
    count_match(common);
    break;

    /* Bracket-like opcodes. */
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    cc = compile_bracket_matchingpath(common, cc, parent);
    break;

    /* OP_BRAZERO: the opcode that follows decides. Values above
    OP_ASSERTBACK_NOT are compiled as brackets, the rest as assertions. */
    case OP_BRAZERO:
    if (cc[1] > OP_ASSERTBACK_NOT)
      cc = compile_bracket_matchingpath(common, cc, parent);
    else
      {
      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
      }
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    cc = compile_bracketpos_matchingpath(common, cc, parent);
    break;

    /* OP_MARK: the previous mark pointer is saved on the stack (slot 4 of
    a five-slot entry when common->has_skip_arg is set, otherwise the only
    slot) and cc + 2 becomes the new mark, both in the local mark_ptr slot
    and in jit_arguments.mark_ptr. With has_skip_arg the entry is also
    linked into the control head chain: slot 0 holds the previous control
    head, slot 1 the type_mark tag, slot 2 the mark address and slot 3
    STR_PTR. */
    case OP_MARK:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    SLJIT_ASSERT(common->mark_ptr != 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
    allocate_stack(common, common->has_skip_arg ? 5 : 1);
    if (HAS_VIRTUAL_REGISTERS)
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
    if (common->has_skip_arg)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
      }
    cc += 1 + 2 + cc[1];
    break;

    /* Backtracking control verbs. */
    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    case OP_THEN:
    case OP_THEN_ARG:
    case OP_COMMIT:
    case OP_COMMIT_ARG:
    cc = compile_control_verb_matchingpath(common, cc, parent);
    break;

    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    cc = compile_fail_accept_matchingpath(common, cc, parent);
    break;

    case OP_CLOSE:
    cc = compile_close_matchingpath(common, cc);
    break;

    /* The bracket after OP_SKIPZERO is stepped over without emitting
    any code. */
    case OP_SKIPZERO:
    cc = bracketend(cc + 1);
    break;

    default:
    SLJIT_UNREACHABLE();
    return;
    }
  /* A helper returned NULL: stop compiling this path. */
  if (cc == NULL)
    return;
  }

if (has_then_trap)
  {
  /* Head item on backtrack. */
  PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
  BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
  BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
  common->then_trap = save_then_trap;
  }
SLJIT_ASSERT(cc == ccend);
}
12170 
/* Retire the helper macros used by the matching path compilers above. */
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS

/* Compiles the backtracking path of 'current' and returns from the enclosing
function (which must return void) if the sljit compiler reports an error.
Expects 'common' and 'compiler' to be in scope. */
#define COMPILE_BACKTRACKINGPATH(current) \
  do \
    { \
    compile_backtrackingpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Casts the variable named 'current' in the enclosing scope to a pointer
to the given backtrack type. */
#define CURRENT_AS(type) ((type *)current)
12185 
/* Generates the backtracking path of a single character iterator.

The opcode, character type and repeat limits are decoded from current->cc by
get_iterator_parameters. The iterator state is addressed through
base/offset0/offset1: two slots on the backtrack stack (STACK_TOP) when the
opcode has no private data (PRIVATE_DATA(cc) == 0), otherwise the private data
slots relative to SLJIT_SP.

Each case either jumps back to the record's matchingpath label to retry with
a different repeat count, or falls through to the end of the function, where
the jumps collected in current->topbacktracks are bound.

Arguments:
  common     the compiler state
  current    the backtrack record (accessed as char_iterator_backtrack)
*/
static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
PCRE2_SPTR cc = current->cc;
PCRE2_UCHAR opcode;
PCRE2_UCHAR type;
sljit_u32 max = 0, exact;
struct sljit_label *label = NULL;
struct sljit_jump *jump = NULL;
jump_list *jumplist = NULL;
PCRE2_SPTR end;
int private_data_ptr = PRIVATE_DATA(cc);
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);

switch(opcode)
  {
  case OP_STAR:
  case OP_UPTO:
  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    /* For these types one saved STR_PTR value is popped per backtrack; a
    non-zero value resumes the matching path, zero falls through. */
    SLJIT_ASSERT(private_data_ptr == 0);
    set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
    }
  else
    {
    if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
      {
      /* Step back from the position in offset0 towards the limit in
      offset1 (kept in TMP2). At each step the code unit before STR_PTR is
      compared with u.charpos.chr after OR-ing in u.charpos.othercasebit
      when that is non-zero; on equality the matching path is resumed with
      the position already stored back in offset0. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      OP1(SLJIT_MOV, TMP2, 0, base, offset1);
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
      label = LABEL();
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
      move_back(common, NULL, TRUE);
      CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
      }
    else
      {
      /* Give up when the position in offset0 is not above the limit in
      offset1; otherwise move back, store the new position and resume the
      matching path. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
      move_back(common, NULL, TRUE);
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
      }
    /* Limit reached: the two state slots are released when they live on
    the stack. */
    JUMPHERE(jump);
    if (private_data_ptr == 0)
      free_stack(common, 2);
    }
  break;

  case OP_MINSTAR:
  /* Restore the saved position and try to match one more character. On
  success the new position is saved and the matching path resumed; failure
  jumps are collected in jumplist and bound after the resume jump. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  set_jumps(jumplist, LABEL());
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  case OP_MINUPTO:
  /* offset1 holds a countdown. It is decremented first; when it reaches
  zero no further character is tried. Otherwise the counter is written
  back and one more character is matched as for OP_MINSTAR. */
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));

  OP1(SLJIT_MOV, base, offset1, TMP1, 0);
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);

  set_jumps(jumplist, LABEL());
  if (private_data_ptr == 0)
    free_stack(common, 2);
  break;

  case OP_QUERY:
  /* The saved position is read and the slot cleared. A non-zero value
  resumes the matching path at that position; a zero value jumps to the
  release code below. The jumps in u.backtracks are bound in between: they
  perform the same restore-and-clear and always resume the matching path. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  jump = JUMP(SLJIT_JUMP);
  set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  JUMPHERE(jump);
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  case OP_MINQUERY:
  /* The saved position is read and the slot cleared. Zero means nothing
  is left to try; otherwise one character is matched and the matching path
  resumed. A failed character match ends up at the release code too. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  set_jumps(jumplist, LABEL());
  JUMPHERE(jump);
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  /* No backtracking code is emitted for these opcodes. */
  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSQUERY:
  case OP_POSUPTO:
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

/* Pending jumps in current->topbacktracks continue here. */
set_jumps(current->topbacktracks, LABEL());
}
12313 
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)12314 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12315 {
12316 DEFINE_COMPILER;
12317 PCRE2_SPTR cc = current->cc;
12318 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
12319 PCRE2_UCHAR type;
12320 
12321 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
12322 
12323 if ((type & 0x1) == 0)
12324   {
12325   /* Maximize case. */
12326   set_jumps(current->topbacktracks, LABEL());
12327   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12328   free_stack(common, 1);
12329   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12330   return;
12331   }
12332 
12333 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12334 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12335 set_jumps(current->topbacktracks, LABEL());
12336 free_stack(common, ref ? 2 : 3);
12337 }
12338 
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)12339 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12340 {
12341 DEFINE_COMPILER;
12342 recurse_entry *entry;
12343 
12344 if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
12345   {
12346   entry = CURRENT_AS(recurse_backtrack)->entry;
12347   if (entry->backtrack_label == NULL)
12348     add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
12349   else
12350     JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
12351   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
12352   }
12353 else
12354   compile_backtrackingpath(common, current->top);
12355 
12356 set_jumps(current->topbacktracks, LABEL());
12357 }
12358 
/* Generates the backtracking path of an assertion (OP_ASSERT, OP_ASSERT_NOT,
OP_ASSERTBACK, OP_ASSERTBACK_NOT), optionally prefixed by OP_BRAZERO. An
OP_BRAMINZERO prefix is not expected here. */
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
PCRE2_SPTR cc = current->cc;
struct sljit_jump *zero_jump = NULL;
int has_brazero;

SLJIT_ASSERT(*cc != OP_BRAMINZERO);
has_brazero = (*cc == OP_BRAZERO);

if (has_brazero)
  {
  /* Step over the prefix and reload STR_PTR from the top stack slot. */
  cc++;
  SLJIT_ASSERT(current->topbacktracks == NULL);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

/* No stack frame was created for the assertion. */
if (CURRENT_AS(assert_backtrack)->framesize < 0)
  {
  set_jumps(current->topbacktracks, LABEL());

  if (!has_brazero)
    return;

  /* Clear the saved slot, then compare STR_PTR against zero (targeting the
  matching path when they differ) before releasing the slot. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
  free_stack(common, 1);
  return;
  }

if (has_brazero)
  {
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
    {
    /* Negative assertions need no frame revert on this path. */
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
    free_stack(common, 1);
    return;
    }

  free_stack(common, 1);
  zero_jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
  {
  /* Positive assertions: revert the saved frame and restore the previous
  value of the private data slot. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(assert_backtrack)->framesize - 1) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, TMP1, 0);
  }

/* The pending backtracks are bound here for every kind of assertion. */
set_jumps(current->topbacktracks, LABEL());

if (!has_brazero)
  return;

/* We know there is enough place on the stack. */
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
JUMPHERE(zero_jump);
}
12427 
/* Generates the backtracking path of a bracketed group, optionally prefixed
by OP_BRAZERO or OP_BRAMINZERO.

The emitted code, in order:
  1. restores the repeat counter and capture state that were saved on the
     backtrack stack,
  2. dispatches to the backtracking path of the alternative that matched,
  3. compiles the matching path of each remaining alternative followed by
     its own backtracking path,
  4. releases the remaining stack slots and handles the repeat
     (exact count, OP_KETRMAX, OP_KETRMIN) and the zero-repeat prefix.

The exact order of the emitted instructions and stack operations matters;
the stack layout must mirror what the matching path pushed (see
compile_bracket_matchingpath, which is not part of this function). */
static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
int opcode, stacksize, alt_count, alt_max;
int offset = 0;
int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
PCRE2_SPTR cc = current->cc;
PCRE2_SPTR ccbegin;
PCRE2_SPTR ccprev;
PCRE2_UCHAR bra = OP_BRA;
PCRE2_UCHAR ket;
assert_backtrack *assert;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
struct sljit_jump *brazero = NULL;
struct sljit_jump *next_alt = NULL;
struct sljit_jump *once = NULL;
struct sljit_jump *cond = NULL;
struct sljit_label *rmin_label = NULL;
struct sljit_label *exact_label = NULL;
struct sljit_put_label *put_label = NULL;

/* Remember and skip an optional zero-repeat prefix. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  }

opcode = *cc;
/* Read the closing opcode of the group. A plain OP_KET with private data
describes a counted repeat; OP_UPTO / OP_MINUPTO are then handled like
OP_KETRMAX / OP_KETRMIN below. */
ccbegin = bracketend(cc) - 1 - LINK_SIZE;
ket = *ccbegin;
if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
  {
  repeat_ptr = PRIVATE_DATA(ccbegin);
  repeat_type = PRIVATE_DATA(ccbegin + 2);
  repeat_count = PRIVATE_DATA(ccbegin + 3);
  SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }
/* From here ccbegin is the start of the group and cc the end of its first
alternative. */
ccbegin = cc;
cc += GET(cc, 1);
has_alternatives = *cc == OP_ALT;
/* For conditional groups the flag depends on the condition kind (an
assertion) or on recorded condfailed jumps, not on OP_ALT. */
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
/* Capturing groups: offset is twice the group number read from the code. */
if (opcode == OP_CBRA || opcode == OP_SCBRA)
  offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;

alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;

/* Decoding the needs_control_head in framesize. */
if (opcode == OP_ONCE)
  {
  needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
  CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
  }

/* Counted repeat: pop the saved counter and write it back to repeat_ptr
(incremented by one for OP_UPTO). */
if (ket != OP_KET && repeat_type != 0)
  {
  /* TMP1 is used in OP_KETRMIN below. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  if (repeat_type == OP_UPTO)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  }

if (ket == OP_KETRMAX)
  {
  if (bra == OP_BRAZERO)
    {
    /* Pop the top slot; the brazero jump is taken when it is zero and is
    resolved near the end of this function. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
    }
  }
else if (ket == OP_KETRMIN)
  {
  if (bra != OP_BRAMINZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    if (repeat_type != 0)
      {
      /* TMP1 was set a few lines above. */
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else if (opcode >= OP_SBRA || opcode == OP_ONCE)
      {
      /* Checking zero-length iteration. */
      if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
        }
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else
      JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
    }
  /* Target of the OP_KETRMIN compare emitted near the end of this function. */
  rmin_label = LABEL();
  if (repeat_type != 0)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  }
else if (repeat_type == OP_EXACT)
  {
  /* Restart the exact repeat counter at 1; exact_label is the loop target
  used by the OP_EXACT compare near the end of this function. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  exact_label = LABEL();
  }

/* Capturing group: restore the saved capture state from the stack into the
offset vector (and capture_last_ptr when it is in use). */
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    {
    SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
    free_stack(common, 3);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
    }
  else if (common->optimized_cbracket[offset >> 1] == 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  }

if (SLJIT_UNLIKELY(opcode == OP_ONCE))
  {
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    /* Revert the frame saved for the atomic group. */
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
    }
  /* Unconditional jump, resolved by JUMPHERE(once) in the OP_ONCE cleanup
  further below. */
  once = JUMP(SLJIT_JUMP);
  }
else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  if (has_alternatives)
    {
    /* Always exactly one alternative. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);

    alt_max = 2;
    next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
    }
  }
else if (has_alternatives)
  {
  /* TMP1 receives the selector popped from the stack. With more than three
  alternatives it is used as the target of an indirect jump; otherwise it is
  compared against small constants. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);

  if (alt_max > 3)
    {
    sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);

    SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_put_label);
    sljit_set_put_label(CURRENT_AS(bracket_backtrack)->u.matching_put_label, LABEL());
    sljit_emit_op0(compiler, SLJIT_ENDBR);
    }
  else
    next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  }

/* Backtracking path of the first alternative. */
COMPILE_BACKTRACKINGPATH(current->top);
if (current->topbacktracks)
  set_jumps(current->topbacktracks, LABEL());

if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  /* Conditional block always has at most one alternative. */
  if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
    {
    SLJIT_ASSERT(has_alternatives);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
      }
    /* Jump over the alternative emitted below; resolved by JUMPHERE(cond). */
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
    }
  else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
    {
    SLJIT_ASSERT(has_alternatives);
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
    }
  else
    SLJIT_ASSERT(!has_alternatives);
  }

if (has_alternatives)
  {
  /* One iteration per remaining alternative: its matching path, the state
  pushed after a successful match, then its backtracking path. */
  alt_count = 1;
  do
    {
    current->top = NULL;
    current->topbacktracks = NULL;
    current->nextbacktracks = NULL;
    /* Conditional blocks always have an additional alternative, even if it is empty. */
    if (*cc == OP_ALT)
      {
      ccprev = cc + 1 + LINK_SIZE;
      cc += GET(cc, 1);
      if (opcode != OP_COND && opcode != OP_SCOND)
        {
        /* Reload the saved STR_PTR before trying the alternative. */
        if (opcode != OP_ONCE)
          {
          if (private_data_ptr != 0)
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
          else
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
          }
        else
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
        }
      compile_matchingpath(common, ccprev, cc, current);
      /* Stop generating code once the compiler reports an error. */
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
        return;

      if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

      if (opcode == OP_SCRIPT_RUN)
        match_script_run_common(common, private_data_ptr, current);
      }

    /* Instructions after the current alternative is successfully matched. */
    /* There is a similar code in compile_bracket_matchingpath. */
    if (opcode == OP_ONCE)
      match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

    /* First pass: count the stack slots needed, so that a single
    allocate_stack call can reserve them. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* We need to preserve the counter. TMP2 will be used below. */
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
      stacksize++;
      }
    if (ket != OP_KET || bra != OP_BRA)
      stacksize++;
    if (offset != 0)
      {
      if (common->capture_last_ptr != 0)
        stacksize++;
      if (common->optimized_cbracket[offset >> 1] == 0)
        stacksize += 2;
      }
    if (opcode != OP_ONCE)
      stacksize++;

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    /* Second pass: fill the reserved slots; stacksize is now the index of
    the next slot to write. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* TMP2 was set above. */
      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
      stacksize++;
      }

    if (ket != OP_KET || bra != OP_BRA)
      {
      if (ket != OP_KET)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      else
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
      stacksize++;
      }

    if (offset != 0)
      stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

    /* Store the selector read back by the dispatch code above: the
    alternative index, or a code address filled in through put_label. */
    if (opcode != OP_ONCE)
      {
      if (alt_max <= 3)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
      else
        put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
      }

    if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
      {
      /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
      SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      }

    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);

    /* Entry point of this alternative's backtracking path. */
    if (opcode != OP_ONCE)
      {
      if (alt_max <= 3)
        {
        JUMPHERE(next_alt);
        alt_count++;
        if (alt_count < alt_max)
          {
          SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
          next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
          }
        }
      else
        {
        sljit_set_put_label(put_label, LABEL());
        sljit_emit_op0(compiler, SLJIT_ENDBR);
        }
      }

    COMPILE_BACKTRACKINGPATH(current->top);
    if (current->topbacktracks)
      set_jumps(current->topbacktracks, LABEL());
    SLJIT_ASSERT(!current->nextbacktracks);
    }
  while (*cc == OP_ALT);

  if (cond != NULL)
    {
    SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
      }
    JUMPHERE(cond);
    }

  /* Free the STR_PTR. */
  if (private_data_ptr == 0)
    free_stack(common, 1);
  }

/* All alternatives are exhausted: restore the values saved when the group
was entered and release their stack slots. */
if (offset != 0)
  {
  /* Using both tmp register is better for instruction scheduling. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  }
else if (opcode == OP_ONCE)
  {
  cc = ccbegin + GET(ccbegin, 1);
  stacksize = needs_control_head ? 1 : 0;

  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    /* Reset head and drop saved frame. */
    stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
    }
  else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
    {
    /* The STR_PTR must be released. */
    stacksize++;
    }

  if (stacksize > 0)
    free_stack(common, stacksize);

  JUMPHERE(once);
  /* Restore previous private_data_ptr */
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
  else if (ket == OP_KETRMIN)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    /* See the comment below. */
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }

/* Repeat handling. */
if (repeat_type == OP_EXACT)
  {
  /* Increment the counter and loop back to exact_label while it does not
  exceed repeat_count. */
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
  }
else if (ket == OP_KETRMAX)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (bra != OP_BRAZERO)
    free_stack(common, 1);

  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
  if (bra == OP_BRAZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
    JUMPHERE(brazero);
    free_stack(common, 1);
    }
  }
else if (ket == OP_KETRMIN)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  /* OP_ONCE removes everything in case of a backtrack, so we don't
  need to explicitly release the STR_PTR. The extra release would
  affect badly the free_stack(2) above. */
  if (opcode != OP_ONCE)
    free_stack(common, 1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
  if (opcode == OP_ONCE)
    free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
  else if (bra == OP_BRAMINZERO)
    free_stack(common, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
  JUMPHERE(brazero);
  }
}
12894 
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)12895 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12896 {
12897 DEFINE_COMPILER;
12898 int offset;
12899 struct sljit_jump *jump;
12900 
12901 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
12902   {
12903   if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
12904     {
12905     offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
12906     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12907     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12908     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12909     if (common->capture_last_ptr != 0)
12910       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12911     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12912     if (common->capture_last_ptr != 0)
12913       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
12914     }
12915   set_jumps(current->topbacktracks, LABEL());
12916   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
12917   return;
12918   }
12919 
12920 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
12921 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12922 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
12923 
12924 if (current->topbacktracks)
12925   {
12926   jump = JUMP(SLJIT_JUMP);
12927   set_jumps(current->topbacktracks, LABEL());
12928   /* Drop the stack frame. */
12929   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
12930   JUMPHERE(jump);
12931   }
12932 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
12933 }
12934 
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)12935 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12936 {
12937 assert_backtrack backtrack;
12938 
12939 current->top = NULL;
12940 current->topbacktracks = NULL;
12941 current->nextbacktracks = NULL;
12942 if (current->cc[1] > OP_ASSERTBACK_NOT)
12943   {
12944   /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
12945   compile_bracket_matchingpath(common, current->cc, current);
12946   compile_bracket_backtrackingpath(common, current->top);
12947   }
12948 else
12949   {
12950   memset(&backtrack, 0, sizeof(backtrack));
12951   backtrack.common.cc = current->cc;
12952   backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
12953   /* Manual call of compile_assert_matchingpath. */
12954   compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
12955   }
12956 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
12957 }
12958 
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)12959 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12960 {
12961 DEFINE_COMPILER;
12962 PCRE2_UCHAR opcode = *current->cc;
12963 struct sljit_label *loop;
12964 struct sljit_jump *jump;
12965 
12966 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
12967   {
12968   if (common->then_trap != NULL)
12969     {
12970     SLJIT_ASSERT(common->control_head_ptr != 0);
12971 
12972     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12973     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
12974     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
12975     jump = JUMP(SLJIT_JUMP);
12976 
12977     loop = LABEL();
12978     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12979     JUMPHERE(jump);
12980     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
12981     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
12982     add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
12983     return;
12984     }
12985   else if (!common->local_quit_available && common->in_positive_assertion)
12986     {
12987     add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
12988     return;
12989     }
12990   }
12991 
12992 if (common->local_quit_available)
12993   {
12994   /* Abort match with a fail. */
12995   if (common->quit_label == NULL)
12996     add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
12997   else
12998     JUMPTO(SLJIT_JUMP, common->quit_label);
12999   return;
13000   }
13001 
13002 if (opcode == OP_SKIP_ARG)
13003   {
13004   SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13005   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13006   OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
13007   sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
13008 
13009   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
13010   add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
13011   return;
13012   }
13013 
13014 if (opcode == OP_SKIP)
13015   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13016 else
13017   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
13018 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
13019 }
13020 
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)13021 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13022 {
13023 DEFINE_COMPILER;
13024 struct sljit_jump *jump;
13025 int size;
13026 
13027 if (CURRENT_AS(then_trap_backtrack)->then_trap)
13028   {
13029   common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
13030   return;
13031   }
13032 
13033 size = CURRENT_AS(then_trap_backtrack)->framesize;
13034 size = 3 + (size < 0 ? 0 : size);
13035 
13036 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
13037 free_stack(common, size);
13038 jump = JUMP(SLJIT_JUMP);
13039 
13040 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
13041 /* STACK_TOP is set by THEN. */
13042 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
13043   {
13044   add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13045   OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
13046   }
13047 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13048 free_stack(common, 3);
13049 
13050 JUMPHERE(jump);
13051 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
13052 }
13053 
/* Emits the backtracking code for a chain of backtrack records.

The chain is walked from current through the prev links. For every record
the pending nextbacktracks jumps are bound to a fresh label, then the
generator matching the record's opcode is invoked. common->then_trap is
saved on entry and restored on exit. */
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *outer_then_trap = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->nextbacktracks != NULL)
    set_jumps(current->nextbacktracks, LABEL());

  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Pop the saved value back into the first offset vector slot. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Single item iterators. */
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    /* Back references. */
    case OP_REF:
    case OP_REFI:
    case OP_DNREF:
    case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode after the prefix selects the assertion or bracket generator. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    if (common->has_skip_arg)
      {
      /* Five slots are released; slot 4 goes to mark_ptr and slot 0 to
      the control head. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      /* Only the previous mark was saved. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN:
    case OP_THEN_ARG:
    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT:
    case OP_COMMIT_ARG:
    /* Without a local quit the return register is loaded with
    PCRE2_ERROR_NOMATCH before jumping to the quit code. */
    if (!common->local_quit_available)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
    if (common->quit_label == NULL)
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    else
      JUMPTO(SLJIT_JUMP, common->quit_label);
    break;

    case OP_CALLOUT:
    case OP_CALLOUT_STR:
    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    /* Nothing to undo: only bind the pending backtracks. */
    set_jumps(current->topbacktracks, LABEL());
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  }

common->then_trap = outer_then_trap;
}
13244 
/* Emits the machine code for the recursively callable group selected by
common->currententry.

Two fast-call entry points are generated and recorded in the entry:
  entry_label      starts matching the group;
  backtrack_label  re-enters the group after an earlier successful return.
Every exit is an SLJIT_FAST_RETURN with a status in TMP1: 1 on the path that
goes through the "match" jump list (the accept handler falls into it as well),
0 on the "no alternative matched" and accept_exit paths.

Arguments:
  common      the compiler state; common->currententry is the group to compile

Side effects: common->then_trap, common->quit_label, common->accept_label,
common->quit and common->accept are reset to NULL before the alternatives are
compiled. If the sljit compiler reports an error the function returns early
without emitting the remaining code. */

static SLJIT_INLINE void compile_recurse(compiler_common *common)
{
DEFINE_COMPILER;
/* cc: the opening bracket opcode of the group being compiled. */
PCRE2_SPTR cc = common->start + common->currententry->start;
/* ccbegin: first item inside the group. The opcode and its link are skipped,
plus IMM2_SIZE more code units unless the opcode is OP_BRA. */
PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
/* ccend: the bracketend() result moved back by one opcode + link (presumably
onto the closing ket of the group - TODO confirm against bracketend()). */
PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
/* Output flags filled in by get_recurse_data_length(). */
BOOL needs_control_head;
BOOL has_quit;
BOOL has_accept;
/* Number of stack slots reserved for the data handled by copy_recurse_data(). */
int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_control_head, &has_quit, &has_accept);
/* alt_count: index of the alternative currently being compiled.
alt_max: value returned by no_alternatives() for the group.
local_size: number of bookkeeping slots kept next to the private data. */
int alt_count, alt_max, local_size;
backtrack_common altbacktrack;
/* Jumps taken after an alternative's matching path has completed. */
jump_list *match = NULL;
/* Pending "not this alternative" jump of the compare chain (alt_max 2 or 3). */
struct sljit_jump *next_alt = NULL;
/* Jump taken at the backtrack entry when the saved selector is -1. */
struct sljit_jump *accept_exit = NULL;
struct sljit_label *quit;
/* Address slot of the current alternative's backtracking code (alt_max > 3). */
struct sljit_put_label *put_label = NULL;

/* A recursion captures THEN, so start without an active then trap. */
common->then_trap = NULL;

SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);

alt_max = no_alternatives(cc);
alt_count = 0;

/* Matching path. */
SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
/* Bind the matching entry point and resolve every call recorded so far. */
common->currententry->entry_label = LABEL();
set_jumps(common->currententry->entry_calls, common->currententry->entry_label);

/* The return address of the fast call arrives in TMP2. */
sljit_emit_fast_enter(compiler, TMP2, 0);
count_match(common);

/* One slot for the return address, plus one for the starting STR_PTR when
there is more than one alternative. */
local_size = (alt_max > 1) ? 2 : 1;

/* (Reversed) stack layout:
   [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */

allocate_stack(common, private_data_size + local_size);
/* Save return address. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);

copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, has_quit);

/* This variable is saved and restored every time we enter or exit a recursive context. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);

if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);

/* Remember the starting position so later alternatives can restart from it. */
if (alt_max > 1)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

memset(&altbacktrack, 0, sizeof(backtrack_common));
/* Quit/accept targets collected from here on belong to this recursion only. */
common->quit_label = NULL;
common->accept_label = NULL;
common->quit = NULL;
common->accept = NULL;
altbacktrack.cc = ccbegin;
/* Advance cc to the item that ends the first alternative. */
cc += GET(cc, 1);
/* One iteration per alternative: emit its matching path, the jump to the
common success code, then its backtracking path. */
while (1)
  {
  altbacktrack.top = NULL;
  altbacktrack.topbacktracks = NULL;

  /* Every alternative after the first restarts from the saved STR_PTR. */
  if (altbacktrack.cc != ccbegin)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;

  /* The alternative matched: push the frame consumed by the backtrack entry.
  STACK(0) is filled at the match label below; STACK(1), when present, holds
  the alternative selector. */
  allocate_stack(common, (alt_max > 1 || has_accept) ? 2 : 1);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);

  if (alt_max > 1 || has_accept)
    {
    /* With more than three alternatives the selector is the address of the
    alternative's backtracking code (bound later with sljit_set_put_label);
    otherwise it is the alternative's index. */
    if (alt_max > 3)
      put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
    else
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
    }

  add_jump(compiler, &match, JUMP(SLJIT_JUMP));

  if (alt_count == 0)
    {
    /* Backtracking path entry. It is emitted once, right after the matching
    path of the first alternative. */
    SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
    common->currententry->backtrack_label = LABEL();
    set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);

    /* The return address of the fast call arrives in TMP1 this time. */
    sljit_emit_fast_enter(compiler, TMP1, 0);

    /* A selector of -1 is written by the accept handler below. */
    if (has_accept)
      accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);

    /* TMP2 = the value stored into STACK(0) at the match label below. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    /* Save return address. */
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);

    copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);

    if (alt_max > 1)
      {
      /* Load the selector and dispatch to the alternative that matched. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
      free_stack(common, 2);

      if (alt_max > 3)
        {
        /* Indirect jump to the stored address. The first alternative's
        backtracking code starts immediately after the jump, so its put_label
        is bound here. */
        sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
        sljit_set_put_label(put_label, LABEL());
        sljit_emit_op0(compiler, SLJIT_ENDBR);
        }
      else
        /* Compare chain: fall through when the selector is 0. */
        next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
      }
    else
      free_stack(common, has_accept ? 2 : 1);
    }
  else if (alt_max > 3)
    {
    /* Target of the indirect jump for this (non-first) alternative. */
    sljit_set_put_label(put_label, LABEL());
    sljit_emit_op0(compiler, SLJIT_ENDBR);
    }
  else
    {
    /* Compare chain: the previous test lands here. Another test is needed
    only for the middle alternative of three. */
    JUMPHERE(next_alt);
    if (alt_count + 1 < alt_max)
      {
      SLJIT_ASSERT(alt_count == 1 && alt_max == 3);
      next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
      }
    }

  alt_count++;

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;
  /* Failures of this alternative continue with the code emitted next. */
  set_jumps(altbacktrack.topbacktracks, LABEL());

  if (*cc != OP_ALT)
    break;

  /* Step to the next alternative: skip OP_ALT and its link. */
  altbacktrack.cc = cc + 1 + LINK_SIZE;
  cc += GET(cc, 1);
  }

/* No alternative is matched. */

quit = LABEL();

copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, has_quit);

/* Reload the return address, release the frame and return with TMP1 = 0. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
free_stack(common, private_data_size + local_size);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);

if (common->quit != NULL)
  {
  SLJIT_ASSERT(has_quit);

  /* Quit jumps collected while compiling the alternatives land here: reset
  STACK_TOP to the saved recursion head, copy the shared data back and leave
  through the no-match exit above. */
  set_jumps(common->quit, LABEL());
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, has_quit);
  JUMPTO(SLJIT_JUMP, quit);
  }

if (has_accept)
  {
  /* Backtrack entry with a selector of -1: drop the two-slot frame and
  return with TMP1 = 0. */
  JUMPHERE(accept_exit);
  free_stack(common, 2);

  /* Save return address. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);

  copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, has_quit);

  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
  free_stack(common, private_data_size + local_size);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
  }

if (common->accept != NULL)
  {
  SLJIT_ASSERT(has_accept);

  /* Accept jumps collected while compiling the alternatives land here. */
  set_jumps(common->accept, LABEL());

  /* Reset STACK_TOP to the saved recursion head and keep a copy in TMP2, as
  the match code below expects. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);

  /* Push a frame whose selector is -1; the backtrack entry tests for it.
  Execution then falls through into the match code. */
  allocate_stack(common, 2);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
  }

/* Common success exit. TMP2 holds the saved recursion head. */
set_jumps(match, LABEL());

OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);

copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);

/* Fetch the return address through TMP2 and return with TMP1 = 1. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
}
13455 
/* Undefine the helper macros so they are not visible to the rest of the file. */
#undef COMPILE_BACKTRACKINGPATH
#undef CURRENT_AS

/* Configuration bits that may be combined with the mode argument of
jit_compile(). jit_compile() clears these bits from mode before storing it in
the compiler state. */
#define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS \
  (PCRE2_JIT_INVALID_UTF)
13461 
jit_compile(pcre2_code * code,sljit_u32 mode)13462 static int jit_compile(pcre2_code *code, sljit_u32 mode)
13463 {
13464 pcre2_real_code *re = (pcre2_real_code *)code;
13465 struct sljit_compiler *compiler;
13466 backtrack_common rootbacktrack;
13467 compiler_common common_data;
13468 compiler_common *common = &common_data;
13469 const sljit_u8 *tables = re->tables;
13470 void *allocator_data = &re->memctl;
13471 int private_data_size;
13472 PCRE2_SPTR ccend;
13473 executable_functions *functions;
13474 void *executable_func;
13475 sljit_uw executable_size;
13476 sljit_uw total_length;
13477 struct sljit_label *mainloop_label = NULL;
13478 struct sljit_label *continue_match_label;
13479 struct sljit_label *empty_match_found_label = NULL;
13480 struct sljit_label *empty_match_backtrack_label = NULL;
13481 struct sljit_label *reset_match_label;
13482 struct sljit_label *quit_label;
13483 struct sljit_jump *jump;
13484 struct sljit_jump *minlength_check_failed = NULL;
13485 struct sljit_jump *empty_match = NULL;
13486 struct sljit_jump *end_anchor_failed = NULL;
13487 jump_list *reqcu_not_found = NULL;
13488 
13489 SLJIT_ASSERT(tables);
13490 
13491 #if HAS_VIRTUAL_REGISTERS == 1
13492 SLJIT_ASSERT(sljit_get_register_index(TMP3) < 0 && sljit_get_register_index(ARGUMENTS) < 0 && sljit_get_register_index(RETURN_ADDR) < 0);
13493 #elif HAS_VIRTUAL_REGISTERS == 0
13494 SLJIT_ASSERT(sljit_get_register_index(TMP3) >= 0 && sljit_get_register_index(ARGUMENTS) >= 0 && sljit_get_register_index(RETURN_ADDR) >= 0);
13495 #else
13496 #error "Invalid value for HAS_VIRTUAL_REGISTERS"
13497 #endif
13498 
13499 memset(&rootbacktrack, 0, sizeof(backtrack_common));
13500 memset(common, 0, sizeof(compiler_common));
13501 common->re = re;
13502 common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
13503 rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
13504 
13505 #ifdef SUPPORT_UNICODE
13506 common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
13507 #endif /* SUPPORT_UNICODE */
13508 mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;
13509 
13510 common->start = rootbacktrack.cc;
13511 common->read_only_data_head = NULL;
13512 common->fcc = tables + fcc_offset;
13513 common->lcc = (sljit_sw)(tables + lcc_offset);
13514 common->mode = mode;
13515 common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
13516 common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
13517 common->nltype = NLTYPE_FIXED;
13518 switch(re->newline_convention)
13519   {
13520   case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
13521   case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
13522   case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
13523   case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
13524   case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
13525   case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
13526   default: return PCRE2_ERROR_INTERNAL;
13527   }
13528 common->nlmax = READ_CHAR_MAX;
13529 common->nlmin = 0;
13530 if (re->bsr_convention == PCRE2_BSR_UNICODE)
13531   common->bsr_nltype = NLTYPE_ANY;
13532 else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
13533   common->bsr_nltype = NLTYPE_ANYCRLF;
13534 else
13535   {
13536 #ifdef BSR_ANYCRLF
13537   common->bsr_nltype = NLTYPE_ANYCRLF;
13538 #else
13539   common->bsr_nltype = NLTYPE_ANY;
13540 #endif
13541   }
13542 common->bsr_nlmax = READ_CHAR_MAX;
13543 common->bsr_nlmin = 0;
13544 common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
13545 common->ctypes = (sljit_sw)(tables + ctypes_offset);
13546 common->name_count = re->name_count;
13547 common->name_entry_size = re->name_entry_size;
13548 common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
13549 common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
13550 #ifdef SUPPORT_UNICODE
13551 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
13552 common->utf = (re->overall_options & PCRE2_UTF) != 0;
13553 common->ucp = (re->overall_options & PCRE2_UCP) != 0;
13554 if (common->utf)
13555   {
13556   if (common->nltype == NLTYPE_ANY)
13557     common->nlmax = 0x2029;
13558   else if (common->nltype == NLTYPE_ANYCRLF)
13559     common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13560   else
13561     {
13562     /* We only care about the first newline character. */
13563     common->nlmax = common->newline & 0xff;
13564     }
13565 
13566   if (common->nltype == NLTYPE_FIXED)
13567     common->nlmin = common->newline & 0xff;
13568   else
13569     common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13570 
13571   if (common->bsr_nltype == NLTYPE_ANY)
13572     common->bsr_nlmax = 0x2029;
13573   else
13574     common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13575   common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13576   }
13577 else
13578   common->invalid_utf = FALSE;
13579 #endif /* SUPPORT_UNICODE */
13580 ccend = bracketend(common->start);
13581 
13582 /* Calculate the local space size on the stack. */
13583 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
13584 common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
13585 if (!common->optimized_cbracket)
13586   return PCRE2_ERROR_NOMEMORY;
13587 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
13588 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13589 #else
13590 memset(common->optimized_cbracket, 1, re->top_bracket + 1);
13591 #endif
13592 
13593 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
13594 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
13595 common->capture_last_ptr = common->ovector_start;
13596 common->ovector_start += sizeof(sljit_sw);
13597 #endif
13598 if (!check_opcode_types(common, common->start, ccend))
13599   {
13600   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13601   return PCRE2_ERROR_NOMEMORY;
13602   }
13603 
13604 /* Checking flags and updating ovector_start. */
13605 if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13606   {
13607   common->req_char_ptr = common->ovector_start;
13608   common->ovector_start += sizeof(sljit_sw);
13609   }
13610 if (mode != PCRE2_JIT_COMPLETE)
13611   {
13612   common->start_used_ptr = common->ovector_start;
13613   common->ovector_start += sizeof(sljit_sw);
13614   if (mode == PCRE2_JIT_PARTIAL_SOFT)
13615     {
13616     common->hit_start = common->ovector_start;
13617     common->ovector_start += sizeof(sljit_sw);
13618     }
13619   }
13620 if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
13621   {
13622   common->match_end_ptr = common->ovector_start;
13623   common->ovector_start += sizeof(sljit_sw);
13624   }
13625 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
13626 common->control_head_ptr = 1;
13627 #endif
13628 if (common->control_head_ptr != 0)
13629   {
13630   common->control_head_ptr = common->ovector_start;
13631   common->ovector_start += sizeof(sljit_sw);
13632   }
13633 if (common->has_set_som)
13634   {
13635   /* Saving the real start pointer is necessary. */
13636   common->start_ptr = common->ovector_start;
13637   common->ovector_start += sizeof(sljit_sw);
13638   }
13639 
13640 /* Aligning ovector to even number of sljit words. */
13641 if ((common->ovector_start & sizeof(sljit_sw)) != 0)
13642   common->ovector_start += sizeof(sljit_sw);
13643 
13644 if (common->start_ptr == 0)
13645   common->start_ptr = OVECTOR(0);
13646 
13647 /* Capturing brackets cannot be optimized if callouts are allowed. */
13648 if (common->capture_last_ptr != 0)
13649   memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13650 
13651 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
13652 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
13653 
13654 total_length = ccend - common->start;
13655 common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
13656 if (!common->private_data_ptrs)
13657   {
13658   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13659   return PCRE2_ERROR_NOMEMORY;
13660   }
13661 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
13662 
13663 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
13664 
13665 if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
13666   detect_early_fail(common, common->start, &private_data_size, 0, 0, TRUE);
13667 
13668 set_private_data_ptrs(common, &private_data_size, ccend);
13669 
13670 SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
13671 
13672 if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
13673   {
13674   SLJIT_FREE(common->private_data_ptrs, allocator_data);
13675   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13676   return PCRE2_ERROR_NOMEMORY;
13677   }
13678 
13679 if (common->has_then)
13680   {
13681   common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
13682   memset(common->then_offsets, 0, total_length);
13683   set_then_offsets(common, common->start, NULL);
13684   }
13685 
13686 compiler = sljit_create_compiler(allocator_data, NULL);
13687 if (!compiler)
13688   {
13689   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13690   SLJIT_FREE(common->private_data_ptrs, allocator_data);
13691   return PCRE2_ERROR_NOMEMORY;
13692   }
13693 common->compiler = compiler;
13694 
13695 /* Main pcre_jit_exec entry. */
13696 sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size);
13697 
13698 /* Register init. */
13699 reset_ovector(common, (re->top_bracket + 1) * 2);
13700 if (common->req_char_ptr != 0)
13701   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
13702 
13703 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
13704 OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
13705 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13706 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
13707 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
13708 OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
13709 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
13710 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
13711 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
13712 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
13713 
13714 if (common->early_fail_start_ptr < common->early_fail_end_ptr)
13715   reset_early_fail(common);
13716 
13717 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13718   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13719 if (common->mark_ptr != 0)
13720   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
13721 if (common->control_head_ptr != 0)
13722   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13723 
13724 /* Main part of the matching */
13725 if ((re->overall_options & PCRE2_ANCHORED) == 0)
13726   {
13727   mainloop_label = mainloop_entry(common);
13728   continue_match_label = LABEL();
13729   /* Forward search if possible. */
13730   if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13731     {
13732     if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
13733       ;
13734     else if ((re->flags & PCRE2_FIRSTSET) != 0)
13735       fast_forward_first_char(common);
13736     else if ((re->flags & PCRE2_STARTLINE) != 0)
13737       fast_forward_newline(common);
13738     else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
13739       fast_forward_start_bits(common);
13740     }
13741   }
13742 else
13743   continue_match_label = LABEL();
13744 
13745 if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13746   {
13747   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13748   OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
13749   minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
13750   }
13751 if (common->req_char_ptr != 0)
13752   reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
13753 
13754 /* Store the current STR_PTR in OVECTOR(0). */
13755 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
13756 /* Copy the limit of allowed recursions. */
13757 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
13758 if (common->capture_last_ptr != 0)
13759   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
13760 if (common->fast_forward_bc_ptr != NULL)
13761   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);
13762 
13763 if (common->start_ptr != OVECTOR(0))
13764   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
13765 
13766 /* Copy the beginning of the string. */
13767 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13768   {
13769   jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13770   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
13771   JUMPHERE(jump);
13772   }
13773 else if (mode == PCRE2_JIT_PARTIAL_HARD)
13774   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
13775 
13776 compile_matchingpath(common, common->start, ccend, &rootbacktrack);
13777 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13778   {
13779   sljit_free_compiler(compiler);
13780   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13781   SLJIT_FREE(common->private_data_ptrs, allocator_data);
13782   PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13783   return PCRE2_ERROR_NOMEMORY;
13784   }
13785 
13786 if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
13787   end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
13788 
13789 if (common->might_be_empty)
13790   {
13791   empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
13792   empty_match_found_label = LABEL();
13793   }
13794 
13795 common->accept_label = LABEL();
13796 if (common->accept != NULL)
13797   set_jumps(common->accept, common->accept_label);
13798 
13799 /* This means we have a match. Update the ovector. */
13800 copy_ovector(common, re->top_bracket + 1);
13801 common->quit_label = common->abort_label = LABEL();
13802 if (common->quit != NULL)
13803   set_jumps(common->quit, common->quit_label);
13804 if (common->abort != NULL)
13805   set_jumps(common->abort, common->abort_label);
13806 if (minlength_check_failed != NULL)
13807   SET_LABEL(minlength_check_failed, common->abort_label);
13808 
13809 sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
13810 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
13811 
13812 if (common->failed_match != NULL)
13813   {
13814   SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
13815   set_jumps(common->failed_match, LABEL());
13816   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13817   JUMPTO(SLJIT_JUMP, common->abort_label);
13818   }
13819 
13820 if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
13821   JUMPHERE(end_anchor_failed);
13822 
13823 if (mode != PCRE2_JIT_COMPLETE)
13824   {
13825   common->partialmatchlabel = LABEL();
13826   set_jumps(common->partialmatch, common->partialmatchlabel);
13827   return_with_partial_match(common, common->quit_label);
13828   }
13829 
13830 if (common->might_be_empty)
13831   empty_match_backtrack_label = LABEL();
13832 compile_backtrackingpath(common, rootbacktrack.top);
13833 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13834   {
13835   sljit_free_compiler(compiler);
13836   SLJIT_FREE(common->optimized_cbracket, allocator_data);
13837   SLJIT_FREE(common->private_data_ptrs, allocator_data);
13838   PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13839   return PCRE2_ERROR_NOMEMORY;
13840   }
13841 
13842 SLJIT_ASSERT(rootbacktrack.prev == NULL);
13843 reset_match_label = LABEL();
13844 
13845 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13846   {
13847   /* Update hit_start only in the first time. */
13848   jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
13849   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
13850   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
13851   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
13852   JUMPHERE(jump);
13853   }
13854 
13855 /* Check we have remaining characters. */
13856 if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
13857   {
13858   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
13859   }
13860 
13861 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
13862     (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);
13863 
13864 if ((re->overall_options & PCRE2_ANCHORED) == 0)
13865   {
13866   if (common->ff_newline_shortcut != NULL)
13867     {
13868     /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
13869     if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
13870       {
13871       if (common->match_end_ptr != 0)
13872         {
13873         OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
13874         OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
13875         CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
13876         OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
13877         }
13878       else
13879         CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
13880       }
13881     }
13882   else
13883     CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
13884   }
13885 
13886 /* No more remaining characters. */
13887 if (reqcu_not_found != NULL)
13888   set_jumps(reqcu_not_found, LABEL());
13889 
13890 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13891   CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
13892 
13893 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13894 JUMPTO(SLJIT_JUMP, common->quit_label);
13895 
13896 flush_stubs(common);
13897 
13898 if (common->might_be_empty)
13899   {
13900   JUMPHERE(empty_match);
13901   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
13902   OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
13903   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
13904   JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
13905   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
13906   JUMPTO(SLJIT_ZERO, empty_match_found_label);
13907   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13908   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
13909   JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
13910   }
13911 
13912 common->fast_forward_bc_ptr = NULL;
13913 common->early_fail_start_ptr = 0;
13914 common->early_fail_end_ptr = 0;
13915 common->currententry = common->entries;
13916 common->local_quit_available = TRUE;
13917 quit_label = common->quit_label;
13918 while (common->currententry != NULL)
13919   {
13920   /* Might add new entries. */
13921   compile_recurse(common);
13922   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13923     {
13924     sljit_free_compiler(compiler);
13925     SLJIT_FREE(common->optimized_cbracket, allocator_data);
13926     SLJIT_FREE(common->private_data_ptrs, allocator_data);
13927     PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13928     return PCRE2_ERROR_NOMEMORY;
13929     }
13930   flush_stubs(common);
13931   common->currententry = common->currententry->next;
13932   }
13933 common->local_quit_available = FALSE;
13934 common->quit_label = quit_label;
13935 
13936 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
13937 /* This is a (really) rare case. */
13938 set_jumps(common->stackalloc, LABEL());
13939 /* RETURN_ADDR is not a saved register. */
13940 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
13941 
13942 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13943 
13944 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
13945 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
13946 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
13947 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
13948 OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
13949 
13950 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
13951 
13952 jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
13953 OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
13954 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
13955 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
13956 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
13957 OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
13958 
13959 /* Allocation failed. */
13960 JUMPHERE(jump);
13961 /* We break the return address cache here, but this is a really rare case. */
13962 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
13963 JUMPTO(SLJIT_JUMP, common->quit_label);
13964 
13965 /* Call limit reached. */
13966 set_jumps(common->calllimit, LABEL());
13967 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
13968 JUMPTO(SLJIT_JUMP, common->quit_label);
13969 
13970 if (common->revertframes != NULL)
13971   {
13972   set_jumps(common->revertframes, LABEL());
13973   do_revertframes(common);
13974   }
13975 if (common->wordboundary != NULL)
13976   {
13977   set_jumps(common->wordboundary, LABEL());
13978   check_wordboundary(common);
13979   }
13980 if (common->anynewline != NULL)
13981   {
13982   set_jumps(common->anynewline, LABEL());
13983   check_anynewline(common);
13984   }
13985 if (common->hspace != NULL)
13986   {
13987   set_jumps(common->hspace, LABEL());
13988   check_hspace(common);
13989   }
13990 if (common->vspace != NULL)
13991   {
13992   set_jumps(common->vspace, LABEL());
13993   check_vspace(common);
13994   }
13995 if (common->casefulcmp != NULL)
13996   {
13997   set_jumps(common->casefulcmp, LABEL());
13998   do_casefulcmp(common);
13999   }
14000 if (common->caselesscmp != NULL)
14001   {
14002   set_jumps(common->caselesscmp, LABEL());
14003   do_caselesscmp(common);
14004   }
14005 if (common->reset_match != NULL)
14006   {
14007   set_jumps(common->reset_match, LABEL());
14008   do_reset_match(common, (re->top_bracket + 1) * 2);
14009   CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
14010   OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
14011   JUMPTO(SLJIT_JUMP, reset_match_label);
14012   }
14013 #ifdef SUPPORT_UNICODE
14014 #if PCRE2_CODE_UNIT_WIDTH == 8
14015 if (common->utfreadchar != NULL)
14016   {
14017   set_jumps(common->utfreadchar, LABEL());
14018   do_utfreadchar(common);
14019   }
14020 if (common->utfreadtype8 != NULL)
14021   {
14022   set_jumps(common->utfreadtype8, LABEL());
14023   do_utfreadtype8(common);
14024   }
14025 if (common->utfpeakcharback != NULL)
14026   {
14027   set_jumps(common->utfpeakcharback, LABEL());
14028   do_utfpeakcharback(common);
14029   }
14030 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
14031 #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
14032 if (common->utfreadchar_invalid != NULL)
14033   {
14034   set_jumps(common->utfreadchar_invalid, LABEL());
14035   do_utfreadchar_invalid(common);
14036   }
14037 if (common->utfreadnewline_invalid != NULL)
14038   {
14039   set_jumps(common->utfreadnewline_invalid, LABEL());
14040   do_utfreadnewline_invalid(common);
14041   }
14042 if (common->utfmoveback_invalid)
14043   {
14044   set_jumps(common->utfmoveback_invalid, LABEL());
14045   do_utfmoveback_invalid(common);
14046   }
14047 if (common->utfpeakcharback_invalid)
14048   {
14049   set_jumps(common->utfpeakcharback_invalid, LABEL());
14050   do_utfpeakcharback_invalid(common);
14051   }
14052 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
14053 if (common->getucd != NULL)
14054   {
14055   set_jumps(common->getucd, LABEL());
14056   do_getucd(common);
14057   }
14058 if (common->getucdtype != NULL)
14059   {
14060   set_jumps(common->getucdtype, LABEL());
14061   do_getucdtype(common);
14062   }
14063 #endif /* SUPPORT_UNICODE */
14064 
14065 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14066 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14067 
14068 executable_func = sljit_generate_code(compiler);
14069 executable_size = sljit_get_generated_code_size(compiler);
14070 sljit_free_compiler(compiler);
14071 
14072 if (executable_func == NULL)
14073   {
14074   PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14075   return PCRE2_ERROR_NOMEMORY;
14076   }
14077 
14078 /* Reuse the function descriptor if possible. */
14079 if (re->executable_jit != NULL)
14080   functions = (executable_functions *)re->executable_jit;
14081 else
14082   {
14083   functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
14084   if (functions == NULL)
14085     {
14086     /* This case is highly unlikely since we just recently
14087     freed a lot of memory. Not impossible though. */
14088     sljit_free_code(executable_func, NULL);
14089     PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14090     return PCRE2_ERROR_NOMEMORY;
14091     }
14092   memset(functions, 0, sizeof(executable_functions));
14093   functions->top_bracket = re->top_bracket + 1;
14094   functions->limit_match = re->limit_match;
14095   re->executable_jit = functions;
14096   }
14097 
14098 /* Turn mode into an index. */
14099 if (mode == PCRE2_JIT_COMPLETE)
14100   mode = 0;
14101 else
14102   mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
14103 
14104 SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
14105 functions->executable_funcs[mode] = executable_func;
14106 functions->read_only_data_heads[mode] = common->read_only_data_head;
14107 functions->executable_sizes[mode] = executable_size;
14108 return 0;
14109 }
14110 
14111 #endif
14112 
14113 /*************************************************
14114 *        JIT compile a Regular Expression        *
14115 *************************************************/
14116 
14117 /* This function uses JIT to convert a previously-compiled pattern into machine
14118 code.
14119 
14120 Arguments:
14121   code          a compiled pattern
14122   options       JIT option bits
14123 
14124 Returns:        0: success or (*NOJIT) was used
14125                <0: an error code
14126 */
14127 
14128 #define PUBLIC_JIT_COMPILE_OPTIONS \
14129   (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_INVALID_UTF)
14130 
14131 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_compile(pcre2_code * code,uint32_t options)14132 pcre2_jit_compile(pcre2_code *code, uint32_t options)
14133 {
14134 pcre2_real_code *re = (pcre2_real_code *)code;
14135 #ifdef SUPPORT_JIT
14136 executable_functions *functions;
14137 static int executable_allocator_is_working = 0;
14138 #endif
14139 
14140 if (code == NULL)
14141   return PCRE2_ERROR_NULL;
14142 
14143 if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14144   return PCRE2_ERROR_JIT_BADOPTION;
14145 
14146 /* Support for invalid UTF was first introduced in JIT, with the option
14147 PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
14148 compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
14149 preferred feature, with the earlier option deprecated. However, for backward
14150 compatibility, if the earlier option is set, it forces the new option so that
14151 if JIT matching falls back to the interpreter, there is still support for
14152 invalid UTF. However, if this function has already been successfully called
14153 without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
14154 non-invalid-supporting JIT code was compiled), give an error.
14155 
14156 If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
14157 actions are needed:
14158 
14159   1. Remove the definition from pcre2.h.in and from the list in
14160      PUBLIC_JIT_COMPILE_OPTIONS above.
14161 
14162   2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
14163 
14164   3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
14165 
14166   4. Delete the following short block of code. The setting of "re" and
14167      "functions" can be moved into the JIT-only block below, but if that is
14168      done, (void)re and (void)functions will be needed in the non-JIT case, to
14169      avoid compiler warnings.
14170 */
14171 
14172 #ifdef SUPPORT_JIT
14173 functions = (executable_functions *)re->executable_jit;
14174 #endif
14175 
14176 if ((options & PCRE2_JIT_INVALID_UTF) != 0)
14177   {
14178   if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
14179     {
14180 #ifdef SUPPORT_JIT
14181     if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
14182 #endif
14183     re->overall_options |= PCRE2_MATCH_INVALID_UTF;
14184     }
14185   }
14186 
14187 /* The above tests are run with and without JIT support. This means that
14188 PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
14189 interpreter support) even in the absence of JIT. But now, if there is no JIT
14190 support, give an error return. */
14191 
14192 #ifndef SUPPORT_JIT
14193 return PCRE2_ERROR_JIT_BADOPTION;
14194 #else  /* SUPPORT_JIT */
14195 
14196 /* There is JIT support. Do the necessary. */
14197 
14198 if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14199 
14200 if (executable_allocator_is_working == 0)
14201   {
14202   /* Checks whether the executable allocator is working. This check
14203      might run multiple times in multi-threaded environments, but the
14204      result should not be affected by it. */
14205   void *ptr = SLJIT_MALLOC_EXEC(32, NULL);
14206 
14207   executable_allocator_is_working = -1;
14208 
14209   if (ptr != NULL)
14210     {
14211     SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL);
14212     executable_allocator_is_working = 1;
14213     }
14214   }
14215 
14216 if (executable_allocator_is_working < 0)
14217   return PCRE2_ERROR_NOMEMORY;
14218 
14219 if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
14220   options |= PCRE2_JIT_INVALID_UTF;
14221 
14222 if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14223     || functions->executable_funcs[0] == NULL)) {
14224   uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14225   int result = jit_compile(code, options & ~excluded_options);
14226   if (result != 0)
14227     return result;
14228   }
14229 
14230 if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14231     || functions->executable_funcs[1] == NULL)) {
14232   uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14233   int result = jit_compile(code, options & ~excluded_options);
14234   if (result != 0)
14235     return result;
14236   }
14237 
14238 if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14239     || functions->executable_funcs[2] == NULL)) {
14240   uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14241   int result = jit_compile(code, options & ~excluded_options);
14242   if (result != 0)
14243     return result;
14244   }
14245 
14246 return 0;
14247 
14248 #endif  /* SUPPORT_JIT */
14249 }
14250 
14251 /* JIT compiler uses an all-in-one approach. This improves security,
14252    since the code generator functions are not exported. */
14253 
14254 #define INCLUDED_FROM_PCRE2_JIT_COMPILE
14255 
14256 #include "pcre2_jit_match.c"
14257 #include "pcre2_jit_misc.c"
14258 
14259 /* End of pcre2_jit_compile.c */
14260