xref: /PHP-7.2/ext/pcre/pcrelib/pcre_jit_compile.c (revision 8a287c0e)
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9            Copyright (c) 1997-2013 University of Cambridge
10 
11   The machine code generator part (this module) was written by Zoltan Herczeg
12                       Copyright (c) 2010-2013
13 
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17 
18     * Redistributions of source code must retain the above copyright notice,
19       this list of conditions and the following disclaimer.
20 
21     * Redistributions in binary form must reproduce the above copyright
22       notice, this list of conditions and the following disclaimer in the
23       documentation and/or other materials provided with the distribution.
24 
25     * Neither the name of the University of Cambridge nor the names of its
26       contributors may be used to endorse or promote products derived from
27       this software without specific prior written permission.
28 
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42 
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46 
47 #include "pcre_internal.h"
48 
49 #if defined SUPPORT_JIT
50 
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54 
55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61 
62 #include "sljit/sljitLir.c"
63 
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67 
68 /* Defines for debugging purposes. */
69 
70 /* 1 - Use unoptimized capturing brackets.
71    2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73 
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76 
/* Size of the regex stack that is allocated on the real machine stack.
Fast, but limited in size. */
#define MACHINE_STACK_SIZE (32 * 1024)

/* Growth rate for the stack allocated by the OS. Should be a multiple
of the page size. */
#define STACK_GROWTH_RATE (8 * 1024)

/* Enable to check that the allocation could destroy temporaries. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
89 
90 /*
A short summary of the backtracking mechanism employed by the JIT code generator:
92 
The code generator follows the recursive nature of Perl-compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) or
vertical (sub-expression) (see struct backtrack_common for more details).
98 
99   'ab' - 'a' and 'b' regexps are concatenated
100   'a+' - 'a' is the sub-expression of the '+' operator
101 
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive on all CPUs, so we avoid taken
branches on the matching path.
107 
108  Greedy star operator (*) :
109    Matching path: match happens.
110    Backtrack path: match failed.
111  Non-greedy star operator (*?) :
112    Matching path: no need to perform a match.
113    Backtrack path: match is required.
114 
The following example shows how code is generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
suppose we have the following regular expression:
118 
119    A(B|C)D
120 
121 The generated code will be the following:
122 
123  A matching path
124  '(' matching path (pushing arguments to the stack)
125  B matching path
126  ')' matching path (pushing arguments to the stack)
127  D matching path
128  return with successful match
129 
130  D backtrack path
131  ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132  B backtrack path
133  C expected path
134  jump to D matching path
135  C backtrack path
136  A backtrack path
137 
 Notice that the order of the backtrack code paths is the opposite of the fast
 code paths. In this way the topmost value on the stack always belongs
 to the current backtrack code path. The backtrack path must check
 whether there is a next alternative. If so, it needs to jump back to
 the matching path eventually. Otherwise it needs to clear out its own stack
 frame and continue the execution on the backtrack code paths.
144 */
145 
146 /*
147 Saved stack frames:
148 
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
153 
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156 
157 Thus we can restore the private data to a particular point in the stack.
158 */
159 
160 typedef struct jit_arguments {
161   /* Pointers first. */
162   struct sljit_stack *stack;
163   const pcre_uchar *str;
164   const pcre_uchar *begin;
165   const pcre_uchar *end;
166   int *offsets;
167   pcre_uchar *uchar_ptr;
168   pcre_uchar *mark_ptr;
169   void *callout_data;
170   /* Everything else after. */
171   sljit_u32 limit_match;
172   int real_offset_count;
173   int offset_count;
174   sljit_u8 notbol;
175   sljit_u8 noteol;
176   sljit_u8 notempty;
177   sljit_u8 notempty_atstart;
178 } jit_arguments;
179 
180 typedef struct executable_functions {
181   void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182   void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
183   sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
184   PUBL(jit_callback) callback;
185   void *userdata;
186   sljit_u32 top_bracket;
187   sljit_u32 limit_match;
188 } executable_functions;
189 
/* Singly linked list node holding one sljit jump. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
194 
/* Singly linked list node pairing a start jump with a quit label. */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
200 
201 typedef struct label_addr_list {
202   struct sljit_label *label;
203   sljit_uw *update_addr;
204   struct label_addr_list *next;
205 } label_addr_list;
206 
/* Negative sentinel values; NOTE(review): presumably stored in the
framesize members, which are documented as "less than 0 if a frame is
not needed" - confirm at the use sites. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
211 
/* Tags for control entries; NOTE(review): presumably the entries of the
chain addressed by control_head_ptr - confirm at the use sites. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
216 
217 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218 
219 /* The following structure is the key data type for the recursive
220 code generator. It is allocated by compile_matchingpath, and contains
221 the arguments for compile_backtrackingpath. Must be the first member
222 of its descendants. */
223 typedef struct backtrack_common {
224   /* Concatenation stack. */
225   struct backtrack_common *prev;
226   jump_list *nextbacktracks;
227   /* Internal stack (for component operators). */
228   struct backtrack_common *top;
229   jump_list *topbacktracks;
230   /* Opcode pointer. */
231   pcre_uchar *cc;
232 } backtrack_common;
233 
234 typedef struct assert_backtrack {
235   backtrack_common common;
236   jump_list *condfailed;
237   /* Less than 0 if a frame is not needed. */
238   int framesize;
239   /* Points to our private memory word on the stack. */
240   int private_data_ptr;
241   /* For iterators. */
242   struct sljit_label *matchingpath;
243 } assert_backtrack;
244 
245 typedef struct bracket_backtrack {
246   backtrack_common common;
247   /* Where to coninue if an alternative is successfully matched. */
248   struct sljit_label *alternative_matchingpath;
249   /* For rmin and rmax iterators. */
250   struct sljit_label *recursive_matchingpath;
251   /* For greedy ? operator. */
252   struct sljit_label *zero_matchingpath;
253   /* Contains the branches of a failed condition. */
254   union {
255     /* Both for OP_COND, OP_SCOND. */
256     jump_list *condfailed;
257     assert_backtrack *assert;
258     /* For OP_ONCE. Less than 0 if not needed. */
259     int framesize;
260   } u;
261   /* Points to our private memory word on the stack. */
262   int private_data_ptr;
263 } bracket_backtrack;
264 
265 typedef struct bracketpos_backtrack {
266   backtrack_common common;
267   /* Points to our private memory word on the stack. */
268   int private_data_ptr;
269   /* Reverting stack is needed. */
270   int framesize;
271   /* Allocated stack size. */
272   int stacksize;
273 } bracketpos_backtrack;
274 
275 typedef struct braminzero_backtrack {
276   backtrack_common common;
277   struct sljit_label *matchingpath;
278 } braminzero_backtrack;
279 
280 typedef struct char_iterator_backtrack {
281   backtrack_common common;
282   /* Next iteration. */
283   struct sljit_label *matchingpath;
284   union {
285     jump_list *backtracks;
286     struct {
287       unsigned int othercasebit;
288       pcre_uchar chr;
289       BOOL enabled;
290     } charpos;
291   } u;
292 } char_iterator_backtrack;
293 
294 typedef struct ref_iterator_backtrack {
295   backtrack_common common;
296   /* Next iteration. */
297   struct sljit_label *matchingpath;
298 } ref_iterator_backtrack;
299 
300 typedef struct recurse_entry {
301   struct recurse_entry *next;
302   /* Contains the function entry. */
303   struct sljit_label *entry;
304   /* Collects the calls until the function is not created. */
305   jump_list *calls;
306   /* Points to the starting opcode. */
307   sljit_sw start;
308 } recurse_entry;
309 
310 typedef struct recurse_backtrack {
311   backtrack_common common;
312   BOOL inlined_pattern;
313 } recurse_backtrack;
314 
315 #define OP_THEN_TRAP OP_TABLE_LENGTH
316 
317 typedef struct then_trap_backtrack {
318   backtrack_common common;
319   /* If then_trap is not NULL, this structure contains the real
320   then_trap for the backtracking path. */
321   struct then_trap_backtrack *then_trap;
322   /* Points to the starting opcode. */
323   sljit_sw start;
324   /* Exit point for the then opcodes of this alternative. */
325   jump_list *quit;
326   /* Frame size of the current alternative. */
327   int framesize;
328 } then_trap_backtrack;
329 
330 #define MAX_RANGE_SIZE 4
331 
332 typedef struct compiler_common {
333   /* The sljit ceneric compiler. */
334   struct sljit_compiler *compiler;
335   /* First byte code. */
336   pcre_uchar *start;
337   /* Maps private data offset to each opcode. */
338   sljit_s32 *private_data_ptrs;
339   /* Chain list of read-only data ptrs. */
340   void *read_only_data_head;
341   /* Tells whether the capturing bracket is optimized. */
342   sljit_u8 *optimized_cbracket;
343   /* Tells whether the starting offset is a target of then. */
344   sljit_u8 *then_offsets;
345   /* Current position where a THEN must jump. */
346   then_trap_backtrack *then_trap;
347   /* Starting offset of private data for capturing brackets. */
348   sljit_s32 cbra_ptr;
349   /* Output vector starting point. Must be divisible by 2. */
350   sljit_s32 ovector_start;
351   /* Points to the starting character of the current match. */
352   sljit_s32 start_ptr;
353   /* Last known position of the requested byte. */
354   sljit_s32 req_char_ptr;
355   /* Head of the last recursion. */
356   sljit_s32 recursive_head_ptr;
357   /* First inspected character for partial matching.
358      (Needed for avoiding zero length partial matches.) */
359   sljit_s32 start_used_ptr;
360   /* Starting pointer for partial soft matches. */
361   sljit_s32 hit_start;
362   /* Pointer of the match end position. */
363   sljit_s32 match_end_ptr;
364   /* Points to the marked string. */
365   sljit_s32 mark_ptr;
366   /* Recursive control verb management chain. */
367   sljit_s32 control_head_ptr;
368   /* Points to the last matched capture block index. */
369   sljit_s32 capture_last_ptr;
370   /* Fast forward skipping byte code pointer. */
371   pcre_uchar *fast_forward_bc_ptr;
372   /* Locals used by fast fail optimization. */
373   sljit_s32 fast_fail_start_ptr;
374   sljit_s32 fast_fail_end_ptr;
375 
376   /* Flipped and lower case tables. */
377   const sljit_u8 *fcc;
378   sljit_sw lcc;
379   /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
380   int mode;
381   /* TRUE, when minlength is greater than 0. */
382   BOOL might_be_empty;
383   /* \K is found in the pattern. */
384   BOOL has_set_som;
385   /* (*SKIP:arg) is found in the pattern. */
386   BOOL has_skip_arg;
387   /* (*THEN) is found in the pattern. */
388   BOOL has_then;
389   /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
390   BOOL has_skip_in_assert_back;
391   /* Currently in recurse or negative assert. */
392   BOOL local_exit;
393   /* Currently in a positive assert. */
394   BOOL positive_assert;
395   /* Newline control. */
396   int nltype;
397   sljit_u32 nlmax;
398   sljit_u32 nlmin;
399   int newline;
400   int bsr_nltype;
401   sljit_u32 bsr_nlmax;
402   sljit_u32 bsr_nlmin;
403   /* Dollar endonly. */
404   int endonly;
405   /* Tables. */
406   sljit_sw ctypes;
407   /* Named capturing brackets. */
408   pcre_uchar *name_table;
409   sljit_sw name_count;
410   sljit_sw name_entry_size;
411 
412   /* Labels and jump lists. */
413   struct sljit_label *partialmatchlabel;
414   struct sljit_label *quit_label;
415   struct sljit_label *forced_quit_label;
416   struct sljit_label *accept_label;
417   struct sljit_label *ff_newline_shortcut;
418   stub_list *stubs;
419   label_addr_list *label_addrs;
420   recurse_entry *entries;
421   recurse_entry *currententry;
422   jump_list *partialmatch;
423   jump_list *quit;
424   jump_list *positive_assert_quit;
425   jump_list *forced_quit;
426   jump_list *accept;
427   jump_list *calllimit;
428   jump_list *stackalloc;
429   jump_list *revertframes;
430   jump_list *wordboundary;
431   jump_list *anynewline;
432   jump_list *hspace;
433   jump_list *vspace;
434   jump_list *casefulcmp;
435   jump_list *caselesscmp;
436   jump_list *reset_match;
437   BOOL jscript_compat;
438 #ifdef SUPPORT_UTF
439   BOOL utf;
440 #ifdef SUPPORT_UCP
441   BOOL use_ucp;
442   jump_list *getucd;
443 #endif
444 #ifdef COMPILE_PCRE8
445   jump_list *utfreadchar;
446   jump_list *utfreadchar16;
447   jump_list *utfreadtype8;
448 #endif
449 #endif /* SUPPORT_UTF */
450 } compiler_common;
451 
/* Context for byte_sequence_compare. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  /* c and oc have the same layout: up to four bytes viewed as an int, a
  16 bit value, or an array of code units. NOTE(review): presumably c is
  the character data and oc its other-case variant - confirm against
  byte_sequence_compare. */
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if defined COMPILE_PCRE8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_u16 asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_u32 asuchars[1];
#endif
  } c, oc;
#endif
} compare_context;
485 
/* Drop the sljit CMP macro; this file defines its own CMP shortcut. */
#undef CMP

/* Byte offset of the i-th machine word; used to access stack elements. */
#define STACK(i)      ((i) * (int)sizeof(sljit_sw))

/* Scratch (R) register assignments. */
#define TMP1          SLJIT_R0
#define STACK_TOP     SLJIT_R1
#define TMP2          SLJIT_R2
#define TMP3          SLJIT_R3
#define RETURN_ADDR   SLJIT_R4

/* Saved (S) register assignments. */
#define STR_PTR       SLJIT_S0
#define STR_END       SLJIT_S1
#define STACK_LIMIT   SLJIT_S2
#define COUNT_MATCH   SLJIT_S3
#define ARGUMENTS     SLJIT_S4
502 
/* Layout of the local space. */
/* Words 0 and 1: free for use by the current opcode. */
#define LOCALS0          (0 * sizeof(sljit_sw))
#define LOCALS1          (1 * sizeof(sljit_sw))
/* Words 2 and 3: locals for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_sw))
#define POSSESSIVE1      (3 * sizeof(sljit_sw))
/* Word 4: max limit of recursions. */
#define LIMIT_MATCH      (4 * sizeof(sljit_sw))
/* The output vector lives on the stack and holds character pointers.
Its data forms two groups: the first holds the start / end character
pointers, the second holds the start pointers of capturing groups whose
end has not been reached yet. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data offset recorded for the opcode at cc. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
520 
521 #if defined COMPILE_PCRE8
522 #define MOV_UCHAR  SLJIT_MOV_U8
523 #define MOVU_UCHAR SLJIT_MOVU_U8
524 #elif defined COMPILE_PCRE16
525 #define MOV_UCHAR  SLJIT_MOV_U16
526 #define MOVU_UCHAR SLJIT_MOVU_U16
527 #elif defined COMPILE_PCRE32
528 #define MOV_UCHAR  SLJIT_MOV_U32
529 #define MOVU_UCHAR SLJIT_MOVU_U32
530 #else
531 #error Unsupported compiling mode
532 #endif
533 
/* Shortcuts. Apart from DEFINE_COMPILER itself, every macro below expects a
local variable named "compiler", which DEFINE_COMPILER declares from
common->compiler. */
#define DEFINE_COMPILER struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() sljit_emit_label(compiler)
#define JUMP(type) sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label) sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
#define JUMPHERE(jump) sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, type) sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) sljit_get_local_base(compiler, (dst), (dstw), (offset))

#define READ_CHAR_MAX 0x7fffffff

#define INVALID_UTF_CHAR 888
563 
/* Returns the address just past the closing KET (and its link field) of
the assert / bracket opcode that starts at cc. */
static pcre_uchar *bracketend(pcre_uchar *cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* Follow the link value at offset 1, first from the opening opcode and
then from every OP_ALT, until something other than OP_ALT is reached. */
cc += GET(cc, 1);
while (*cc == OP_ALT)
  cc += GET(cc, 1);

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return cc + 1 + LINK_SIZE;
}
572 
/* Returns the number of alternatives of the assert / bracket opcode that
starts at cc (always at least 1). */
static int no_alternatives(pcre_uchar *cc)
{
int count = 1;

SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* The first link leaves the first alternative; every OP_ALT reached
after that starts one more alternative. */
for (cc += GET(cc, 1); *cc == OP_ALT; cc += GET(cc, 1))
  count++;

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return count;
}
586 
/* Functions which might need modification for every newly supported opcode:
588  next_opcode
589  check_opcode_types
590  set_private_data_ptrs
591  get_framesize
592  init_frame
593  get_private_data_copy_length
594  copy_private_data
595  compile_matchingpath
596  compile_backtrackingpath
597 */
598 
/* Returns the address of the opcode that follows the one at cc.
Returns NULL for OP_ANYBYTE when common->utf is set (SUPPORT_UTF builds),
and for any opcode that is not listed (after SLJIT_UNREACHABLE). */
static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
{
pcre_uchar *next;

SLJIT_UNUSED_ARG(common);
switch(*cc)
  {
  /* The length comes straight from the OP_lengths table. */
  case OP_SOD: case OP_SOM: case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT: case OP_DIGIT:
  case OP_NOT_WHITESPACE: case OP_WHITESPACE:
  case OP_NOT_WORDCHAR: case OP_WORDCHAR:
  case OP_ANY: case OP_ALLANY:
  case OP_NOTPROP: case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE: case OP_HSPACE:
  case OP_NOT_VSPACE: case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN: case OP_EOD:
  case OP_CIRC: case OP_CIRCM:
  case OP_DOLL: case OP_DOLLM:
  case OP_CRSTAR: case OP_CRMINSTAR:
  case OP_CRPLUS: case OP_CRMINPLUS:
  case OP_CRQUERY: case OP_CRMINQUERY:
  case OP_CRRANGE: case OP_CRMINRANGE:
  case OP_CRPOSSTAR: case OP_CRPOSPLUS: case OP_CRPOSQUERY: case OP_CRPOSRANGE:
  case OP_CLASS: case OP_NCLASS:
  case OP_REF: case OP_REFI:
  case OP_DNREF: case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_KETRPOS:
  case OP_REVERSE:
  case OP_ASSERT: case OP_ASSERT_NOT:
  case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
  case OP_ONCE: case OP_ONCE_NC:
  case OP_BRA: case OP_BRAPOS:
  case OP_CBRA: case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA: case OP_SBRAPOS:
  case OP_SCBRA: case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF: case OP_DNCREF:
  case OP_RREF: case OP_DNRREF:
  case OP_DEF:
  case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO:
  case OP_PRUNE: case OP_SKIP: case OP_THEN: case OP_COMMIT:
  case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  return cc + PRIV(OP_lengths)[*cc];

  /* Table length, plus the extra code units reported for the last code
  unit when common->utf is set. */
  case OP_CHAR: case OP_CHARI:
  case OP_NOT: case OP_NOTI:
  case OP_STAR: case OP_MINSTAR:
  case OP_PLUS: case OP_MINPLUS:
  case OP_QUERY: case OP_MINQUERY:
  case OP_UPTO: case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
  case OP_STARI: case OP_MINSTARI:
  case OP_PLUSI: case OP_MINPLUSI:
  case OP_QUERYI: case OP_MINQUERYI:
  case OP_UPTOI: case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
  case OP_NOTSTAR: case OP_NOTMINSTAR:
  case OP_NOTPLUS: case OP_NOTMINPLUS:
  case OP_NOTQUERY: case OP_NOTMINQUERY:
  case OP_NOTUPTO: case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
  case OP_NOTSTARI: case OP_NOTMINSTARI:
  case OP_NOTPLUSI: case OP_NOTMINPLUSI:
  case OP_NOTQUERYI: case OP_NOTMINQUERYI:
  case OP_NOTUPTOI: case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
  next = cc + PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(next[-1]))
    next += GET_EXTRALEN(next[-1]);
#endif
  return next;

  /* Special cases: one less than the table length. */
  case OP_TYPESTAR: case OP_TYPEMINSTAR:
  case OP_TYPEPLUS: case OP_TYPEMINPLUS:
  case OP_TYPEQUERY: case OP_TYPEMINQUERY:
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[*cc] - 1;

  case OP_ANYBYTE:
#ifdef SUPPORT_UTF
  if (common->utf)
    return NULL;
#endif
  return cc + 1;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  /* The total length is stored after the opcode. */
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  /* Opcode, two further code units, and cc[1] more code units. */
  case OP_MARK: case OP_PRUNE_ARG: case OP_SKIP_ARG: case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  default:
  /* All opcodes are supported now! */
  SLJIT_UNREACHABLE();
  return NULL;
  }
}
793 
check_opcode_types(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend)794 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
795 {
796 int count;
797 pcre_uchar *slot;
798 pcre_uchar *assert_back_end = cc - 1;
799 
800 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
801 while (cc < ccend)
802   {
803   switch(*cc)
804     {
805     case OP_SET_SOM:
806     common->has_set_som = TRUE;
807     common->might_be_empty = TRUE;
808     cc += 1;
809     break;
810 
811     case OP_REF:
812     case OP_REFI:
813     common->optimized_cbracket[GET2(cc, 1)] = 0;
814     cc += 1 + IMM2_SIZE;
815     break;
816 
817     case OP_CBRAPOS:
818     case OP_SCBRAPOS:
819     common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
820     cc += 1 + LINK_SIZE + IMM2_SIZE;
821     break;
822 
823     case OP_COND:
824     case OP_SCOND:
825     /* Only AUTO_CALLOUT can insert this opcode. We do
826        not intend to support this case. */
827     if (cc[1 + LINK_SIZE] == OP_CALLOUT)
828       return FALSE;
829     cc += 1 + LINK_SIZE;
830     break;
831 
832     case OP_CREF:
833     common->optimized_cbracket[GET2(cc, 1)] = 0;
834     cc += 1 + IMM2_SIZE;
835     break;
836 
837     case OP_DNREF:
838     case OP_DNREFI:
839     case OP_DNCREF:
840     count = GET2(cc, 1 + IMM2_SIZE);
841     slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
842     while (count-- > 0)
843       {
844       common->optimized_cbracket[GET2(slot, 0)] = 0;
845       slot += common->name_entry_size;
846       }
847     cc += 1 + 2 * IMM2_SIZE;
848     break;
849 
850     case OP_RECURSE:
851     /* Set its value only once. */
852     if (common->recursive_head_ptr == 0)
853       {
854       common->recursive_head_ptr = common->ovector_start;
855       common->ovector_start += sizeof(sljit_sw);
856       }
857     cc += 1 + LINK_SIZE;
858     break;
859 
860     case OP_CALLOUT:
861     if (common->capture_last_ptr == 0)
862       {
863       common->capture_last_ptr = common->ovector_start;
864       common->ovector_start += sizeof(sljit_sw);
865       }
866     cc += 2 + 2 * LINK_SIZE;
867     break;
868 
869     case OP_ASSERTBACK:
870     slot = bracketend(cc);
871     if (slot > assert_back_end)
872       assert_back_end = slot;
873     cc += 1 + LINK_SIZE;
874     break;
875 
876     case OP_THEN_ARG:
877     common->has_then = TRUE;
878     common->control_head_ptr = 1;
879     /* Fall through. */
880 
881     case OP_PRUNE_ARG:
882     case OP_MARK:
883     if (common->mark_ptr == 0)
884       {
885       common->mark_ptr = common->ovector_start;
886       common->ovector_start += sizeof(sljit_sw);
887       }
888     cc += 1 + 2 + cc[1];
889     break;
890 
891     case OP_THEN:
892     common->has_then = TRUE;
893     common->control_head_ptr = 1;
894     cc += 1;
895     break;
896 
897     case OP_SKIP:
898     if (cc < assert_back_end)
899       common->has_skip_in_assert_back = TRUE;
900     cc += 1;
901     break;
902 
903     case OP_SKIP_ARG:
904     common->control_head_ptr = 1;
905     common->has_skip_arg = TRUE;
906     if (cc < assert_back_end)
907       common->has_skip_in_assert_back = TRUE;
908     cc += 1 + 2 + cc[1];
909     break;
910 
911     default:
912     cc = next_opcode(common, cc);
913     if (cc == NULL)
914       return FALSE;
915     break;
916     }
917   }
918 return TRUE;
919 }
920 
is_accelerated_repeat(pcre_uchar * cc)921 static BOOL is_accelerated_repeat(pcre_uchar *cc)
922 {
923 switch(*cc)
924   {
925   case OP_TYPESTAR:
926   case OP_TYPEMINSTAR:
927   case OP_TYPEPLUS:
928   case OP_TYPEMINPLUS:
929   case OP_TYPEPOSSTAR:
930   case OP_TYPEPOSPLUS:
931   return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI);
932 
933   case OP_STAR:
934   case OP_MINSTAR:
935   case OP_PLUS:
936   case OP_MINPLUS:
937   case OP_POSSTAR:
938   case OP_POSPLUS:
939 
940   case OP_STARI:
941   case OP_MINSTARI:
942   case OP_PLUSI:
943   case OP_MINPLUSI:
944   case OP_POSSTARI:
945   case OP_POSPLUSI:
946 
947   case OP_NOTSTAR:
948   case OP_NOTMINSTAR:
949   case OP_NOTPLUS:
950   case OP_NOTMINPLUS:
951   case OP_NOTPOSSTAR:
952   case OP_NOTPOSPLUS:
953 
954   case OP_NOTSTARI:
955   case OP_NOTMINSTARI:
956   case OP_NOTPLUSI:
957   case OP_NOTMINPLUSI:
958   case OP_NOTPOSSTARI:
959   case OP_NOTPOSPLUSI:
960   return TRUE;
961 
962   case OP_CLASS:
963   case OP_NCLASS:
964 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
965   case OP_XCLASS:
966   cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(pcre_uchar)));
967 #else
968   cc += (1 + (32 / sizeof(pcre_uchar)));
969 #endif
970 
971   switch(*cc)
972     {
973     case OP_CRSTAR:
974     case OP_CRMINSTAR:
975     case OP_CRPLUS:
976     case OP_CRMINPLUS:
977     case OP_CRPOSSTAR:
978     case OP_CRPOSPLUS:
979     return TRUE;
980     }
981   break;
982   }
983 return FALSE;
984 }
985 
detect_fast_forward_skip(compiler_common * common,int * private_data_start)986 static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
987 {
988 pcre_uchar *cc = common->start;
989 pcre_uchar *end;
990 
991 /* Skip not repeated brackets. */
992 while (TRUE)
993   {
994   switch(*cc)
995     {
996     case OP_SOD:
997     case OP_SOM:
998     case OP_SET_SOM:
999     case OP_NOT_WORD_BOUNDARY:
1000     case OP_WORD_BOUNDARY:
1001     case OP_EODN:
1002     case OP_EOD:
1003     case OP_CIRC:
1004     case OP_CIRCM:
1005     case OP_DOLL:
1006     case OP_DOLLM:
1007     /* Zero width assertions. */
1008     cc++;
1009     continue;
1010     }
1011 
1012   if (*cc != OP_BRA && *cc != OP_CBRA)
1013     break;
1014 
1015   end = cc + GET(cc, 1);
1016   if (*end != OP_KET || PRIVATE_DATA(end) != 0)
1017     return FALSE;
1018   if (*cc == OP_CBRA)
1019     {
1020     if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1021       return FALSE;
1022     cc += IMM2_SIZE;
1023     }
1024   cc += 1 + LINK_SIZE;
1025   }
1026 
1027 if (is_accelerated_repeat(cc))
1028   {
1029   common->fast_forward_bc_ptr = cc;
1030   common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1031   *private_data_start += sizeof(sljit_sw);
1032   return TRUE;
1033   }
1034 return FALSE;
1035 }
1036 
detect_fast_fail(compiler_common * common,pcre_uchar * cc,int * private_data_start,sljit_s32 depth)1037 static SLJIT_INLINE void detect_fast_fail(compiler_common *common, pcre_uchar *cc, int *private_data_start, sljit_s32 depth)
1038 {
1039   pcre_uchar *next_alt;
1040 
1041   SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
1042 
1043   if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1044     return;
1045 
1046   next_alt = bracketend(cc) - (1 + LINK_SIZE);
1047   if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
1048     return;
1049 
1050   do
1051     {
1052     next_alt = cc + GET(cc, 1);
1053 
1054     cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1055 
1056     while (TRUE)
1057       {
1058       switch(*cc)
1059         {
1060         case OP_SOD:
1061         case OP_SOM:
1062         case OP_SET_SOM:
1063         case OP_NOT_WORD_BOUNDARY:
1064         case OP_WORD_BOUNDARY:
1065         case OP_EODN:
1066         case OP_EOD:
1067         case OP_CIRC:
1068         case OP_CIRCM:
1069         case OP_DOLL:
1070         case OP_DOLLM:
1071         /* Zero width assertions. */
1072         cc++;
1073         continue;
1074         }
1075       break;
1076       }
1077 
1078     if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
1079       detect_fast_fail(common, cc, private_data_start, depth - 1);
1080 
1081     if (is_accelerated_repeat(cc))
1082       {
1083       common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1084 
1085       if (common->fast_fail_start_ptr == 0)
1086         common->fast_fail_start_ptr = *private_data_start;
1087 
1088       *private_data_start += sizeof(sljit_sw);
1089       common->fast_fail_end_ptr = *private_data_start;
1090 
1091       if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1092         return;
1093       }
1094 
1095     cc = next_alt;
1096     }
1097   while (*cc == OP_ALT);
1098 }
1099 
/* Returns the number of private data words (0, 1 or 2) used for the class
repeat opcode at cc:
  OP_CRSTAR, OP_CRPLUS                                   -> 2
  OP_CRMINSTAR, OP_CRMINPLUS, OP_CRQUERY, OP_CRMINQUERY  -> 1
  OP_CRRANGE, OP_CRMINRANGE with a zero second value     -> 2 resp. 1
  OP_CRRANGE, OP_CRMINRANGE otherwise                    -> second value
                                                            minus first one,
                                                            capped at 2
  anything else                                          -> 0 */
static int get_class_iterator_size(pcre_uchar *cc)
{
sljit_u32 lower;
sljit_u32 span;
int words = 0;

switch(*cc)
  {
  case OP_CRSTAR:
  case OP_CRPLUS:
  words = 2;
  break;

  case OP_CRMINSTAR:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  words = 1;
  break;

  case OP_CRRANGE:
  case OP_CRMINRANGE:
  lower = GET2(cc, 1);
  span = GET2(cc, 1 + IMM2_SIZE);
  if (span == 0)
    words = (*cc == OP_CRMINRANGE) ? 1 : 2;
  else
    {
    /* Unsigned arithmetic, exactly as the two values are stored. */
    span -= lower;
    words = (span > 2) ? 2 : (int)span;
    }
  break;

  default:
  break;
  }

return words;
}
1131 
detect_repeat(compiler_common * common,pcre_uchar * begin)1132 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
1133 {
1134 pcre_uchar *end = bracketend(begin);
1135 pcre_uchar *next;
1136 pcre_uchar *next_end;
1137 pcre_uchar *max_end;
1138 pcre_uchar type;
1139 sljit_sw length = end - begin;
1140 int min, max, i;
1141 
1142 /* Detect fixed iterations first. */
1143 if (end[-(1 + LINK_SIZE)] != OP_KET)
1144   return FALSE;
1145 
1146 /* Already detected repeat. */
1147 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
1148   return TRUE;
1149 
1150 next = end;
1151 min = 1;
1152 while (1)
1153   {
1154   if (*next != *begin)
1155     break;
1156   next_end = bracketend(next);
1157   if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1158     break;
1159   next = next_end;
1160   min++;
1161   }
1162 
1163 if (min == 2)
1164   return FALSE;
1165 
1166 max = 0;
1167 max_end = next;
1168 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1169   {
1170   type = *next;
1171   while (1)
1172     {
1173     if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1174       break;
1175     next_end = bracketend(next + 2 + LINK_SIZE);
1176     if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1177       break;
1178     next = next_end;
1179     max++;
1180     }
1181 
1182   if (next[0] == type && next[1] == *begin && max >= 1)
1183     {
1184     next_end = bracketend(next + 1);
1185     if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1186       {
1187       for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1188         if (*next_end != OP_KET)
1189           break;
1190 
1191       if (i == max)
1192         {
1193         common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1194         common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1195         /* +2 the original and the last. */
1196         common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1197         if (min == 1)
1198           return TRUE;
1199         min--;
1200         max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1201         }
1202       }
1203     }
1204   }
1205 
1206 if (min >= 3)
1207   {
1208   common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1209   common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1210   common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1211   return TRUE;
1212   }
1213 
1214 return FALSE;
1215 }
1216 
/* Case label lists shared by the switches of set_private_data_ptrs(),
get_private_data_copy_length() and copy_private_data(). The labels are
grouped by opcode family: plain, caseless, negated, negated caseless. */

/* Single character iterators which get one private data word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: case OP_MINSTARI: \
    case OP_NOTMINSTAR: case OP_NOTMINSTARI: \
    case OP_MINPLUS: case OP_MINPLUSI: \
    case OP_NOTMINPLUS: case OP_NOTMINPLUSI: \
    case OP_QUERY: case OP_QUERYI: \
    case OP_NOTQUERY: case OP_NOTQUERYI: \
    case OP_MINQUERY: case OP_MINQUERYI: \
    case OP_NOTMINQUERY: case OP_NOTMINQUERYI:

/* Single character iterators which get two private data words. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: case OP_STARI: \
    case OP_NOTSTAR: case OP_NOTSTARI: \
    case OP_PLUS: case OP_PLUSI: \
    case OP_NOTPLUS: case OP_NOTPLUSI:

/* Single character iterators which get two private data words and carry an
IMM2 sized value before the character. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: case OP_UPTOI: \
    case OP_NOTUPTO: case OP_NOTUPTOI: \
    case OP_MINUPTO: case OP_MINUPTOI: \
    case OP_NOTMINUPTO: case OP_NOTMINUPTOI:

/* Character type iterators which get one private data word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:

/* Character type iterators which get two private data words. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

/* Character type iterators which get two private data words and carry an
IMM2 sized value. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
1268 
/* Walks the compiled pattern from common->start up to ccend and hands out
private data words. The offset given to an opcode is stored in
common->private_data_ptrs at the code offset of that opcode.

  common              compiler state; private_data_ptrs is filled in
  private_data_start  in: first free offset, out: first free offset after
                      all assignments made here
  ccend               end of the code range to process

Brackets recognized by detect_repeat() get one word recorded at their first
closing OP_KET and the remaining copies are skipped. Assertions, atomic and
possessive groups and the S* brackets get one word each. Single character,
character type and class iterators get one or two words, but only outside of
the region tracked by "end". The walk stops once more than
SLJIT_MAX_LOCAL_SIZE has been used. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
{
pcre_uchar *cc = common->start;
pcre_uchar *alternative;
/* End of the region in which iterators must not get private words:
the end of a bracket converted to a repeat, or of a bracket which is not
closed by a plain OP_KET. NULL when there is no such region. */
pcre_uchar *end = NULL;
int private_data_ptr = *private_data_start;
/* space: words requested by an iterator, size: length of a non-bracket
item (a negative value means a character with possible UTF extra units
follows), bracketlen: length of a bracket header. */
int space, size, bracketlen;
BOOL repeat_check = TRUE;

while (cc < ccend)
  {
  space = 0;
  size = 0;
  bracketlen = 0;
  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
    {
    if (detect_repeat(common, cc))
      {
      /* These brackets are converted to repeats, so no global
      based single character repeat is allowed. */
      if (cc >= end)
        end = bracketend(cc);
      }
    }
  repeat_check = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry after the OP_KET was left there by detect_repeat():
    it is the distance to the end of the repeated copies. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    /* The bracket which follows is not checked by detect_repeat(). */
    repeat_check = FALSE;
    size = 1;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    space = 1;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    space = 2;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    space = 2;
    size = -(2 + IMM2_SIZE);
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    space = 1;
    size = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      space = 2;
    size = 1;
    break;

    case OP_TYPEUPTO:
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    size = 1 + IMM2_SIZE;
    break;

    case OP_TYPEMINUPTO:
    space = 2;
    size = 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* The repeat opcode, if any, follows the class data, so the length of
    the class has to be known before the iterator can be inspected. */
    size = 1 + 32 / sizeof(pcre_uchar);
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
    /* Same as above: the repeat opcode is located after the whole item. */
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
     gets a private slot instead of allocating it on the stack. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw) * space;
    }

  if (size != 0)
    {
    if (size < 0)
      {
      /* The item ends with a character, which may be followed by extra
      code units in UTF mode. */
      cc += -size;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  if (bracketlen > 0)
    {
    if (cc >= end)
      {
      /* Track the bracket only when it is not closed by a plain OP_KET. */
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
*private_data_start = private_data_ptr;
}
1449 
1450 /* Returns with a frame_types (always < 0) if no need for frame. */
get_framesize(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,BOOL recursive,BOOL * needs_control_head)1451 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1452 {
1453 int length = 0;
1454 int possessive = 0;
1455 BOOL stack_restore = FALSE;
1456 BOOL setsom_found = recursive;
1457 BOOL setmark_found = recursive;
1458 /* The last capture is a local variable even for recursions. */
1459 BOOL capture_last_found = FALSE;
1460 
1461 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1462 SLJIT_ASSERT(common->control_head_ptr != 0);
1463 *needs_control_head = TRUE;
1464 #else
1465 *needs_control_head = FALSE;
1466 #endif
1467 
1468 if (ccend == NULL)
1469   {
1470   ccend = bracketend(cc) - (1 + LINK_SIZE);
1471   if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1472     {
1473     possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1474     /* This is correct regardless of common->capture_last_ptr. */
1475     capture_last_found = TRUE;
1476     }
1477   cc = next_opcode(common, cc);
1478   }
1479 
1480 SLJIT_ASSERT(cc != NULL);
1481 while (cc < ccend)
1482   switch(*cc)
1483     {
1484     case OP_SET_SOM:
1485     SLJIT_ASSERT(common->has_set_som);
1486     stack_restore = TRUE;
1487     if (!setsom_found)
1488       {
1489       length += 2;
1490       setsom_found = TRUE;
1491       }
1492     cc += 1;
1493     break;
1494 
1495     case OP_MARK:
1496     case OP_PRUNE_ARG:
1497     case OP_THEN_ARG:
1498     SLJIT_ASSERT(common->mark_ptr != 0);
1499     stack_restore = TRUE;
1500     if (!setmark_found)
1501       {
1502       length += 2;
1503       setmark_found = TRUE;
1504       }
1505     if (common->control_head_ptr != 0)
1506       *needs_control_head = TRUE;
1507     cc += 1 + 2 + cc[1];
1508     break;
1509 
1510     case OP_RECURSE:
1511     stack_restore = TRUE;
1512     if (common->has_set_som && !setsom_found)
1513       {
1514       length += 2;
1515       setsom_found = TRUE;
1516       }
1517     if (common->mark_ptr != 0 && !setmark_found)
1518       {
1519       length += 2;
1520       setmark_found = TRUE;
1521       }
1522     if (common->capture_last_ptr != 0 && !capture_last_found)
1523       {
1524       length += 2;
1525       capture_last_found = TRUE;
1526       }
1527     cc += 1 + LINK_SIZE;
1528     break;
1529 
1530     case OP_CBRA:
1531     case OP_CBRAPOS:
1532     case OP_SCBRA:
1533     case OP_SCBRAPOS:
1534     stack_restore = TRUE;
1535     if (common->capture_last_ptr != 0 && !capture_last_found)
1536       {
1537       length += 2;
1538       capture_last_found = TRUE;
1539       }
1540     length += 3;
1541     cc += 1 + LINK_SIZE + IMM2_SIZE;
1542     break;
1543 
1544     case OP_THEN:
1545     stack_restore = TRUE;
1546     if (common->control_head_ptr != 0)
1547       *needs_control_head = TRUE;
1548     cc ++;
1549     break;
1550 
1551     default:
1552     stack_restore = TRUE;
1553     /* Fall through. */
1554 
1555     case OP_NOT_WORD_BOUNDARY:
1556     case OP_WORD_BOUNDARY:
1557     case OP_NOT_DIGIT:
1558     case OP_DIGIT:
1559     case OP_NOT_WHITESPACE:
1560     case OP_WHITESPACE:
1561     case OP_NOT_WORDCHAR:
1562     case OP_WORDCHAR:
1563     case OP_ANY:
1564     case OP_ALLANY:
1565     case OP_ANYBYTE:
1566     case OP_NOTPROP:
1567     case OP_PROP:
1568     case OP_ANYNL:
1569     case OP_NOT_HSPACE:
1570     case OP_HSPACE:
1571     case OP_NOT_VSPACE:
1572     case OP_VSPACE:
1573     case OP_EXTUNI:
1574     case OP_EODN:
1575     case OP_EOD:
1576     case OP_CIRC:
1577     case OP_CIRCM:
1578     case OP_DOLL:
1579     case OP_DOLLM:
1580     case OP_CHAR:
1581     case OP_CHARI:
1582     case OP_NOT:
1583     case OP_NOTI:
1584 
1585     case OP_EXACT:
1586     case OP_POSSTAR:
1587     case OP_POSPLUS:
1588     case OP_POSQUERY:
1589     case OP_POSUPTO:
1590 
1591     case OP_EXACTI:
1592     case OP_POSSTARI:
1593     case OP_POSPLUSI:
1594     case OP_POSQUERYI:
1595     case OP_POSUPTOI:
1596 
1597     case OP_NOTEXACT:
1598     case OP_NOTPOSSTAR:
1599     case OP_NOTPOSPLUS:
1600     case OP_NOTPOSQUERY:
1601     case OP_NOTPOSUPTO:
1602 
1603     case OP_NOTEXACTI:
1604     case OP_NOTPOSSTARI:
1605     case OP_NOTPOSPLUSI:
1606     case OP_NOTPOSQUERYI:
1607     case OP_NOTPOSUPTOI:
1608 
1609     case OP_TYPEEXACT:
1610     case OP_TYPEPOSSTAR:
1611     case OP_TYPEPOSPLUS:
1612     case OP_TYPEPOSQUERY:
1613     case OP_TYPEPOSUPTO:
1614 
1615     case OP_CLASS:
1616     case OP_NCLASS:
1617     case OP_XCLASS:
1618     case OP_CALLOUT:
1619 
1620     cc = next_opcode(common, cc);
1621     SLJIT_ASSERT(cc != NULL);
1622     break;
1623     }
1624 
1625 /* Possessive quantifiers can use a special case. */
1626 if (SLJIT_UNLIKELY(possessive == length))
1627   return stack_restore ? no_frame : no_stack;
1628 
1629 if (length > 0)
1630   return length + 1;
1631 return stack_restore ? no_frame : no_stack;
1632 }
1633 
init_frame(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,int stackpos,int stacktop,BOOL recursive)1634 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1635 {
1636 DEFINE_COMPILER;
1637 BOOL setsom_found = recursive;
1638 BOOL setmark_found = recursive;
1639 /* The last capture is a local variable even for recursions. */
1640 BOOL capture_last_found = FALSE;
1641 int offset;
1642 
1643 /* >= 1 + shortest item size (2) */
1644 SLJIT_UNUSED_ARG(stacktop);
1645 SLJIT_ASSERT(stackpos >= stacktop + 2);
1646 
1647 stackpos = STACK(stackpos);
1648 if (ccend == NULL)
1649   {
1650   ccend = bracketend(cc) - (1 + LINK_SIZE);
1651   if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1652     cc = next_opcode(common, cc);
1653   }
1654 
1655 SLJIT_ASSERT(cc != NULL);
1656 while (cc < ccend)
1657   switch(*cc)
1658     {
1659     case OP_SET_SOM:
1660     SLJIT_ASSERT(common->has_set_som);
1661     if (!setsom_found)
1662       {
1663       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1664       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1665       stackpos -= (int)sizeof(sljit_sw);
1666       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1667       stackpos -= (int)sizeof(sljit_sw);
1668       setsom_found = TRUE;
1669       }
1670     cc += 1;
1671     break;
1672 
1673     case OP_MARK:
1674     case OP_PRUNE_ARG:
1675     case OP_THEN_ARG:
1676     SLJIT_ASSERT(common->mark_ptr != 0);
1677     if (!setmark_found)
1678       {
1679       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1680       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1681       stackpos -= (int)sizeof(sljit_sw);
1682       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1683       stackpos -= (int)sizeof(sljit_sw);
1684       setmark_found = TRUE;
1685       }
1686     cc += 1 + 2 + cc[1];
1687     break;
1688 
1689     case OP_RECURSE:
1690     if (common->has_set_som && !setsom_found)
1691       {
1692       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1693       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1694       stackpos -= (int)sizeof(sljit_sw);
1695       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1696       stackpos -= (int)sizeof(sljit_sw);
1697       setsom_found = TRUE;
1698       }
1699     if (common->mark_ptr != 0 && !setmark_found)
1700       {
1701       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1702       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1703       stackpos -= (int)sizeof(sljit_sw);
1704       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1705       stackpos -= (int)sizeof(sljit_sw);
1706       setmark_found = TRUE;
1707       }
1708     if (common->capture_last_ptr != 0 && !capture_last_found)
1709       {
1710       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1711       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1712       stackpos -= (int)sizeof(sljit_sw);
1713       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1714       stackpos -= (int)sizeof(sljit_sw);
1715       capture_last_found = TRUE;
1716       }
1717     cc += 1 + LINK_SIZE;
1718     break;
1719 
1720     case OP_CBRA:
1721     case OP_CBRAPOS:
1722     case OP_SCBRA:
1723     case OP_SCBRAPOS:
1724     if (common->capture_last_ptr != 0 && !capture_last_found)
1725       {
1726       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1727       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1728       stackpos -= (int)sizeof(sljit_sw);
1729       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1730       stackpos -= (int)sizeof(sljit_sw);
1731       capture_last_found = TRUE;
1732       }
1733     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1734     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1735     stackpos -= (int)sizeof(sljit_sw);
1736     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1737     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1738     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1739     stackpos -= (int)sizeof(sljit_sw);
1740     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1741     stackpos -= (int)sizeof(sljit_sw);
1742 
1743     cc += 1 + LINK_SIZE + IMM2_SIZE;
1744     break;
1745 
1746     default:
1747     cc = next_opcode(common, cc);
1748     SLJIT_ASSERT(cc != NULL);
1749     break;
1750     }
1751 
1752 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1753 SLJIT_ASSERT(stackpos == STACK(stacktop));
1754 }
1755 
get_private_data_copy_length(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,BOOL needs_control_head)1756 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1757 {
1758 int private_data_length = needs_control_head ? 3 : 2;
1759 int size;
1760 pcre_uchar *alternative;
1761 /* Calculate the sum of the private machine words. */
1762 while (cc < ccend)
1763   {
1764   size = 0;
1765   switch(*cc)
1766     {
1767     case OP_KET:
1768     if (PRIVATE_DATA(cc) != 0)
1769       {
1770       private_data_length++;
1771       SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1772       cc += PRIVATE_DATA(cc + 1);
1773       }
1774     cc += 1 + LINK_SIZE;
1775     break;
1776 
1777     case OP_ASSERT:
1778     case OP_ASSERT_NOT:
1779     case OP_ASSERTBACK:
1780     case OP_ASSERTBACK_NOT:
1781     case OP_ONCE:
1782     case OP_ONCE_NC:
1783     case OP_BRAPOS:
1784     case OP_SBRA:
1785     case OP_SBRAPOS:
1786     case OP_SCOND:
1787     private_data_length++;
1788     SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1789     cc += 1 + LINK_SIZE;
1790     break;
1791 
1792     case OP_CBRA:
1793     case OP_SCBRA:
1794     if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1795       private_data_length++;
1796     cc += 1 + LINK_SIZE + IMM2_SIZE;
1797     break;
1798 
1799     case OP_CBRAPOS:
1800     case OP_SCBRAPOS:
1801     private_data_length += 2;
1802     cc += 1 + LINK_SIZE + IMM2_SIZE;
1803     break;
1804 
1805     case OP_COND:
1806     /* Might be a hidden SCOND. */
1807     alternative = cc + GET(cc, 1);
1808     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1809       private_data_length++;
1810     cc += 1 + LINK_SIZE;
1811     break;
1812 
1813     CASE_ITERATOR_PRIVATE_DATA_1
1814     if (PRIVATE_DATA(cc))
1815       private_data_length++;
1816     cc += 2;
1817 #ifdef SUPPORT_UTF
1818     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1819 #endif
1820     break;
1821 
1822     CASE_ITERATOR_PRIVATE_DATA_2A
1823     if (PRIVATE_DATA(cc))
1824       private_data_length += 2;
1825     cc += 2;
1826 #ifdef SUPPORT_UTF
1827     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1828 #endif
1829     break;
1830 
1831     CASE_ITERATOR_PRIVATE_DATA_2B
1832     if (PRIVATE_DATA(cc))
1833       private_data_length += 2;
1834     cc += 2 + IMM2_SIZE;
1835 #ifdef SUPPORT_UTF
1836     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1837 #endif
1838     break;
1839 
1840     CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1841     if (PRIVATE_DATA(cc))
1842       private_data_length++;
1843     cc += 1;
1844     break;
1845 
1846     CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1847     if (PRIVATE_DATA(cc))
1848       private_data_length += 2;
1849     cc += 1;
1850     break;
1851 
1852     CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1853     if (PRIVATE_DATA(cc))
1854       private_data_length += 2;
1855     cc += 1 + IMM2_SIZE;
1856     break;
1857 
1858     case OP_CLASS:
1859     case OP_NCLASS:
1860 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1861     case OP_XCLASS:
1862     size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1863 #else
1864     size = 1 + 32 / (int)sizeof(pcre_uchar);
1865 #endif
1866     if (PRIVATE_DATA(cc))
1867       private_data_length += get_class_iterator_size(cc + size);
1868     cc += size;
1869     break;
1870 
1871     default:
1872     cc = next_opcode(common, cc);
1873     SLJIT_ASSERT(cc != NULL);
1874     break;
1875     }
1876   }
1877 SLJIT_ASSERT(cc == ccend);
1878 return private_data_length;
1879 }
1880 
copy_private_data(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,BOOL save,int stackptr,int stacktop,BOOL needs_control_head)1881 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1882   BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1883 {
1884 DEFINE_COMPILER;
1885 int srcw[2];
1886 int count, size;
1887 BOOL tmp1next = TRUE;
1888 BOOL tmp1empty = TRUE;
1889 BOOL tmp2empty = TRUE;
1890 pcre_uchar *alternative;
1891 enum {
1892   loop,
1893   end
1894 } status;
1895 
1896 status = loop;
1897 stackptr = STACK(stackptr);
1898 stacktop = STACK(stacktop - 1);
1899 
1900 if (!save)
1901   {
1902   stacktop -= (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1903   if (stackptr < stacktop)
1904     {
1905     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1906     stackptr += sizeof(sljit_sw);
1907     tmp1empty = FALSE;
1908     }
1909   if (stackptr < stacktop)
1910     {
1911     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1912     stackptr += sizeof(sljit_sw);
1913     tmp2empty = FALSE;
1914     }
1915   /* The tmp1next must be TRUE in either way. */
1916   }
1917 
1918 SLJIT_ASSERT(common->recursive_head_ptr != 0);
1919 
1920 do
1921   {
1922   count = 0;
1923   if (cc >= ccend)
1924     {
1925     if (!save)
1926       break;
1927 
1928     count = 1;
1929     srcw[0] = common->recursive_head_ptr;
1930     if (needs_control_head)
1931       {
1932       SLJIT_ASSERT(common->control_head_ptr != 0);
1933       count = 2;
1934       srcw[0] = common->control_head_ptr;
1935       srcw[1] = common->recursive_head_ptr;
1936       }
1937     status = end;
1938     }
1939   else switch(*cc)
1940     {
1941     case OP_KET:
1942     if (PRIVATE_DATA(cc) != 0)
1943       {
1944       count = 1;
1945       srcw[0] = PRIVATE_DATA(cc);
1946       SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1947       cc += PRIVATE_DATA(cc + 1);
1948       }
1949     cc += 1 + LINK_SIZE;
1950     break;
1951 
1952     case OP_ASSERT:
1953     case OP_ASSERT_NOT:
1954     case OP_ASSERTBACK:
1955     case OP_ASSERTBACK_NOT:
1956     case OP_ONCE:
1957     case OP_ONCE_NC:
1958     case OP_BRAPOS:
1959     case OP_SBRA:
1960     case OP_SBRAPOS:
1961     case OP_SCOND:
1962     count = 1;
1963     srcw[0] = PRIVATE_DATA(cc);
1964     SLJIT_ASSERT(srcw[0] != 0);
1965     cc += 1 + LINK_SIZE;
1966     break;
1967 
1968     case OP_CBRA:
1969     case OP_SCBRA:
1970     if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1971       {
1972       count = 1;
1973       srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1974       }
1975     cc += 1 + LINK_SIZE + IMM2_SIZE;
1976     break;
1977 
1978     case OP_CBRAPOS:
1979     case OP_SCBRAPOS:
1980     count = 2;
1981     srcw[0] = PRIVATE_DATA(cc);
1982     srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1983     SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1984     cc += 1 + LINK_SIZE + IMM2_SIZE;
1985     break;
1986 
1987     case OP_COND:
1988     /* Might be a hidden SCOND. */
1989     alternative = cc + GET(cc, 1);
1990     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1991       {
1992       count = 1;
1993       srcw[0] = PRIVATE_DATA(cc);
1994       SLJIT_ASSERT(srcw[0] != 0);
1995       }
1996     cc += 1 + LINK_SIZE;
1997     break;
1998 
1999     CASE_ITERATOR_PRIVATE_DATA_1
2000     if (PRIVATE_DATA(cc))
2001       {
2002       count = 1;
2003       srcw[0] = PRIVATE_DATA(cc);
2004       }
2005     cc += 2;
2006 #ifdef SUPPORT_UTF
2007     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2008 #endif
2009     break;
2010 
2011     CASE_ITERATOR_PRIVATE_DATA_2A
2012     if (PRIVATE_DATA(cc))
2013       {
2014       count = 2;
2015       srcw[0] = PRIVATE_DATA(cc);
2016       srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2017       }
2018     cc += 2;
2019 #ifdef SUPPORT_UTF
2020     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2021 #endif
2022     break;
2023 
2024     CASE_ITERATOR_PRIVATE_DATA_2B
2025     if (PRIVATE_DATA(cc))
2026       {
2027       count = 2;
2028       srcw[0] = PRIVATE_DATA(cc);
2029       srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2030       }
2031     cc += 2 + IMM2_SIZE;
2032 #ifdef SUPPORT_UTF
2033     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2034 #endif
2035     break;
2036 
2037     CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2038     if (PRIVATE_DATA(cc))
2039       {
2040       count = 1;
2041       srcw[0] = PRIVATE_DATA(cc);
2042       }
2043     cc += 1;
2044     break;
2045 
2046     CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2047     if (PRIVATE_DATA(cc))
2048       {
2049       count = 2;
2050       srcw[0] = PRIVATE_DATA(cc);
2051       srcw[1] = srcw[0] + sizeof(sljit_sw);
2052       }
2053     cc += 1;
2054     break;
2055 
2056     CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2057     if (PRIVATE_DATA(cc))
2058       {
2059       count = 2;
2060       srcw[0] = PRIVATE_DATA(cc);
2061       srcw[1] = srcw[0] + sizeof(sljit_sw);
2062       }
2063     cc += 1 + IMM2_SIZE;
2064     break;
2065 
2066     case OP_CLASS:
2067     case OP_NCLASS:
2068 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2069     case OP_XCLASS:
2070     size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
2071 #else
2072     size = 1 + 32 / (int)sizeof(pcre_uchar);
2073 #endif
2074     if (PRIVATE_DATA(cc))
2075       switch(get_class_iterator_size(cc + size))
2076         {
2077         case 1:
2078         count = 1;
2079         srcw[0] = PRIVATE_DATA(cc);
2080         break;
2081 
2082         case 2:
2083         count = 2;
2084         srcw[0] = PRIVATE_DATA(cc);
2085         srcw[1] = srcw[0] + sizeof(sljit_sw);
2086         break;
2087 
2088         default:
2089         SLJIT_UNREACHABLE();
2090         break;
2091         }
2092     cc += size;
2093     break;
2094 
2095     default:
2096     cc = next_opcode(common, cc);
2097     SLJIT_ASSERT(cc != NULL);
2098     break;
2099     }
2100 
2101   while (count > 0)
2102     {
2103     count--;
2104     if (save)
2105       {
2106       if (tmp1next)
2107         {
2108         if (!tmp1empty)
2109           {
2110           OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2111           stackptr += sizeof(sljit_sw);
2112           }
2113         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
2114         tmp1empty = FALSE;
2115         tmp1next = FALSE;
2116         }
2117       else
2118         {
2119         if (!tmp2empty)
2120           {
2121           OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2122           stackptr += sizeof(sljit_sw);
2123           }
2124         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
2125         tmp2empty = FALSE;
2126         tmp1next = TRUE;
2127         }
2128       }
2129     else
2130       {
2131       if (tmp1next)
2132         {
2133         SLJIT_ASSERT(!tmp1empty);
2134         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
2135         tmp1empty = stackptr >= stacktop;
2136         if (!tmp1empty)
2137           {
2138           OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
2139           stackptr += sizeof(sljit_sw);
2140           }
2141         tmp1next = FALSE;
2142         }
2143       else
2144         {
2145         SLJIT_ASSERT(!tmp2empty);
2146         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
2147         tmp2empty = stackptr >= stacktop;
2148         if (!tmp2empty)
2149           {
2150           OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
2151           stackptr += sizeof(sljit_sw);
2152           }
2153         tmp1next = TRUE;
2154         }
2155       }
2156     }
2157   }
2158 while (status != end);
2159 
2160 if (save)
2161   {
2162   if (tmp1next)
2163     {
2164     if (!tmp1empty)
2165       {
2166       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2167       stackptr += sizeof(sljit_sw);
2168       }
2169     if (!tmp2empty)
2170       {
2171       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2172       stackptr += sizeof(sljit_sw);
2173       }
2174     }
2175   else
2176     {
2177     if (!tmp2empty)
2178       {
2179       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2180       stackptr += sizeof(sljit_sw);
2181       }
2182     if (!tmp1empty)
2183       {
2184       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2185       stackptr += sizeof(sljit_sw);
2186       }
2187     }
2188   }
2189 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
2190 }
2191 
set_then_offsets(compiler_common * common,pcre_uchar * cc,sljit_u8 * current_offset)2192 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, sljit_u8 *current_offset)
2193 {
2194 pcre_uchar *end = bracketend(cc);
2195 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2196 
2197 /* Assert captures then. */
2198 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2199   current_offset = NULL;
2200 /* Conditional block does not. */
2201 if (*cc == OP_COND || *cc == OP_SCOND)
2202   has_alternatives = FALSE;
2203 
2204 cc = next_opcode(common, cc);
2205 if (has_alternatives)
2206   current_offset = common->then_offsets + (cc - common->start);
2207 
2208 while (cc < end)
2209   {
2210   if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2211     cc = set_then_offsets(common, cc, current_offset);
2212   else
2213     {
2214     if (*cc == OP_ALT && has_alternatives)
2215       current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2216     if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2217       *current_offset = 1;
2218     cc = next_opcode(common, cc);
2219     }
2220   }
2221 
2222 return end;
2223 }
2224 
2225 #undef CASE_ITERATOR_PRIVATE_DATA_1
2226 #undef CASE_ITERATOR_PRIVATE_DATA_2A
2227 #undef CASE_ITERATOR_PRIVATE_DATA_2B
2228 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2229 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2230 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2231 
is_powerof2(unsigned int value)2232 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2233 {
2234 return (value & (value - 1)) == 0;
2235 }
2236 
set_jumps(jump_list * list,struct sljit_label * label)2237 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2238 {
2239 while (list)
2240   {
2241   /* sljit_set_label is clever enough to do nothing
2242   if either the jump or the label is NULL. */
2243   SET_LABEL(list->jump, label);
2244   list = list->next;
2245   }
2246 }
2247 
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)2248 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2249 {
2250 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2251 if (list_item)
2252   {
2253   list_item->next = *list;
2254   list_item->jump = jump;
2255   *list = list_item;
2256   }
2257 }
2258 
/* Registers a stub on common->stubs. The stub remembers the jump that leads
to it (start) and a label emitted at the current code position (quit), which
flush_stubs() later jumps back to. Nothing is recorded, and no label is
emitted, if the node cannot be allocated. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (stub == NULL)
  return;

stub->start = start;
stub->quit = LABEL();
stub->next = common->stubs;
common->stubs = stub;
}
2272 
/* Emits the out-of-line code of every stub registered by add_stub() and
empties the stub list. For each stub: its start jump lands here, a fast call
is queued on common->stackalloc, and control then returns to the stub's quit
label. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
2287 
/* Emits a label at the current code position and records it, together with
update_addr, at the head of common->label_addrs. If the list node cannot be
allocated nothing is emitted or recorded. */
static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
{
DEFINE_COMPILER;
label_addr_list *entry = sljit_alloc_memory(compiler, sizeof(label_addr_list));

if (entry != NULL)
  {
  entry->label = LABEL();
  entry->update_addr = update_addr;
  entry->next = common->label_addrs;
  common->label_addrs = entry;
  }
}
2301 
count_match(compiler_common * common)2302 static SLJIT_INLINE void count_match(compiler_common *common)
2303 {
2304 DEFINE_COMPILER;
2305 
2306 OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2307 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2308 }
2309 
allocate_stack(compiler_common * common,int size)2310 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2311 {
2312 /* May destroy all locals and registers except TMP2. */
2313 DEFINE_COMPILER;
2314 
2315 SLJIT_ASSERT(size > 0);
2316 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2317 #ifdef DESTROY_REGISTERS
2318 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2319 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2320 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2321 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2322 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2323 #endif
2324 add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
2325 }
2326 
free_stack(compiler_common * common,int size)2327 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2328 {
2329 DEFINE_COMPILER;
2330 
2331 SLJIT_ASSERT(size > 0);
2332 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2333 }
2334 
/* Allocates size bytes of data plus one leading sljit_uw that is used as a
link field, and pushes the chunk onto common->read_only_data_head so that
free_read_only_data() can release it later.

Returns a pointer to the area just after the link field, or NULL when the
compiler is already in an error state or the allocation fails (in the latter
case the compiler is switched to its memory-error state). */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
DEFINE_COMPILER;
sljit_uw *chunk;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

chunk = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
if (SLJIT_UNLIKELY(chunk == NULL))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

/* The first word links to the previously allocated chunk. */
*(void**)chunk = common->read_only_data_head;
common->read_only_data_head = (void *)chunk;
return chunk + 1;
}
2354 
free_read_only_data(void * current,void * allocator_data)2355 static void free_read_only_data(void *current, void *allocator_data)
2356 {
2357 void *next;
2358 
2359 SLJIT_UNUSED_ARG(allocator_data);
2360 
2361 while (current != NULL)
2362   {
2363   next = *(void**)current;
2364   SLJIT_FREE(current, allocator_data);
2365   current = next;
2366   }
2367 }
2368 
reset_ovector(compiler_common * common,int length)2369 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2370 {
2371 DEFINE_COMPILER;
2372 struct sljit_label *loop;
2373 int i;
2374 
2375 /* At this point we can freely use all temporary registers. */
2376 SLJIT_ASSERT(length > 1);
2377 /* TMP1 returns with begin - 1. */
2378 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2379 if (length < 8)
2380   {
2381   for (i = 1; i < length; i++)
2382     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2383   }
2384 else
2385   {
2386   GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2387   OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2388   loop = LABEL();
2389   OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2390   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2391   JUMPTO(SLJIT_NOT_ZERO, loop);
2392   }
2393 }
2394 
reset_fast_fail(compiler_common * common)2395 static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
2396 {
2397 DEFINE_COMPILER;
2398 sljit_s32 i;
2399 
2400 SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
2401 
2402 OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2403 for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
2404   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
2405 }
2406 
do_reset_match(compiler_common * common,int length)2407 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2408 {
2409 DEFINE_COMPILER;
2410 struct sljit_label *loop;
2411 int i;
2412 
2413 SLJIT_ASSERT(length > 1);
2414 /* OVECTOR(1) contains the "string begin - 1" constant. */
2415 if (length > 2)
2416   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2417 if (length < 8)
2418   {
2419   for (i = 2; i < length; i++)
2420     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2421   }
2422 else
2423   {
2424   GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2425   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2426   loop = LABEL();
2427   OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2428   OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2429   JUMPTO(SLJIT_NOT_ZERO, loop);
2430   }
2431 
2432 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2433 if (common->mark_ptr != 0)
2434   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2435 if (common->control_head_ptr != 0)
2436   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2437 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2438 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2439 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2440 }
2441 
do_search_mark(sljit_sw * current,const pcre_uchar * skip_arg)2442 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2443 {
2444 while (current != NULL)
2445   {
2446   switch (current[1])
2447     {
2448     case type_then_trap:
2449     break;
2450 
2451     case type_mark:
2452     if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[2]) == 0)
2453       return current[3];
2454     break;
2455 
2456     default:
2457     SLJIT_UNREACHABLE();
2458     break;
2459     }
2460   SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
2461   current = (sljit_sw*)current[0];
2462   }
2463 return -1;
2464 }
2465 
copy_ovector(compiler_common * common,int topbracket)2466 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2467 {
2468 DEFINE_COMPILER;
2469 struct sljit_label *loop;
2470 struct sljit_jump *early_quit;
2471 
2472 /* At this point we can freely use all registers. */
2473 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2474 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2475 
2476 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2477 if (common->mark_ptr != 0)
2478   OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2479 OP1(SLJIT_MOV_S32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2480 if (common->mark_ptr != 0)
2481   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
2482 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2483 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2484 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2485 /* Unlikely, but possible */
2486 early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
2487 loop = LABEL();
2488 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2489 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2490 /* Copy the integer value to the output buffer */
2491 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2492 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2493 #endif
2494 OP1(SLJIT_MOVU_S32, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
2495 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2496 JUMPTO(SLJIT_NOT_ZERO, loop);
2497 JUMPHERE(early_quit);
2498 
2499 /* Calculate the return value, which is the maximum ovector value. */
2500 if (topbracket > 1)
2501   {
2502   GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2503   OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2504 
2505   /* OVECTOR(0) is never equal to SLJIT_S2. */
2506   loop = LABEL();
2507   OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2508   OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2509   CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2510   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2511   }
2512 else
2513   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2514 }
2515 
return_with_partial_match(compiler_common * common,struct sljit_label * quit)2516 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2517 {
2518 DEFINE_COMPILER;
2519 struct sljit_jump *jump;
2520 
2521 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2522 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2523   && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2524 
2525 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2526 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2527 OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2528 CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2529 
2530 /* Store match begin and end. */
2531 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2532 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2533 
2534 jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2535 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2536 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2537 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2538 #endif
2539 OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2540 JUMPHERE(jump);
2541 
2542 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2543 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2544 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2545 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2546 #endif
2547 OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2548 
2549 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2550 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2551 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2552 #endif
2553 OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2554 
2555 JUMPTO(SLJIT_JUMP, quit);
2556 }
2557 
check_start_used_ptr(compiler_common * common)2558 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2559 {
2560 /* May destroy TMP1. */
2561 DEFINE_COMPILER;
2562 struct sljit_jump *jump;
2563 
2564 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2565   {
2566   /* The value of -1 must be kept for start_used_ptr! */
2567   OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2568   /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2569   is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2570   jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2571   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2572   JUMPHERE(jump);
2573   }
2574 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2575   {
2576   jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2577   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2578   JUMPHERE(jump);
2579   }
2580 }
2581 
char_has_othercase(compiler_common * common,pcre_uchar * cc)2582 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2583 {
2584 /* Detects if the character has an othercase. */
2585 unsigned int c;
2586 
2587 #ifdef SUPPORT_UTF
2588 if (common->utf)
2589   {
2590   GETCHAR(c, cc);
2591   if (c > 127)
2592     {
2593 #ifdef SUPPORT_UCP
2594     return c != UCD_OTHERCASE(c);
2595 #else
2596     return FALSE;
2597 #endif
2598     }
2599 #ifndef COMPILE_PCRE8
2600   return common->fcc[c] != c;
2601 #endif
2602   }
2603 else
2604 #endif
2605   c = *cc;
2606 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2607 }
2608 
char_othercase(compiler_common * common,unsigned int c)2609 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2610 {
2611 /* Returns with the othercase. */
2612 #ifdef SUPPORT_UTF
2613 if (common->utf && c > 127)
2614   {
2615 #ifdef SUPPORT_UCP
2616   return UCD_OTHERCASE(c);
2617 #else
2618   return c;
2619 #endif
2620   }
2621 #endif
2622 return TABLE_GET(c, common->fcc, c);
2623 }
2624 
/* Checks whether the character at cc and its other case differ in exactly
one bit. The character must have an other case (asserted).

Returns 0 when more than one bit differs. Otherwise the result is
(index << 8) | mask, where mask is the differing bit reduced to 8 bits.
NOTE(review): index looks like the position, inside the encoded character,
of the code unit (8 bit mode) or byte (16/32 bit modes) that contains the
bit - confirm against the callers. */
static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
{
unsigned int chr, other, diff;
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
int unit_index;
#endif

#ifdef SUPPORT_UTF
if (!common->utf)
  {
  chr = *cc;
  other = TABLE_GET(chr, common->fcc, chr);
  }
else
  {
  GETCHAR(chr, cc);
  if (chr > 127)
    {
#ifdef SUPPORT_UCP
    other = UCD_OTHERCASE(chr);
#else
    other = chr;
#endif
    }
  else
    other = common->fcc[chr];
  }
#else
chr = *cc;
other = TABLE_GET(chr, common->fcc, chr);
#endif

SLJIT_ASSERT(chr != other);

diff = chr ^ other;
/* Fast path, optimized for the English alphabet. */
if (diff == 0x20 && chr <= 127)
  return (0 << 8) | 0x20;

/* chr != other, so at least one bit is set in diff; reject two or more. */
if (!is_powerof2(diff))
  return 0;

#if defined COMPILE_PCRE8

#ifdef SUPPORT_UTF
if (common->utf && chr > 127)
  {
  /* Start from the extra length of the sequence and step back once for
  every group of 6 low bits that does not contain the differing bit. */
  unit_index = GET_EXTRALEN(*cc);
  while ((diff & 0x3f) == 0)
    {
    unit_index--;
    diff >>= 6;
    }
  return (unit_index << 8) | diff;
  }
#endif /* SUPPORT_UTF */
return (0 << 8) | diff;

#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32

#ifdef SUPPORT_UTF
if (common->utf && chr > 65535)
  {
  if (diff < (1 << 10))
    return (diff < 256) ? ((2 << 8) | diff) : ((3 << 8) | (diff >> 8));
  /* The bit is above the low 10 bits: drop those and use the common
  return below. */
  diff >>= 10;
  }
#endif /* SUPPORT_UTF */
return (diff < 256) ? ((0 << 8) | diff) : ((1 << 8) | (diff >> 8));

#endif /* COMPILE_PCRE[8|16|32] */
}
2700 
/* Emits the "partial match occurred" action; nothing is emitted in
JIT_COMPILE mode. Does not modify registers.

The action is: in soft partial mode store 0 into the hit_start local,
otherwise jump to the partial match handler (directly if its label already
exists, through common->partialmatch if not).

The action is guarded as follows:
  !force                 : skipped when start_used_ptr >= STR_PTR;
  force, soft partial    : skipped when start_used_ptr == -1;
  force, hard partial    : unconditional. */
static void check_partial(compiler_common *common, BOOL force)
{
DEFINE_COMPILER;
struct sljit_jump *skip_action = NULL;

SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);

if (common->mode == JIT_COMPILE)
  return;

if (force)
  {
  if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
    skip_action = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  }
else
  skip_action = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

if (common->mode != JIT_PARTIAL_SOFT_COMPILE)
  {
  if (common->partialmatchlabel == NULL)
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  else
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
else
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);

if (skip_action != NULL)
  JUMPHERE(skip_action);
}
2730 
/* Emits an end-of-subject check. Does not affect registers; usually used
in a tight spot.

JIT_COMPILE mode: a single jump to end_reached when STR_PTR >= STR_END.

Partial modes: when STR_PTR < STR_END the whole sequence is skipped.
Otherwise end_reached is taken if start_used_ptr >= STR_PTR; if not, soft
partial mode stores 0 into the hit_start local and still goes to
end_reached, while hard partial mode jumps to the partial match handler. */
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == JIT_COMPILE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
/* This comparison is common to both partial matching modes. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode != JIT_PARTIAL_SOFT_COMPILE)
  {
  if (common->partialmatchlabel == NULL)
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  else
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }

JUMPHERE(not_at_end);
}
2760 
/* Emits an end-of-subject check whose failure exits are collected in
backtracks.

JIT_COMPILE mode: a single jump to backtracks when STR_PTR >= STR_END.

Partial modes: when STR_PTR < STR_END the whole sequence is skipped.
Otherwise backtracks is taken if start_used_ptr >= STR_PTR; if not, soft
partial mode stores 0 into the hit_start local and still backtracks, while
hard partial mode jumps to the partial match handler. */
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == JIT_COMPILE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode != JIT_PARTIAL_SOFT_COMPILE)
  {
  if (common->partialmatchlabel == NULL)
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  else
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }

JUMPHERE(not_at_end);
}
2789 
/* Emits code that reads the character at STR_PTR into TMP1 and leaves
STR_PTR where it was. Does not check STR_END. TMP2 is destroyed.

max is an upper bound for the character values the caller is interested in:
when it is below the first value that needs more than one code unit
(128 in UTF-8, 0xd800 in UTF-16) only the single code unit load is emitted. */
static void peek_char(compiler_common *common, sljit_u32 max)
{
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *jump;
#endif

SLJIT_UNUSED_ARG(max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  if (max < 128) return;

  /* Bytes below 0xc0 are complete characters. For the rest, step over the
  first byte, call the utfreadchar helper, then subtract TMP2 to undo the
  movement. NOTE(review): this presumes the helper leaves the number of
  bytes by which STR_PTR has to be moved back in TMP2 - confirm. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  if (max < 0xd800) return;

  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  /* TMP2 contains the high surrogate. STR_PTR has not been advanced and
  still addresses it, so the low surrogate is the following code unit,
  at offset 1. Loading offset 0 here would read the high surrogate again. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
}
2832 
2833 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2834 
/* Tells whether the character codes below 128 are enough to determine a
match against a 32 byte class bitmap. That is the case when every byte in
the upper half of the bitmap (bytes 16..31) is 0xff for a negated class
(nclass) or 0 otherwise. */
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
const sljit_u8 expected = nclass ? 0xff : 0;
int index;

for (index = 16; index < 32; index++)
  {
  if (bitset[index] != expected)
    return FALSE;
  }
return TRUE;
}
2851 
/* Emits code that loads into TMP1 the ctypes table entry selected by the
code unit at STR_PTR and advances STR_PTR by one code unit. UTF mode only
(asserted). Does not check STR_END. TMP2 is destroyed.

Per the original description, the result is the precise character type for
characters below 128 and zero otherwise. When full_read is set, a first
code unit of 0xc0 or above additionally advances STR_PTR by the matching
utf8_table4 entry. */
static void read_char7_type(compiler_common *common, BOOL full_read)
{
DEFINE_COMPILER;
struct sljit_jump *single_byte;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The table is indexed directly with the code unit value. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (!full_read)
  return;

single_byte = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
/* utf8_table4 is addressed with the byte value minus 0xc0. */
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
JUMPHERE(single_byte);
}
2875 
2876 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2877 
/* Emits code that reads the character at STR_PTR into TMP1 and advances
STR_PTR. The value in TMP1 is precise only if the character lies in the
[min, max] range; otherwise some value outside that range is produced.
Does not check STR_END. TMP2 is destroyed, and the UTF-8 paths with
update_str_ptr set also use RETURN_ADDR.

In the UTF paths, update_str_ptr requests that STR_PTR is moved past the
whole character even when its value is not decoded. In the UTF-8 paths this
is done by adding the utf8_table4 entry of the first byte. */
static void read_char_range(compiler_common *common, sljit_u32 min, sljit_u32 max, BOOL update_str_ptr)
{
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *single_unit;
#endif
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
struct sljit_jump *other_length;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);
SLJIT_UNUSED_ARG(min);
SLJIT_UNUSED_ARG(max);
SLJIT_ASSERT(min <= max);

/* First code unit; from here on STR_PTR addresses the second one. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  if (!update_str_ptr && max < 128) return;

  /* First bytes below 0xc0 need no further processing. */
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  if (min >= 0x10000)
    {
    /* Only four byte sequences (first byte 0xf0..0xf7) are decoded. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    other_length = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(other_length);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (min >= 0x800 && max <= 0xffff)
    {
    /* Only three byte sequences (first byte 0xe0..0xef) are decoded. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    other_length = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(other_length);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (max >= 0x800)
    {
    /* General case: delegate to one of the shared decoder routines. */
    add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
    }
  else if (max < 128)
    {
    /* update_str_ptr is set here: the value is not needed, only the
    remaining bytes of the character are skipped. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  else
    {
    /* max is in the 128..0x7ff range: decode as a two byte sequence. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (update_str_ptr)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    else
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  JUMPHERE(single_unit);
  }
#endif

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  if (max >= 0x10000)
    {
    /* Full decoding of surrogate pairs. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    single_unit = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
    /* TMP2 contains the high surrogate; STR_PTR already addresses the
    low one. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(single_unit);
    }
  else if (update_str_ptr || max >= 0xd800)
    {
    /* The value of a surrogate pair is not needed. On a high surrogate,
    skip the low one if requested, and replace the value with 0x10000
    when max is 0xd800 or above. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    single_unit = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    if (max >= 0xd800)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
    JUMPHERE(single_unit);
    }
  }
#endif
}
3005 
read_char(compiler_common * common)3006 static SLJIT_INLINE void read_char(compiler_common *common)
3007 {
     /* Thin wrapper: forwards to read_char_range() with the arguments
     0, READ_CHAR_MAX and TRUE. Presumably these are the lower bound, the
     upper bound and the update_str_ptr flag of read_char_range() - its
     parameter list is not visible here, confirm against its definition. */
3008 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
3009 }
3010 
read_char8_type(compiler_common * common,BOOL update_str_ptr)3011 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
3012 {
3013 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
     /* The type is fetched from the table at common->ctypes, which is only
     indexed with values up to 255 here; any larger value yields type 0.
     TMP2 is overwritten. update_str_ptr only matters in the UTF paths:
     - UTF-8, FALSE: exactly one more code unit is consumed after a lead
       unit >= 0xc0 and the two units are combined inline.
     - UTF-8, TRUE: the common->utfreadtype8 helper is called instead.
     - UTF-16, TRUE: the unit following a 0xd800..0xdbff unit is skipped. */
3014 DEFINE_COMPILER;
3015 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3016 struct sljit_jump *jump;
3017 #endif
3018 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3019 struct sljit_jump *jump2;
3020 #endif
3021 
3022 SLJIT_UNUSED_ARG(update_str_ptr);
3023 
     /* Load the first code unit into TMP2 and step over it. */
3024 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
3025 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3026 
3027 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3028 if (common->utf)
3029   {
3030   /* This can be an extra read in some situations, but hopefully
3031   it is needed in most cases. */
3032   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
       /* Units below 0xc0 are finished: TMP1 already holds the type. */
3033   jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
3034   if (!update_str_ptr)
3035     {
         /* Combine the low 6 bits of the lead unit and of the next unit:
         TMP2 = ((TMP2 & 0x3f) << 6) | (next & 0x3f). */
3036     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3037     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3038     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3039     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3040     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3041     OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
         /* Default type is 0; the table is consulted only for values <= 255. */
3042     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3043     jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
3044     OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3045     JUMPHERE(jump2);
3046     }
3047   else
3048     add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
3049   JUMPHERE(jump);
3050   return;
3051   }
3052 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
3053 
3054 #if !defined COMPILE_PCRE8
3055 /* The ctypes array contains only 256 values. */
3056 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3057 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
3058 #endif
3059 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3060 #if !defined COMPILE_PCRE8
3061 JUMPHERE(jump);
3062 #endif
3063 
3064 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
3065 if (common->utf && update_str_ptr)
3066   {
3067   /* Skip low surrogate if necessary. */
       /* TMP2 - 0xd800 is in 0..0x3ff (unsigned compare) exactly when the
       unit just read lies in 0xd800..0xdbff. */
3068   OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
3069   jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
3070   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3071   JUMPHERE(jump);
3072   }
3073 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
3074 }
3075 
skip_char_back(compiler_common * common)3076 static void skip_char_back(compiler_common *common)
3077 {
3078 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
     /* Three variants are emitted depending on the build and common->utf:
     - UTF-8: step back one unit at a time while the unit just stepped over
       has the form 10xxxxxx (a continuation unit).
     - UTF-16: step back one unit, and one more if that unit was in the
       range 0xdc00..0xdfff.
     - otherwise: step back exactly one code unit. */
3079 DEFINE_COMPILER;
3080 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3081 #if defined COMPILE_PCRE8
3082 struct sljit_label *label;
3083 
3084 if (common->utf)
3085   {
3086   label = LABEL();
3087   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
3088   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
       /* (unit & 0xc0) == 0x80 identifies a continuation unit: loop again. */
3089   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
3090   CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
3091   return;
3092   }
3093 #elif defined COMPILE_PCRE16
3094 if (common->utf)
3095   {
3096   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
3097   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3098   /* Skip low surrogate if necessary. */
       /* Branch-free: TMP1 becomes the 0/1 result of the equality test, is
       shifted left by one (0 or 2, the latter being one 16 bit unit in
       bytes) and subtracted from STR_PTR. */
3099   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3100   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
3101   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
3102   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3103   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3104   return;
3105   }
3106 #endif /* COMPILE_PCRE[8|16] */
3107 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3108 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3109 }
3110 
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Emits a newline test for the character held in TMP1 and appends the
resulting jump(s) to the backtracks list. With jumpifmatch set the jumps are
taken when the character is a newline, otherwise when it is not. TMP2 may be
destroyed. */
DEFINE_COMPILER;
struct sljit_jump *pending;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* Delegate to the shared anynewline routine, then branch on the zero flag. */
  pending = JUMP(SLJIT_FAST_CALL);
  add_jump(compiler, &common->anynewline, pending);
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  if (jumpifmatch)
    pending = JUMP(SLJIT_NOT_ZERO);
  else
    pending = JUMP(SLJIT_ZERO);
  add_jump(compiler, backtracks, pending);
  return;
  }

if (nltype == NLTYPE_ANYCRLF)
  {
  if (!jumpifmatch)
    {
    /* CR falls through as a match; anything else must be NL. */
    is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    pending = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
    add_jump(compiler, backtracks, pending);
    JUMPHERE(is_cr);
    return;
    }

  /* Either CR or NL triggers the jump. */
  pending = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  add_jump(compiler, backtracks, pending);
  pending = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
  add_jump(compiler, backtracks, pending);
  return;
  }

/* Single, fixed newline character. */
SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
if (jumpifmatch)
  pending = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline);
else
  pending = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline);
add_jump(compiler, backtracks, pending);
}
3143 
3144 #ifdef SUPPORT_UTF
3145 
3146 #if defined COMPILE_PCRE8
do_utfreadchar(compiler_common * common)3147 static void do_utfreadchar(compiler_common *common)
3148 {
3149 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
3150 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
     /* Emitted as a fast-call routine: the return address is kept in
     RETURN_ADDR. STR_PTR is expected to point at the second unit on entry
     (the first load below reads offset 0) and is advanced past the units
     consumed here. The sequence length is detected from bits of the lead
     unit after they have been shifted into the partially built value. */
3151 DEFINE_COMPILER;
3152 struct sljit_jump *jump;
3153 
3154 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
     /* TMP1 = ((lead & 0x3f) << 6) | (unit[0] & 0x3f). */
3155 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3156 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3157 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3158 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3159 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3160 
3161 /* Searching for the first zero. */
     /* 0x800 is bit 0x20 of the lead unit after the shift by 6. */
3162 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
3163 jump = JUMP(SLJIT_NOT_ZERO);
3164 /* Two byte sequence. */
3165 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3166 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
3167 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3168 
3169 JUMPHERE(jump);
     /* Drop the marker bit (known to be set), then append 6 more bits. */
3170 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3171 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3172 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3173 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3174 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3175 
     /* 0x10000 is bit 0x10 of the lead unit after the two shifts by 6. */
3176 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
3177 jump = JUMP(SLJIT_NOT_ZERO);
3178 /* Three byte sequence. */
3179 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3180 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
3181 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3182 
3183 /* Four byte sequence. */
3184 JUMPHERE(jump);
3185 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
3186 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
3187 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3188 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
3189 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3190 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3191 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
3192 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3193 }
3194 
do_utfreadchar16(compiler_common * common)3195 static void do_utfreadchar16(compiler_common *common)
3196 {
3197 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
3198 of the character (>= 0xc0). Return value in TMP1. */
     /* Fast-call routine (return address in RETURN_ADDR). Unlike
     do_utfreadchar() no length is returned, and at most two further units
     are merged into the value. NOTE(review): the caller visible in this
     file selects this routine when its max is below 0x10000, so the value
     produced for a four unit sequence is presumably never compared
     exactly - confirm. */
3199 DEFINE_COMPILER;
3200 struct sljit_jump *jump;
3201 
3202 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
     /* TMP1 = ((lead & 0x3f) << 6) | (unit[0] & 0x3f). */
3203 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3204 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3205 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3206 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3207 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3208 
3209 /* Searching for the first zero. */
3210 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
3211 jump = JUMP(SLJIT_NOT_ZERO);
3212 /* Two byte sequence. */
3213 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3214 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3215 
3216 JUMPHERE(jump);
     /* 0x400 is bit 0x10 of the lead unit after the shift by 6. TMP2 becomes
     1 when it is set and 0 otherwise, and is added to STR_PTR, so STR_PTR
     moves one extra unit in that case. */
3217 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
3218 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
3219 /* This code runs only in 8 bit mode. No need to shift the value. */
3220 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3221 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3222 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3223 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3224 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3225 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3226 /* Three byte sequence. */
3227 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3228 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3229 }
3230 
do_utfreadtype8(compiler_common * common)3231 static void do_utfreadtype8(compiler_common *common)
3232 {
3233 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
3234 of the character (>= 0xc0). Return value in TMP1. */
     /* Fast-call routine (return address in RETURN_ADDR). The result is the
     common->ctypes entry when the decoded value is below 256 and 0
     otherwise. STR_PTR is advanced past the remaining units of the
     character; TMP2 is overwritten. */
3235 DEFINE_COMPILER;
3236 struct sljit_jump *jump;
3237 struct sljit_jump *compare;
3238 
3239 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3240 
     /* Bit 0x20 of the lead unit set: longer than two units, handled at the
     end of the routine. */
3241 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
3242 jump = JUMP(SLJIT_NOT_ZERO);
3243 /* Two byte sequence. */
3244 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3245 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3246 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3247 /* The upper 5 bits are known at this point. */
     /* Upper bits above 3 would give (TMP2 << 6) >= 0x100: no type entry. */
3248 compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
3249 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3250 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3251 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3252 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3253 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3254 
3255 JUMPHERE(compare);
3256 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3257 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3258 
3259 /* We only have types for characters less than 256. */
3260 JUMPHERE(jump);
     /* Skip the rest of the character: the amount added to STR_PTR is read
     from utf8_table4, indexed by (lead unit - 0xc0). */
3261 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3262 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3263 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3264 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3265 }
3266 
3267 #endif /* COMPILE_PCRE8 */
3268 
3269 #endif /* SUPPORT_UTF */
3270 
3271 #ifdef SUPPORT_UCP
3272 
3273 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
     /* UCD_BLOCK_MASK selects the offset inside a block, UCD_BLOCK_SHIFT
     converts between a character value and its block number. */
3274 #define UCD_BLOCK_MASK 127
3275 #define UCD_BLOCK_SHIFT 7
3276 
do_getucd(compiler_common * common)3277 static void do_getucd(compiler_common *common)
3278 {
3279 /* Search the UCD record for the character comes in TMP1.
3280 Returns chartype in TMP1 and UCD offset in TMP2. */
     /* Fast-call routine (return address in RETURN_ADDR) performing the two
     stage table lookup ucd_stage1 -> ucd_stage2 -> ucd_records. The value
     left in TMP2 is the 16 bit entry read from ucd_stage2, i.e. the index
     of the record inside ucd_records. */
3281 DEFINE_COMPILER;
3282 #ifdef COMPILE_PCRE32
3283 struct sljit_jump *jump;
3284 #endif
3285 
3286 #if defined SLJIT_DEBUG && SLJIT_DEBUG
3287 /* dummy_ucd_record */
3288 const ucd_record *record = GET_UCD(INVALID_UTF_CHAR);
3289 SLJIT_ASSERT(record->script == ucp_Common && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
3290 SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
3291 #endif
3292 
     /* The shift amounts used below depend on both of these sizes. */
3293 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3294 
3295 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3296 
3297 #ifdef COMPILE_PCRE32
3298 if (!common->utf)
3299   {
       /* Values above 0x10ffff are replaced by INVALID_UTF_CHAR before the
       table lookup. */
3300   jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10ffff + 1);
3301   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
3302   JUMPHERE(jump);
3303   }
3304 #endif
3305 
     /* TMP2 = ucd_stage1[TMP1 >> 7]; TMP1 = (TMP2 << 7) + (TMP1 & 127). */
3306 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3307 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
3308 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3309 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3310 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
     /* 16 bit load from ucd_stage2 indexed by TMP1. The trailing 1 is
     presumably the index shift of the SLJIT_MEM2 operand (2 byte entries);
     confirm against the sljit documentation. */
3311 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3312 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
     /* Byte load of the chartype field of record TMP2; the shift of 3
     matches the 8 byte record size asserted above. */
3313 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3314 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3315 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3316 }
3317 #endif
3318 
mainloop_entry(compiler_common * common,BOOL hascrorlf)3319 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf)
3320 {
     /* Emits the code that moves STR_PTR to the next start position and
     returns the label placed at the beginning of that advance code
     (mainloop). Execution that falls into this code for the first time
     jumps over the advance (the "start" jump lands after it).

     When common->match_end_ptr is non-zero, code is emitted first that
     scans forward for the end of the current line, stores that position
     in the stack slot at common->match_end_ptr, and restores STR_PTR
     (saved in TMP3).

     newlinecheck is enabled only when hascrorlf is FALSE, match_end_ptr is
     zero and the newline type is ANY, ANYCRLF or a two unit sequence
     (common->newline > 255); in that case a unit equal to the high byte of
     common->newline diverts to newlinelabel, which also steps over a
     following unit equal to the low byte. */
3321 DEFINE_COMPILER;
3322 struct sljit_label *mainloop;
3323 struct sljit_label *newlinelabel = NULL;
3324 struct sljit_jump *start;
3325 struct sljit_jump *end = NULL;
3326 struct sljit_jump *end2 = NULL;
3327 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3328 struct sljit_jump *singlechar;
3329 #endif
3330 jump_list *newline = NULL;
3331 BOOL newlinecheck = FALSE;
3332 BOOL readuchar = FALSE;
3333 
3334 if (!(hascrorlf || (common->match_end_ptr != 0)) &&
3335     (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3336   newlinecheck = TRUE;
3337 
3338 if (common->match_end_ptr != 0)
3339   {
3340   /* Search for the end of the first line. */
3341   OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3342 
3343   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3344     {
         /* Two unit newline: compare the units at STR_PTR[-1] and STR_PTR[0]
         with the high and low byte of common->newline. */
3345     mainloop = LABEL();
3346     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3347     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3348     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3349     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3350     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3351     CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3352     JUMPHERE(end);
3353     OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3354     }
3355   else
3356     {
3357     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3358     mainloop = LABEL();
3359     /* Continual stores does not cause data dependency. */
3360     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
3361     read_char_range(common, common->nlmin, common->nlmax, TRUE);
3362     check_newlinechar(common, common->nltype, &newline, TRUE);
3363     CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3364     JUMPHERE(end);
3365     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
         /* Newline hits land after the store above, so the slot keeps the
         position that was stored before the newline character was read. */
3366     set_jumps(newline, LABEL());
3367     }
3368 
3369   OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3370   }
3371 
     /* Skip the advance code on the initial fall-through. */
3372 start = JUMP(SLJIT_JUMP);
3373 
3374 if (newlinecheck)
3375   {
       /* Reached from the main loop when the current unit equals the high
       byte of common->newline: step over it, and (branch-free) over the
       next unit too when it equals the low byte. */
3376   newlinelabel = LABEL();
3377   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3378   end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3379   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3380   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3381   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
3382 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3383   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3384 #endif
3385   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3386   end2 = JUMP(SLJIT_JUMP);
3387   }
3388 
3389 mainloop = LABEL();
3390 
3391 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3392 #ifdef SUPPORT_UTF
3393 if (common->utf) readuchar = TRUE;
3394 #endif
3395 if (newlinecheck) readuchar = TRUE;
3396 
3397 if (readuchar)
3398   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3399 
3400 if (newlinecheck)
3401   CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3402 
3403 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3404 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3405 #if defined COMPILE_PCRE8
3406 if (common->utf)
3407   {
       /* Lead units >= 0xc0: add the extra amount read from utf8_table4. */
3408   singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3409   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3410   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3411   JUMPHERE(singlechar);
3412   }
3413 #elif defined COMPILE_PCRE16
3414 if (common->utf)
3415   {
       /* Units in 0xd800..0xdbff: add 2 bytes (one more 16 bit unit),
       computed branch-free from the equality flag. */
3416   singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3417   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3418   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3419   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
3420   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3421   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3422   JUMPHERE(singlechar);
3423   }
3424 #endif /* COMPILE_PCRE[8|16] */
3425 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3426 JUMPHERE(start);
3427 
3428 if (newlinecheck)
3429   {
3430   JUMPHERE(end);
3431   JUMPHERE(end2);
3432   }
3433 
3434 return mainloop;
3435 }
3436 
3437 #define MAX_N_CHARS 16
3438 #define MAX_DIFF_CHARS 6
3439 
add_prefix_char(pcre_uchar chr,pcre_uchar * chars)3440 static SLJIT_INLINE void add_prefix_char(pcre_uchar chr, pcre_uchar *chars)
3441 {
3442 pcre_uchar i, len;
3443 
3444 len = chars[0];
3445 if (len == 255)
3446   return;
3447 
3448 if (len == 0)
3449   {
3450   chars[0] = 1;
3451   chars[1] = chr;
3452   return;
3453   }
3454 
3455 for (i = len; i > 0; i--)
3456   if (chars[i] == chr)
3457     return;
3458 
3459 if (len >= MAX_DIFF_CHARS - 1)
3460   {
3461   chars[0] = 255;
3462   return;
3463   }
3464 
3465 len++;
3466 chars[len] = chr;
3467 chars[0] = len;
3468 }
3469 
scan_prefix(compiler_common * common,pcre_uchar * cc,pcre_uchar * chars,int max_chars,sljit_u32 * rec_count)3470 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uchar *chars, int max_chars, sljit_u32 *rec_count)
3471 {
3472 /* Recursive function, which scans prefix literals. */
     /* Walks the compiled pattern starting at cc and records which code
     units can occur at each of the next (at most max_chars) positions.

     chars is an array of per-position sets, MAX_DIFF_CHARS entries each:
     entry 0 is the number of recorded units, with 255 meaning that the
     position is not tracked, and the units follow (see add_prefix_char).

     *rec_count is decremented once per loop iteration, including those of
     recursive calls; when it is already zero the function returns 0.

     Returns the number of positions consumed on this path. For optional
     items and alternatives the function calls itself on the other path and
     stores the result back into max_chars, so the scan is limited to the
     length reached on that path; a result of 0 stops the scan. */
3473 BOOL last, any, class, caseless;
3474 int len, repeat, len_save, consumed = 0;
3475 sljit_u32 chr; /* Any unicode character. */
3476 sljit_u8 *bytes, *bytes_end, byte;
3477 pcre_uchar *alternative, *cc_save, *oc;
3478 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3479 pcre_uchar othercase[8];
3480 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3481 pcre_uchar othercase[2];
3482 #else
3483 pcre_uchar othercase[1];
3484 #endif
3485 
3486 repeat = 1;
3487 while (TRUE)
3488   {
3489   if (*rec_count == 0)
3490     return 0;
3491   (*rec_count)--;
3492 
       /* Per-item state: "last" ends the scan after this item, "any" marks
       the position(s) as untracked, "class" processes a 256 bit class
       bitmap, "caseless" also records the other case of a literal. */
3493   last = TRUE;
3494   any = FALSE;
3495   class = FALSE;
3496   caseless = FALSE;
3497 
3498   switch (*cc)
3499     {
3500     case OP_CHARI:
3501     caseless = TRUE;
         /* Fall through. */
3502     case OP_CHAR:
3503     last = FALSE;
3504     cc++;
3505     break;
3506 
3507     case OP_SOD:
3508     case OP_SOM:
3509     case OP_SET_SOM:
3510     case OP_NOT_WORD_BOUNDARY:
3511     case OP_WORD_BOUNDARY:
3512     case OP_EODN:
3513     case OP_EOD:
3514     case OP_CIRC:
3515     case OP_CIRCM:
3516     case OP_DOLL:
3517     case OP_DOLLM:
3518     /* Zero width assertions. */
3519     cc++;
3520     continue;
3521 
3522     case OP_ASSERT:
3523     case OP_ASSERT_NOT:
3524     case OP_ASSERTBACK:
3525     case OP_ASSERTBACK_NOT:
         /* The whole assertion group is stepped over. */
3526     cc = bracketend(cc);
3527     continue;
3528 
3529     case OP_PLUSI:
3530     case OP_MINPLUSI:
3531     case OP_POSPLUSI:
3532     caseless = TRUE;
         /* Fall through. */
3533     case OP_PLUS:
3534     case OP_MINPLUS:
3535     case OP_POSPLUS:
         /* One occurrence is recorded; "last" stays TRUE, so the scan ends
         after it. */
3536     cc++;
3537     break;
3538 
3539     case OP_EXACTI:
3540     caseless = TRUE;
         /* Fall through. */
3541     case OP_EXACT:
3542     repeat = GET2(cc, 1);
3543     last = FALSE;
3544     cc += 1 + IMM2_SIZE;
3545     break;
3546 
3547     case OP_QUERYI:
3548     case OP_MINQUERYI:
3549     case OP_POSQUERYI:
3550     caseless = TRUE;
         /* Fall through. */
3551     case OP_QUERY:
3552     case OP_MINQUERY:
3553     case OP_POSQUERY:
3554     len = 1;
3555     cc++;
3556 #ifdef SUPPORT_UTF
3557     if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3558 #endif
         /* First scan the path on which the optional character is absent. */
3559     max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
3560     if (max_chars == 0)
3561       return consumed;
3562     last = FALSE;
3563     break;
3564 
3565     case OP_KET:
3566     cc += 1 + LINK_SIZE;
3567     continue;
3568 
3569     case OP_ALT:
3570     cc += GET(cc, 1);
3571     continue;
3572 
3573     case OP_ONCE:
3574     case OP_ONCE_NC:
3575     case OP_BRA:
3576     case OP_BRAPOS:
3577     case OP_CBRA:
3578     case OP_CBRAPOS:
         /* Scan every further alternative recursively, then continue with
         the first alternative in this loop. */
3579     alternative = cc + GET(cc, 1);
3580     while (*alternative == OP_ALT)
3581       {
3582       max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
3583       if (max_chars == 0)
3584         return consumed;
3585       alternative += GET(alternative, 1);
3586       }
3587 
3588     if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3589       cc += IMM2_SIZE;
3590     cc += 1 + LINK_SIZE;
3591     continue;
3592 
3593     case OP_CLASS:
3594 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3595     if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
3596       return consumed;
3597 #endif
3598     class = TRUE;
3599     break;
3600 
3601     case OP_NCLASS:
3602 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3603     if (common->utf) return consumed;
3604 #endif
3605     class = TRUE;
3606     break;
3607 
3608 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3609     case OP_XCLASS:
3610 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3611     if (common->utf) return consumed;
3612 #endif
3613     any = TRUE;
3614     cc += GET(cc, 1);
3615     break;
3616 #endif
3617 
3618     case OP_DIGIT:
3619 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3620     if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3621       return consumed;
3622 #endif
3623     any = TRUE;
3624     cc++;
3625     break;
3626 
3627     case OP_WHITESPACE:
3628 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3629     if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3630       return consumed;
3631 #endif
3632     any = TRUE;
3633     cc++;
3634     break;
3635 
3636     case OP_WORDCHAR:
3637 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3638     if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3639       return consumed;
3640 #endif
3641     any = TRUE;
3642     cc++;
3643     break;
3644 
3645     case OP_NOT:
3646     case OP_NOTI:
         /* Extra increment for the character operand of OP_NOT/OP_NOTI. */
3647     cc++;
3648     /* Fall through. */
3649     case OP_NOT_DIGIT:
3650     case OP_NOT_WHITESPACE:
3651     case OP_NOT_WORDCHAR:
3652     case OP_ANY:
3653     case OP_ALLANY:
3654 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3655     if (common->utf) return consumed;
3656 #endif
3657     any = TRUE;
3658     cc++;
3659     break;
3660 
3661 #ifdef SUPPORT_UTF
3662     case OP_NOTPROP:
3663     case OP_PROP:
3664 #ifndef COMPILE_PCRE32
3665     if (common->utf) return consumed;
3666 #endif
3667     any = TRUE;
3668     cc += 1 + 2;
3669     break;
3670 #endif
3671 
3672     case OP_TYPEEXACT:
         /* Only sets the repeat count; the repeated item is handled by the
         next iteration. */
3673     repeat = GET2(cc, 1);
3674     cc += 1 + IMM2_SIZE;
3675     continue;
3676 
3677     case OP_NOTEXACT:
3678     case OP_NOTEXACTI:
3679 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3680     if (common->utf) return consumed;
3681 #endif
3682     any = TRUE;
3683     repeat = GET2(cc, 1);
3684     cc += 1 + IMM2_SIZE + 1;
3685     break;
3686 
3687     default:
3688     return consumed;
3689     }
3690 
3691   if (any)
3692     {
         /* Mark "repeat" consecutive positions as untracked. */
3693     do
3694       {
3695       chars[0] = 255;
3696 
3697       consumed++;
3698       if (--max_chars == 0)
3699         return consumed;
3700       chars += MAX_DIFF_CHARS;
3701       }
3702     while (--repeat > 0);
3703 
3704     repeat = 1;
3705     continue;
3706     }
3707 
3708   if (class)
3709     {
         /* bytes addresses the 32 byte bitmap that follows the opcode; cc is
         moved to the item after the bitmap, which may be a repeat opcode. */
3710     bytes = (sljit_u8*) (cc + 1);
3711     cc += 1 + 32 / sizeof(pcre_uchar);
3712 
3713     switch (*cc)
3714       {
3715       case OP_CRSTAR:
3716       case OP_CRMINSTAR:
3717       case OP_CRPOSSTAR:
3718       case OP_CRQUERY:
3719       case OP_CRMINQUERY:
3720       case OP_CRPOSQUERY:
           /* The class may match nothing: scan the path that skips it. */
3721       max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
3722       if (max_chars == 0)
3723         return consumed;
3724       break;
3725 
3726       default:
3727       case OP_CRPLUS:
3728       case OP_CRMINPLUS:
3729       case OP_CRPOSPLUS:
3730       break;
3731 
3732       case OP_CRRANGE:
3733       case OP_CRMINRANGE:
3734       case OP_CRPOSRANGE:
3735       repeat = GET2(cc, 1);
3736       if (repeat <= 0)
3737         return consumed;
3738       break;
3739       }
3740 
3741     do
3742       {
           /* If the bit for character 255 is set the position is marked as
           untracked; otherwise every set bit is added to the position. */
3743       if (bytes[31] & 0x80)
3744         chars[0] = 255;
3745       else if (chars[0] != 255)
3746         {
3747         bytes_end = bytes + 32;
3748         chr = 0;
3749         do
3750           {
3751           byte = *bytes++;
3752           SLJIT_ASSERT((chr & 0x7) == 0);
3753           if (byte == 0)
3754             chr += 8;
3755           else
3756             {
3757             do
3758               {
3759               if ((byte & 0x1) != 0)
3760                 add_prefix_char(chr, chars);
3761               byte >>= 1;
3762               chr++;
3763               }
3764             while (byte != 0);
                 /* Round chr up to the first bit of the next bitmap byte. */
3765             chr = (chr + 7) & ~7;
3766             }
3767           }
3768         while (chars[0] != 255 && bytes < bytes_end);
             /* Rewind to the start of the bitmap for the next repetition. */
3769         bytes = bytes_end - 32;
3770         }
3771 
3772       consumed++;
3773       if (--max_chars == 0)
3774         return consumed;
3775       chars += MAX_DIFF_CHARS;
3776       }
3777     while (--repeat > 0);
3778 
3779     switch (*cc)
3780       {
3781       case OP_CRSTAR:
3782       case OP_CRMINSTAR:
3783       case OP_CRPOSSTAR:
3784       return consumed;
3785 
3786       case OP_CRQUERY:
3787       case OP_CRMINQUERY:
3788       case OP_CRPOSQUERY:
3789       cc++;
3790       break;
3791 
3792       case OP_CRRANGE:
3793       case OP_CRMINRANGE:
3794       case OP_CRPOSRANGE:
           /* Scanning continues only when both limits stored in the opcode
           are equal. */
3795       if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
3796         return consumed;
3797       cc += 1 + 2 * IMM2_SIZE;
3798       break;
3799       }
3800 
3801     repeat = 1;
3802     continue;
3803     }
3804 
       /* Literal character: len is its length in code units. */
3805   len = 1;
3806 #ifdef SUPPORT_UTF
3807   if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3808 #endif
3809 
3810   if (caseless && char_has_othercase(common, cc))
3811     {
3812 #ifdef SUPPORT_UTF
3813     if (common->utf)
3814       {
3815       GETCHAR(chr, cc);
           /* Give up when the other case is encoded in a different number
           of code units. */
3816       if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3817         return consumed;
3818       }
3819     else
3820 #endif
3821       {
3822       chr = *cc;
3823       othercase[0] = TABLE_GET(chr, common->fcc, chr);
3824       }
3825     }
3826   else
3827     {
3828     caseless = FALSE;
3829     othercase[0] = 0; /* Stops compiler warning - PH */
3830     }
3831 
       /* Record the character "repeat" times, one position per code unit,
       restarting from the saved pointer and length for each repetition. */
3832   len_save = len;
3833   cc_save = cc;
3834   while (TRUE)
3835     {
3836     oc = othercase;
3837     do
3838       {
3839       chr = *cc;
3840       add_prefix_char(*cc, chars);
3841 
3842       if (caseless)
3843         add_prefix_char(*oc, chars);
3844 
3845       len--;
3846       consumed++;
3847       if (--max_chars == 0)
3848         return consumed;
3849       chars += MAX_DIFF_CHARS;
3850       cc++;
3851       oc++;
3852       }
3853     while (len > 0);
3854 
3855     if (--repeat == 0)
3856       break;
3857 
3858     len = len_save;
3859     cc = cc_save;
3860     }
3861 
3862   repeat = 1;
3863   if (last)
3864     return consumed;
3865   }
3866 }
3867 
3868 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
3869 
character_to_int32(pcre_uchar chr)3870 static sljit_s32 character_to_int32(pcre_uchar chr)
3871 {
3872 sljit_s32 value = (sljit_s32)chr;
3873 #if defined COMPILE_PCRE8
3874 #define SSE2_COMPARE_TYPE_INDEX 0
3875 return (value << 24) | (value << 16) | (value << 8) | value;
3876 #elif defined COMPILE_PCRE16
3877 #define SSE2_COMPARE_TYPE_INDEX 1
3878 return (value << 16) | value;
3879 #elif defined COMPILE_PCRE32
3880 #define SSE2_COMPARE_TYPE_INDEX 2
3881 return value;
3882 #else
3883 #error "Unsupported unit width"
3884 #endif
3885 }
3886 
fast_forward_first_char2_sse2(compiler_common * common,pcre_uchar char1,pcre_uchar char2)3887 static SLJIT_INLINE void fast_forward_first_char2_sse2(compiler_common *common, pcre_uchar char1, pcre_uchar char2)
3888 {
3889 DEFINE_COMPILER;
3890 struct sljit_label *start;
3891 struct sljit_jump *quit[3];
3892 struct sljit_jump *nomatch;
3893 sljit_u8 instruction[8];
3894 sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
3895 sljit_s32 tmp2_ind = sljit_get_register_index(TMP2);
3896 sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
3897 BOOL load_twice = FALSE;
3898 pcre_uchar bit;
3899 
3900 bit = char1 ^ char2;
3901 if (!is_powerof2(bit))
3902   bit = 0;
3903 
3904 if ((char1 != char2) && bit == 0)
3905   load_twice = TRUE;
3906 
3907 quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3908 
3909 /* First part (unaligned start) */
3910 
3911 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
3912 
3913 SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);
3914 
3915 /* MOVD xmm, r/m32 */
3916 instruction[0] = 0x66;
3917 instruction[1] = 0x0f;
3918 instruction[2] = 0x6e;
3919 instruction[3] = 0xc0 | (2 << 3) | tmp1_ind;
3920 sljit_emit_op_custom(compiler, instruction, 4);
3921 
3922 if (char1 != char2)
3923   {
3924   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
3925 
3926   /* MOVD xmm, r/m32 */
3927   instruction[3] = 0xc0 | (3 << 3) | tmp1_ind;
3928   sljit_emit_op_custom(compiler, instruction, 4);
3929   }
3930 
3931 /* PSHUFD xmm1, xmm2/m128, imm8 */
3932 instruction[2] = 0x70;
3933 instruction[3] = 0xc0 | (2 << 3) | 2;
3934 instruction[4] = 0;
3935 sljit_emit_op_custom(compiler, instruction, 5);
3936 
3937 if (char1 != char2)
3938   {
3939   /* PSHUFD xmm1, xmm2/m128, imm8 */
3940   instruction[3] = 0xc0 | (3 << 3) | 3;
3941   instruction[4] = 0;
3942   sljit_emit_op_custom(compiler, instruction, 5);
3943   }
3944 
3945 OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf);
3946 OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
3947 
3948 /* MOVDQA xmm1, xmm2/m128 */
3949 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3950 
3951 if (str_ptr_ind < 8)
3952   {
3953   instruction[2] = 0x6f;
3954   instruction[3] = (0 << 3) | str_ptr_ind;
3955   sljit_emit_op_custom(compiler, instruction, 4);
3956 
3957   if (load_twice)
3958     {
3959     instruction[3] = (1 << 3) | str_ptr_ind;
3960     sljit_emit_op_custom(compiler, instruction, 4);
3961     }
3962   }
3963 else
3964   {
3965   instruction[1] = 0x41;
3966   instruction[2] = 0x0f;
3967   instruction[3] = 0x6f;
3968   instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
3969   sljit_emit_op_custom(compiler, instruction, 5);
3970 
3971   if (load_twice)
3972     {
3973     instruction[4] = (1 << 3) | str_ptr_ind;
3974     sljit_emit_op_custom(compiler, instruction, 5);
3975     }
3976   instruction[1] = 0x0f;
3977   }
3978 
3979 #else
3980 
3981 instruction[2] = 0x6f;
3982 instruction[3] = (0 << 3) | str_ptr_ind;
3983 sljit_emit_op_custom(compiler, instruction, 4);
3984 
3985 if (load_twice)
3986   {
3987   instruction[3] = (1 << 3) | str_ptr_ind;
3988   sljit_emit_op_custom(compiler, instruction, 4);
3989   }
3990 
3991 #endif
3992 
3993 if (bit != 0)
3994   {
3995   /* POR xmm1, xmm2/m128 */
3996   instruction[2] = 0xeb;
3997   instruction[3] = 0xc0 | (0 << 3) | 3;
3998   sljit_emit_op_custom(compiler, instruction, 4);
3999   }
4000 
4001 /* PCMPEQB/W/D xmm1, xmm2/m128 */
4002 instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
4003 instruction[3] = 0xc0 | (0 << 3) | 2;
4004 sljit_emit_op_custom(compiler, instruction, 4);
4005 
4006 if (load_twice)
4007   {
4008   instruction[3] = 0xc0 | (1 << 3) | 3;
4009   sljit_emit_op_custom(compiler, instruction, 4);
4010   }
4011 
4012 /* PMOVMSKB reg, xmm */
4013 instruction[2] = 0xd7;
4014 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
4015 sljit_emit_op_custom(compiler, instruction, 4);
4016 
4017 if (load_twice)
4018   {
4019   OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
4020   instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
4021   sljit_emit_op_custom(compiler, instruction, 4);
4022 
4023   OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4024   OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
4025   }
4026 
4027 OP2(SLJIT_ASHR, TMP1, 0, TMP1, 0, TMP2, 0);
4028 
4029 /* BSF r32, r/m32 */
4030 instruction[0] = 0x0f;
4031 instruction[1] = 0xbc;
4032 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
4033 sljit_emit_op_custom(compiler, instruction, 3);
4034 sljit_set_current_flags(compiler, SLJIT_SET_Z);
4035 
4036 nomatch = JUMP(SLJIT_ZERO);
4037 
4038 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4039 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4040 quit[1] = JUMP(SLJIT_JUMP);
4041 
4042 JUMPHERE(nomatch);
4043 
4044 start = LABEL();
4045 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
4046 quit[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4047 
4048 /* Second part (aligned) */
4049 
4050 instruction[0] = 0x66;
4051 instruction[1] = 0x0f;
4052 
4053 /* MOVDQA xmm1, xmm2/m128 */
4054 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4055 
4056 if (str_ptr_ind < 8)
4057   {
4058   instruction[2] = 0x6f;
4059   instruction[3] = (0 << 3) | str_ptr_ind;
4060   sljit_emit_op_custom(compiler, instruction, 4);
4061 
4062   if (load_twice)
4063     {
4064     instruction[3] = (1 << 3) | str_ptr_ind;
4065     sljit_emit_op_custom(compiler, instruction, 4);
4066     }
4067   }
4068 else
4069   {
4070   instruction[1] = 0x41;
4071   instruction[2] = 0x0f;
4072   instruction[3] = 0x6f;
4073   instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
4074   sljit_emit_op_custom(compiler, instruction, 5);
4075 
4076   if (load_twice)
4077     {
4078     instruction[4] = (1 << 3) | str_ptr_ind;
4079     sljit_emit_op_custom(compiler, instruction, 5);
4080     }
4081   instruction[1] = 0x0f;
4082   }
4083 
4084 #else
4085 
4086 instruction[2] = 0x6f;
4087 instruction[3] = (0 << 3) | str_ptr_ind;
4088 sljit_emit_op_custom(compiler, instruction, 4);
4089 
4090 if (load_twice)
4091   {
4092   instruction[3] = (1 << 3) | str_ptr_ind;
4093   sljit_emit_op_custom(compiler, instruction, 4);
4094   }
4095 
4096 #endif
4097 
4098 if (bit != 0)
4099   {
4100   /* POR xmm1, xmm2/m128 */
4101   instruction[2] = 0xeb;
4102   instruction[3] = 0xc0 | (0 << 3) | 3;
4103   sljit_emit_op_custom(compiler, instruction, 4);
4104   }
4105 
4106 /* PCMPEQB/W/D xmm1, xmm2/m128 */
4107 instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
4108 instruction[3] = 0xc0 | (0 << 3) | 2;
4109 sljit_emit_op_custom(compiler, instruction, 4);
4110 
4111 if (load_twice)
4112   {
4113   instruction[3] = 0xc0 | (1 << 3) | 3;
4114   sljit_emit_op_custom(compiler, instruction, 4);
4115   }
4116 
4117 /* PMOVMSKB reg, xmm */
4118 instruction[2] = 0xd7;
4119 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
4120 sljit_emit_op_custom(compiler, instruction, 4);
4121 
4122 if (load_twice)
4123   {
4124   instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
4125   sljit_emit_op_custom(compiler, instruction, 4);
4126 
4127   OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4128   }
4129 
4130 /* BSF r32, r/m32 */
4131 instruction[0] = 0x0f;
4132 instruction[1] = 0xbc;
4133 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
4134 sljit_emit_op_custom(compiler, instruction, 3);
4135 sljit_set_current_flags(compiler, SLJIT_SET_Z);
4136 
4137 JUMPTO(SLJIT_ZERO, start);
4138 
4139 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4140 
4141 start = LABEL();
4142 SET_LABEL(quit[0], start);
4143 SET_LABEL(quit[1], start);
4144 SET_LABEL(quit[2], start);
4145 }
4146 
4147 #undef SSE2_COMPARE_TYPE_INDEX
4148 
4149 #endif
4150 
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
/* Emits the check which is needed in UTF mode when the searched character
is not the first character of the pattern (offset > 0). STR_PTR points to
the character which was found, so the start of the match is offset code
units before it. When the code unit at the start is a UTF-8 continuation
byte (8 bit mode) or a low surrogate (16 bit mode), the position is not a
character boundary: STR_PTR is advanced by one code unit and the generated
code jumps back to utf_start to continue the search. Otherwise STR_PTR is
left unchanged. TMP1 is destroyed. */
static void fast_forward_char2_utf_recheck(compiler_common *common, sljit_s32 offset, struct sljit_label *utf_start)
{
DEFINE_COMPILER;

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined COMPILE_PCRE8
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
#elif defined COMPILE_PCRE16
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
#else
#error "Unknown code width"
#endif
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
#endif

/* Emits code which moves STR_PTR forward until the character at
STR_PTR + offset is equal to char1 or char2 (the two can be the same).

common   the compiler state
char1    the first accepted character
char2    the second accepted character
offset   distance (in characters) of the searched character from the
         start of the match

When common->match_end_ptr is set, the search is limited by the value
stored there: STR_END is saved in TMP3, temporarily lowered, and restored
before the generated code finishes. On x86 with SSE2 (and without valgrind
support) the search is done by fast_forward_first_char2_sse2. */
static void fast_forward_first_char2(compiler_common *common, pcre_uchar char1, pcre_uchar char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
struct sljit_jump *found;
pcre_uchar mask;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_label *utf_start = NULL;
struct sljit_jump *utf_quit = NULL;
#endif
BOOL has_match_end = (common->match_end_ptr != 0);

/* From here STR_PTR points to the searched character, not to the start. */
if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (has_match_end)
  {
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  /* STR_END = min(match_end + offset + 1, original STR_END) */
  OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
  sljit_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
  }

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf && offset > 0)
  utf_start = LABEL();
#endif

#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)

/* SSE2 accelerated first character search. */

if (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
  {
  fast_forward_first_char2_sse2(common, char1, char2);

  SLJIT_ASSERT(common->mode == JIT_COMPILE || offset == 0);
  if (common->mode == JIT_COMPILE)
    {
    /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */
    SLJIT_ASSERT(common->forced_quit_label == NULL);
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
    add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf && offset > 0)
      {
      SLJIT_ASSERT(common->mode == JIT_COMPILE);
      fast_forward_char2_utf_recheck(common, offset, utf_start);
      }
#endif

    if (offset > 0)
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
    }
  else
    {
    /* The SSE2 code may move STR_PTR beyond the end, so it is clamped. */
    OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
    if (has_match_end)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
      sljit_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, TMP1, 0);
      }
    else
      sljit_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, STR_END, 0);
    }

  if (has_match_end)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

/* Generic search: one character is checked in each iteration. */

quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

start = LABEL();
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);

if (char1 == char2)
  found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
else
  {
  mask = char1 ^ char2;
  if (is_powerof2(mask))
    {
    /* The characters differ in one bit: set it, and compare only once. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
    found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask);
    }
  else
    {
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1);
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2);
    OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
    found = JUMP(SLJIT_NOT_ZERO);
    }
  }

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, start);

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
/* The end is reached without a match: the UTF check must be skipped. */
if (common->utf && offset > 0)
  utf_quit = JUMP(SLJIT_JUMP);
#endif

JUMPHERE(found);

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf && offset > 0)
  {
  fast_forward_char2_utf_recheck(common, offset, utf_start);
  JUMPHERE(utf_quit);
  }
#endif

JUMPHERE(quit);

if (has_match_end)
  {
  /* Nothing was found before the limit: continue from match_end. */
  quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  if (offset > 0)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
  JUMPHERE(quit);
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  }

/* STR_PTR points to the start of the match again. */
if (offset > 0)
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
}
4308 
fast_forward_first_n_chars(compiler_common * common)4309 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
4310 {
4311 DEFINE_COMPILER;
4312 struct sljit_label *start;
4313 struct sljit_jump *quit;
4314 struct sljit_jump *match;
4315 /* bytes[0] represent the number of characters between 0
4316 and MAX_N_BYTES - 1, 255 represents any character. */
4317 pcre_uchar chars[MAX_N_CHARS * MAX_DIFF_CHARS];
4318 sljit_s32 offset;
4319 pcre_uchar mask;
4320 pcre_uchar *char_set, *char_set_end;
4321 int i, max, from;
4322 int range_right = -1, range_len;
4323 sljit_u8 *update_table = NULL;
4324 BOOL in_range;
4325 sljit_u32 rec_count;
4326 
4327 for (i = 0; i < MAX_N_CHARS; i++)
4328   chars[i * MAX_DIFF_CHARS] = 0;
4329 
4330 rec_count = 10000;
4331 max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
4332 
4333 if (max < 1)
4334   return FALSE;
4335 
4336 in_range = FALSE;
4337 /* Prevent compiler "uninitialized" warning */
4338 from = 0;
4339 range_len = 4 /* minimum length */ - 1;
4340 for (i = 0; i <= max; i++)
4341   {
4342   if (in_range && (i - from) > range_len && (chars[(i - 1) * MAX_DIFF_CHARS] < 255))
4343     {
4344     range_len = i - from;
4345     range_right = i - 1;
4346     }
4347 
4348   if (i < max && chars[i * MAX_DIFF_CHARS] < 255)
4349     {
4350     SLJIT_ASSERT(chars[i * MAX_DIFF_CHARS] > 0);
4351     if (!in_range)
4352       {
4353       in_range = TRUE;
4354       from = i;
4355       }
4356     }
4357   else
4358     in_range = FALSE;
4359   }
4360 
4361 if (range_right >= 0)
4362   {
4363   update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
4364   if (update_table == NULL)
4365     return TRUE;
4366   memset(update_table, IN_UCHARS(range_len), 256);
4367 
4368   for (i = 0; i < range_len; i++)
4369     {
4370     char_set = chars + ((range_right - i) * MAX_DIFF_CHARS);
4371     SLJIT_ASSERT(char_set[0] > 0 && char_set[0] < 255);
4372     char_set_end = char_set + char_set[0];
4373     char_set++;
4374     while (char_set <= char_set_end)
4375       {
4376       if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
4377         update_table[(*char_set) & 0xff] = IN_UCHARS(i);
4378       char_set++;
4379       }
4380     }
4381   }
4382 
4383 offset = -1;
4384 /* Scan forward. */
4385 for (i = 0; i < max; i++)
4386   {
4387   if (offset == -1)
4388     {
4389     if (chars[i * MAX_DIFF_CHARS] <= 2)
4390       offset = i;
4391     }
4392   else if (chars[offset * MAX_DIFF_CHARS] == 2 && chars[i * MAX_DIFF_CHARS] <= 2)
4393     {
4394     if (chars[i * MAX_DIFF_CHARS] == 1)
4395       offset = i;
4396     else
4397       {
4398       mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
4399       if (!is_powerof2(mask))
4400         {
4401         mask = chars[i * MAX_DIFF_CHARS + 1] ^ chars[i * MAX_DIFF_CHARS + 2];
4402         if (is_powerof2(mask))
4403           offset = i;
4404         }
4405       }
4406     }
4407   }
4408 
4409 if (range_right < 0)
4410   {
4411   if (offset < 0)
4412     return FALSE;
4413   SLJIT_ASSERT(chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2);
4414   /* Works regardless the value is 1 or 2. */
4415   mask = chars[offset * MAX_DIFF_CHARS + chars[offset * MAX_DIFF_CHARS]];
4416   fast_forward_first_char2(common, chars[offset * MAX_DIFF_CHARS + 1], mask, offset);
4417   return TRUE;
4418   }
4419 
4420 if (range_right == offset)
4421   offset = -1;
4422 
4423 SLJIT_ASSERT(offset == -1 || (chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2));
4424 
4425 max -= 1;
4426 SLJIT_ASSERT(max > 0);
4427 if (common->match_end_ptr != 0)
4428   {
4429   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4430   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4431   OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4432   quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
4433   OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
4434   JUMPHERE(quit);
4435   }
4436 else
4437   OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4438 
4439 SLJIT_ASSERT(range_right >= 0);
4440 
4441 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4442 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
4443 #endif
4444 
4445 start = LABEL();
4446 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4447 
4448 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
4449 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
4450 #else
4451 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
4452 #endif
4453 
4454 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4455 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
4456 #else
4457 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
4458 #endif
4459 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4460 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
4461 
4462 if (offset >= 0)
4463   {
4464   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
4465   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4466 
4467   if (chars[offset * MAX_DIFF_CHARS] == 1)
4468     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1], start);
4469   else
4470     {
4471     mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
4472     if (is_powerof2(mask))
4473       {
4474       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
4475       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1] | mask, start);
4476       }
4477     else
4478       {
4479       match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1]);
4480       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 2], start);
4481       JUMPHERE(match);
4482       }
4483     }
4484   }
4485 
4486 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4487 if (common->utf && offset != 0)
4488   {
4489   if (offset < 0)
4490     {
4491     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4492     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4493     }
4494   else
4495     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4496 #if defined COMPILE_PCRE8
4497   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4498   CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, start);
4499 #elif defined COMPILE_PCRE16
4500   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4501   CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, start);
4502 #else
4503 #error "Unknown code width"
4504 #endif
4505   if (offset < 0)
4506     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4507   }
4508 #endif
4509 
4510 if (offset >= 0)
4511   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4512 
4513 JUMPHERE(quit);
4514 
4515 if (common->match_end_ptr != 0)
4516   {
4517   if (range_right >= 0)
4518     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4519   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4520   if (range_right >= 0)
4521     {
4522     quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4523     OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
4524     JUMPHERE(quit);
4525     }
4526   }
4527 else
4528   OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4529 return TRUE;
4530 }
4531 
4532 #undef MAX_N_CHARS
4533 #undef MAX_DIFF_CHARS
4534 
fast_forward_first_char(compiler_common * common,pcre_uchar first_char,BOOL caseless)4535 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless)
4536 {
4537 pcre_uchar oc;
4538 
4539 oc = first_char;
4540 if (caseless)
4541   {
4542   oc = TABLE_GET(first_char, common->fcc, first_char);
4543 #if defined SUPPORT_UCP && !defined COMPILE_PCRE8
4544   if (first_char > 127 && common->utf)
4545     oc = UCD_OTHERCASE(first_char);
4546 #endif
4547   }
4548 
4549 fast_forward_first_char2(common, first_char, oc, 0);
4550 }
4551 
fast_forward_newline(compiler_common * common)4552 static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
4553 {
4554 DEFINE_COMPILER;
4555 struct sljit_label *loop;
4556 struct sljit_jump *lastchar;
4557 struct sljit_jump *firstchar;
4558 struct sljit_jump *quit;
4559 struct sljit_jump *foundcr = NULL;
4560 struct sljit_jump *notfoundnl;
4561 jump_list *newline = NULL;
4562 
4563 if (common->match_end_ptr != 0)
4564   {
4565   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4566   OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4567   }
4568 
4569 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4570   {
4571   lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4572   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4573   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4574   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4575   firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4576 
4577   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
4578   OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
4579   OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
4580 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4581   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
4582 #endif
4583   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4584 
4585   loop = LABEL();
4586   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4587   quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4588   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4589   OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4590   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
4591   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
4592 
4593   JUMPHERE(quit);
4594   JUMPHERE(firstchar);
4595   JUMPHERE(lastchar);
4596 
4597   if (common->match_end_ptr != 0)
4598     OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4599   return;
4600   }
4601 
4602 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4603 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4604 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4605 skip_char_back(common);
4606 
4607 loop = LABEL();
4608 common->ff_newline_shortcut = loop;
4609 
4610 read_char_range(common, common->nlmin, common->nlmax, TRUE);
4611 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4612 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4613   foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4614 check_newlinechar(common, common->nltype, &newline, FALSE);
4615 set_jumps(newline, loop);
4616 
4617 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4618   {
4619   quit = JUMP(SLJIT_JUMP);
4620   JUMPHERE(foundcr);
4621   notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4622   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4623   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
4624   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
4625 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4626   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4627 #endif
4628   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4629   JUMPHERE(notfoundnl);
4630   JUMPHERE(quit);
4631   }
4632 JUMPHERE(lastchar);
4633 JUMPHERE(firstchar);
4634 
4635 if (common->match_end_ptr != 0)
4636   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4637 }
4638 
4639 static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
4640 
fast_forward_start_bits(compiler_common * common,const sljit_u8 * start_bits)4641 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, const sljit_u8 *start_bits)
4642 {
4643 DEFINE_COMPILER;
4644 struct sljit_label *start;
4645 struct sljit_jump *quit;
4646 struct sljit_jump *found = NULL;
4647 jump_list *matches = NULL;
4648 #ifndef COMPILE_PCRE8
4649 struct sljit_jump *jump;
4650 #endif
4651 
4652 if (common->match_end_ptr != 0)
4653   {
4654   OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
4655   OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4656   }
4657 
4658 start = LABEL();
4659 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4660 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4661 #ifdef SUPPORT_UTF
4662 if (common->utf)
4663   OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4664 #endif
4665 
4666 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
4667   {
4668 #ifndef COMPILE_PCRE8
4669   jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
4670   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
4671   JUMPHERE(jump);
4672 #endif
4673   OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4674   OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4675   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
4676   OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4677   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4678   found = JUMP(SLJIT_NOT_ZERO);
4679   }
4680 
4681 #ifdef SUPPORT_UTF
4682 if (common->utf)
4683   OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4684 #endif
4685 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4686 #ifdef SUPPORT_UTF
4687 #if defined COMPILE_PCRE8
4688 if (common->utf)
4689   {
4690   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
4691   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4692   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4693   }
4694 #elif defined COMPILE_PCRE16
4695 if (common->utf)
4696   {
4697   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
4698   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4699   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4700   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
4701   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4702   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4703   }
4704 #endif /* COMPILE_PCRE[8|16] */
4705 #endif /* SUPPORT_UTF */
4706 JUMPTO(SLJIT_JUMP, start);
4707 if (found != NULL)
4708   JUMPHERE(found);
4709 if (matches != NULL)
4710   set_jumps(matches, LABEL());
4711 JUMPHERE(quit);
4712 
4713 if (common->match_end_ptr != 0)
4714   OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
4715 }
4716 
search_requested_char(compiler_common * common,pcre_uchar req_char,BOOL caseless,BOOL has_firstchar)4717 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4718 {
4719 DEFINE_COMPILER;
4720 struct sljit_label *loop;
4721 struct sljit_jump *toolong;
4722 struct sljit_jump *alreadyfound;
4723 struct sljit_jump *found;
4724 struct sljit_jump *foundoc = NULL;
4725 struct sljit_jump *notfound;
4726 sljit_u32 oc, bit;
4727 
4728 SLJIT_ASSERT(common->req_char_ptr != 0);
4729 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4730 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4731 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4732 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4733 
4734 if (has_firstchar)
4735   OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4736 else
4737   OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4738 
4739 loop = LABEL();
4740 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4741 
4742 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4743 oc = req_char;
4744 if (caseless)
4745   {
4746   oc = TABLE_GET(req_char, common->fcc, req_char);
4747 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4748   if (req_char > 127 && common->utf)
4749     oc = UCD_OTHERCASE(req_char);
4750 #endif
4751   }
4752 if (req_char == oc)
4753   found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4754 else
4755   {
4756   bit = req_char ^ oc;
4757   if (is_powerof2(bit))
4758     {
4759     OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4760     found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4761     }
4762   else
4763     {
4764     found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4765     foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4766     }
4767   }
4768 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4769 JUMPTO(SLJIT_JUMP, loop);
4770 
4771 JUMPHERE(found);
4772 if (foundoc)
4773   JUMPHERE(foundoc);
4774 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4775 JUMPHERE(alreadyfound);
4776 JUMPHERE(toolong);
4777 return notfound;
4778 }
4779 
/* Emits the fast-called helper which restores local variables from the
data saved at STACK_TOP. The word at STACK_TOP[-1] selects the action:

  positive: offset of a local. Two words (STACK_TOP[-2], STACK_TOP[-3])
            are written to the local and to the word after it, and three
            words are removed.
  zero:     end of the data. STACK_TOP gets back its value at entry, and
            the helper returns.
  negative: the negated value is the offset of a local. One word
            (STACK_TOP[-2]) is written, and two words are removed.

TMP1 (base of the locals), TMP2 and TMP3 (saved STACK_TOP) are destroyed. */
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *next_item;
struct sljit_jump *not_positive;
struct sljit_jump *negative;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP1(SLJIT_MOV, TMP3, 0, STACK_TOP, 0);
GET_LOCAL_BASE(TMP1, 0, 0);

/* Drop frames until we reach STACK_TOP. */
next_item = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
not_positive = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);

/* Positive offset: two words are restored. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -2 * sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -3 * sizeof(sljit_sw));
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, next_item);

JUMPHERE(not_positive);
/* The value is known to be <= 0 here, so not zero means negative. */
negative = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
/* End of reverting values. */
OP1(SLJIT_MOV, STACK_TOP, 0, TMP3, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Negative offset: one word is restored. */
JUMPHERE(negative);
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -2 * sizeof(sljit_sw));
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, next_item);
}
4814 
#ifdef SUPPORT_UCP
/* Emits the Unicode aware word character test. TMP2 is set to 1 when the
character in TMP1 is an underscore, or when its type is in the
ucp_Ll - ucp_Lu or in the ucp_Nd - ucp_No range, and to 0 otherwise.
NOTE(review): the type is expected in TMP1 after the getucd fast call,
confirm at its definition. TMP1 is destroyed. */
static void check_wordboundary_ucp_type(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *is_underscore;

OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
is_underscore = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
JUMPHERE(is_underscore);
}
#endif

/* Emits the fast-called helper of the word boundary test. It computes
whether the character before STR_PTR is a word character (the result is
stored in LOCALS1, and it is 0 at the start of the subject), and whether
the character at STR_PTR is a word character (the result is in TMP2, and it
is 0 at the end of the subject). The last instruction is the XOR of the two
values which sets the zero flag, so the flag is set when STR_PTR is not at
a word boundary. The return address is kept in LOCALS0. */
static void check_wordboundary(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
#if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
struct sljit_jump *jump;
#endif

/* The shift by four below depends on this value. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Get type of the previous char, and put it to LOCALS1. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
skip_char_back(common);
check_start_used_ptr(common);
read_char(common);

/* Testing char type. */
#ifdef SUPPORT_UCP
if (common->use_ucp)
  {
  check_wordboundary_ucp_type(common);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
  }
else
#endif
  {
#ifndef COMPILE_PCRE8
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  /* Here LOCALS1 has already been zeroed. */
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* COMPILE_PCRE8 */
  /* Extract the ctype_word bit from the character type table. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#ifndef COMPILE_PCRE8
  JUMPHERE(jump);
#elif defined SUPPORT_UTF
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* COMPILE_PCRE8 */
  }
JUMPHERE(skipread);

/* Get type of the current char, and put it to TMP2. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
peek_char(common, READ_CHAR_MAX);

/* Testing char type. */
#ifdef SUPPORT_UCP
if (common->use_ucp)
  check_wordboundary_ucp_type(common);
else
#endif
  {
#ifndef COMPILE_PCRE8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#ifndef COMPILE_PCRE8
  JUMPHERE(jump);
#elif defined SUPPORT_UTF
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* COMPILE_PCRE8 */
  }
set_jumps(skipread_list, LABEL());

OP2(SLJIT_XOR | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
}
4923 
check_class_ranges(compiler_common * common,const sljit_u8 * bits,BOOL nclass,BOOL invert,jump_list ** backtracks)4924 static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4925 {
4926 /* Tries to test the character in TMP1 against the 256 bit class bitmap
     'bits' with a few compare instructions instead of a bitmap lookup.

     The bitmap is scanned for the character codes at which the bit value
     changes, and these boundaries are stored in ranges[]. 'nclass' gives the
     value assumed for characters above 255: a final boundary of 256 is added
     when the bit of character 255 differs from it. 'invert' flips the sense
     of the whole test.

     Returns FALSE, before any code is emitted, when more than
     MAX_RANGE_SIZE or more than four boundaries are found; the caller then
     has to emit the bitmap test itself. Otherwise returns TRUE after adding
     to 'backtracks' the jumps taken by the characters whose bitmap bit,
     xor-ed with 'invert', is zero.

     May destroy TMP1. */
4927 DEFINE_COMPILER;
4928 int ranges[MAX_RANGE_SIZE];
4929 sljit_u8 bit, cbit, all;
4930 int i, byte, length = 0;
4931 
4932 bit = bits[0] & 0x1;
4933 /* All bits will be zero or one (since bit is zero or one). */
4934 all = -bit;
4935 
     /* Walk the bitmap. A whole byte equal to 'all' (eight bits identical to
     the current bit value) is skipped in one step. */
4936 for (i = 0; i < 256; )
4937   {
4938   byte = i >> 3;
4939   if ((i & 0x7) == 0 && bits[byte] == all)
4940     i += 8;
4941   else
4942     {
4943     cbit = (bits[byte] >> (i & 0x7)) & 0x1;
       /* The bit value changes at character i: record a boundary. */
4944     if (cbit != bit)
4945       {
4946       if (length >= MAX_RANGE_SIZE)
4947         return FALSE;
4948       ranges[length] = i;
4949       length++;
4950       bit = cbit;
4951       all = -cbit;
4952       }
4953     i++;
4954     }
4955   }
4956 
     /* 'bit' is now the bit of character 255. Add a boundary at 256 when the
     value assumed for larger characters (nclass) is different. */
4957 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
4958   {
4959   if (length >= MAX_RANGE_SIZE)
4960     return FALSE;
4961   ranges[length] = 256;
4962   length++;
4963   }
4964 
     /* The code below handles at most four boundaries. */
4965 if (length < 0 || length > 4)
4966   return FALSE;
4967 
     /* From here on 'bit' is the (possibly inverted) value of the first
     interval, which starts at character 0. */
4968 bit = bits[0] & 0x1;
4969 if (invert) bit ^= 0x1;
4970 
4971 /* No character is accepted. */
4972 if (length == 0 && bit == 0)
4973   add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4974 
4975 switch(length)
4976   {
4977   case 0:
4978   /* When bit != 0, all characters are accepted. */
4979   return TRUE;
4980 
4981   case 1:
       /* One boundary: a single compare against ranges[0]. */
4982   add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4983   return TRUE;
4984 
4985   case 2:
       /* Two boundaries: one interval [ranges[0], ranges[1]). The lower bound
       is subtracted first so that one compare against the interval length
       tests both ends (this works when the comparison is unsigned). An
       interval of length one is tested with an equality compare instead. */
4986   if (ranges[0] + 1 != ranges[1])
4987     {
4988     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4989     add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4990     }
4991   else
4992     add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4993   return TRUE;
4994 
4995   case 3:
       /* Three boundaries: one open ended part plus one interval. When
       bit != 0 the open part starts at ranges[2] and the interval is
       [ranges[0], ranges[1]); otherwise the open part ends before ranges[0]
       and the interval is [ranges[1], ranges[2]). */
4996   if (bit != 0)
4997     {
4998     add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4999     if (ranges[0] + 1 != ranges[1])
5000       {
5001       OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5002       add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5003       }
5004     else
5005       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5006     return TRUE;
5007     }
5008 
5009   add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
5010   if (ranges[1] + 1 != ranges[2])
5011     {
5012     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
5013     add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
5014     }
5015   else
5016     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
5017   return TRUE;
5018 
5019   case 4:
       /* Four boundaries: two intervals. If both have the same length and
       their start positions differ in exactly one bit, that bit is set in
       TMP1 and only the upper interval [ranges[2], ranges[3]) is tested. */
5020   if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
5021       && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
5022       && (ranges[1] & (ranges[2] - ranges[0])) == 0
5023       && is_powerof2(ranges[2] - ranges[0]))
5024     {
5025     SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
5026     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
5027     if (ranges[2] + 1 != ranges[3])
5028       {
5029       OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
5030       add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
5031       }
5032     else
5033       add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
5034     return TRUE;
5035     }
5036 
       /* General case, bit != 0: jump when TMP1 is inside either interval.
       'i' records how much has already been subtracted from TMP1. */
5037   if (bit != 0)
5038     {
5039     i = 0;
5040     if (ranges[0] + 1 != ranges[1])
5041       {
5042       OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5043       add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5044       i = ranges[0];
5045       }
5046     else
5047       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5048 
5049     if (ranges[2] + 1 != ranges[3])
5050       {
5051       OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
5052       add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
5053       }
5054     else
5055       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
5056     return TRUE;
5057     }
5058 
       /* General case, bit == 0: jump when TMP1 is outside
       [ranges[0], ranges[3]) or inside the gap [ranges[1], ranges[2]). */
5059   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5060   add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
5061   if (ranges[1] + 1 != ranges[2])
5062     {
5063     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
5064     add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
5065     }
5066   else
5067     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5068   return TRUE;
5069 
5070   default:
5071   SLJIT_UNREACHABLE();
5072   return FALSE;
5073   }
5074 }
5075 
check_anynewline(compiler_common * common)5076 static void check_anynewline(compiler_common *common)
5077 {
5078 /* Check whether TMP1 contains a newline character. TMP2 destroyed.

     Emits a routine that is entered with sljit_emit_fast_enter() and left
     with sljit_emit_fast_return() through RETURN_ADDR. The characters tested
     are 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 in the 16 and 32 bit
     builds, or in an 8 bit build with SUPPORT_UTF when common->utf is set.
     TMP2 receives the OR of the individual test results, and the last
     OP_FLAGS is emitted with SLJIT_SET_Z for the combined result.

     TMP1 is modified as well: 0x0a is subtracted from it, and its lowest bit
     is set on the path that tests 0x2028 and 0x2029. */
5079 DEFINE_COMPILER;
5080 
5081 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5082 
     /* TMP1 = c - 0x0a, so a single LESS_EQUAL compare covers 0x0a-0x0d. */
5083 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
5084 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
5085 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
5086 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
5087 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5088 #ifdef COMPILE_PCRE8
5089 if (common->utf)
5090   {
5091 #endif
       /* Store the result of the 0x85 test, then set the lowest bit of TMP1
       so that 0x2028 and 0x2029 are matched by one equality compare. */
5092   OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5093   OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
5094   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
5095 #ifdef COMPILE_PCRE8
5096   }
5097 #endif
5098 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
     /* Merges the result of the last equality compare emitted above. */
5099 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5100 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5101 }
5102 
check_hspace(compiler_common * common)5103 static void check_hspace(compiler_common *common)
5104 {
5105 /* Check whether TMP1 contains a horizontal whitespace character. TMP2
     destroyed.

     Emits a routine that is entered with sljit_emit_fast_enter() and left
     with sljit_emit_fast_return() through RETURN_ADDR. The characters tested
     are 0x09, 0x20 and 0xa0, plus 0x1680, 0x180e, 0x2000-0x200a, 0x202f,
     0x205f and 0x3000 in the 16 and 32 bit builds, or in an 8 bit build with
     SUPPORT_UTF when common->utf is set. TMP2 receives the OR of the
     individual test results, and the last OP_FLAGS is emitted with
     SLJIT_SET_Z for the combined result.

     On the wide character path TMP1 is modified as well: 0x2000 is
     subtracted from it. */
5106 DEFINE_COMPILER;
5107 
5108 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5109 
5110 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
5111 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
5112 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
5113 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5114 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
5115 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5116 #ifdef COMPILE_PCRE8
5117 if (common->utf)
5118   {
5119 #endif
5120   OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5121   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
5122   OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5123   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
5124   OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
       /* TMP1 = c - 0x2000 from here on, so a single LESS_EQUAL compare
       covers 0x2000-0x200a and the remaining constants are biased. */
5125   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
5126   OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
5127   OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
5128   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
5129   OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5130   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
5131   OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5132   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
5133 #ifdef COMPILE_PCRE8
5134   }
5135 #endif
5136 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
     /* Merges the result of the last equality compare emitted above. */
5137 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5138 
5139 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5140 }
5141 
check_vspace(compiler_common * common)5142 static void check_vspace(compiler_common *common)
5143 {
5144 /* Check whether TMP1 contains a vertical whitespace character. TMP2
     destroyed.

     Emits a routine that is entered with sljit_emit_fast_enter() and left
     with sljit_emit_fast_return() through RETURN_ADDR. The characters tested
     are 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 in the 16 and 32 bit
     builds, or in an 8 bit build with SUPPORT_UTF when common->utf is set.
     TMP2 receives the OR of the individual test results, and the last
     OP_FLAGS is emitted with SLJIT_SET_Z for the combined result.

     TMP1 is modified as well: 0x0a is subtracted from it, and its lowest bit
     is set on the path that tests 0x2028 and 0x2029. */
5145 DEFINE_COMPILER;
5146 
5147 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5148 
     /* TMP1 = c - 0x0a, so a single LESS_EQUAL compare covers 0x0a-0x0d. */
5149 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
5150 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
5151 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
5152 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
5153 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5154 #ifdef COMPILE_PCRE8
5155 if (common->utf)
5156   {
5157 #endif
       /* Store the result of the 0x85 test, then set the lowest bit of TMP1
       so that 0x2028 and 0x2029 are matched by one equality compare. */
5158   OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5159   OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
5160   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
5161 #ifdef COMPILE_PCRE8
5162   }
5163 #endif
5164 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
     /* Merges the result of the last equality compare emitted above. */
5165 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5166 
5167 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5168 }
5169 
5170 #define CHAR1 STR_END /* Aliases STR_END; saved and restored by the compare routines below. */
5171 #define CHAR2 STACK_TOP /* Aliases STACK_TOP; saved and restored by the compare routines below. */
5172 
do_casefulcmp(compiler_common * common)5173 static void do_casefulcmp(compiler_common *common)
5174 {
     /* Emits the caseful string comparison routine, entered with
     sljit_emit_fast_enter() and left through RETURN_ADDR.

     As used by the code below, TMP1 addresses one string, TMP2 holds the
     length to compare (in the units produced by IN_UCHARS) and STR_PTR is
     first moved back by TMP2 to address the other string. One character of
     each string is loaded and compared per iteration; the loop ends at the
     first difference or when TMP2 reaches zero, so TMP2 is zero on return
     only when every character was equal. The loop body runs before TMP2 is
     tested, so TMP2 is expected to be non-zero on entry.

     CHAR1 and CHAR2 are kept in TMP3 and LOCALS0 during the loop and restored
     afterwards. TMP1, TMP2 and TMP3 are modified.

     NOTE(review): MOVU_UCHAR is presumably a load that also updates its base
     register, which would explain why both pointers are moved back by one
     character before the loop and STR_PTR is moved forward again after it.
     Confirm against the definition of MOVU_UCHAR. */
5175 DEFINE_COMPILER;
5176 struct sljit_jump *jump;
5177 struct sljit_label *label;
5178 
5179 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5180 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
     /* Save the registers aliased by CHAR1 and CHAR2. */
5181 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
5182 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
5183 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5184 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5185 
5186 label = LABEL();
5187 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5188 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5189 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
5190 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5191 JUMPTO(SLJIT_NOT_ZERO, label);
5192 
     /* Common exit for both a mismatch and a completed comparison. */
5193 JUMPHERE(jump);
5194 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5195 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
5196 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5197 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5198 }
5199 
5200 #define LCC_TABLE STACK_LIMIT /* Aliases STACK_LIMIT; holds common->lcc inside do_caselesscmp. */
5201 
do_caselesscmp(compiler_common * common)5202 static void do_caselesscmp(compiler_common *common)
5203 {
     /* Emits the caseless string comparison routine, entered with
     sljit_emit_fast_enter() and left through RETURN_ADDR.

     It follows the same scheme as do_casefulcmp: TMP1 addresses one string,
     TMP2 holds the length to compare and STR_PTR is first moved back by TMP2.
     Before the two characters are compared, each one is replaced by the byte
     found at its index in the common->lcc table. When COMPILE_PCRE8 is not
     defined, characters above 255 skip the table and are compared unchanged.
     TMP2 is zero on return only when every character was equal; the loop body
     runs before TMP2 is tested, so TMP2 is expected to be non-zero on entry.

     LCC_TABLE, CHAR1 and CHAR2 are kept in TMP3, LOCALS0 and LOCALS1 during
     the loop and restored afterwards. TMP1, TMP2 and TMP3 are modified. */
5204 DEFINE_COMPILER;
5205 struct sljit_jump *jump;
5206 struct sljit_label *label;
5207 
5208 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5209 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5210 
     /* Save the aliased registers, then load the table address. */
5211 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
5212 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
5213 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
5214 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
5215 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5216 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5217 
5218 label = LABEL();
5219 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5220 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5221 #ifndef COMPILE_PCRE8
5222 jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
5223 #endif
5224 OP1(SLJIT_MOV_U8, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
5225 #ifndef COMPILE_PCRE8
5226 JUMPHERE(jump);
5227 jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
5228 #endif
5229 OP1(SLJIT_MOV_U8, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
5230 #ifndef COMPILE_PCRE8
5231 JUMPHERE(jump);
5232 #endif
5233 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
5234 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5235 JUMPTO(SLJIT_NOT_ZERO, label);
5236 
     /* Common exit for both a mismatch and a completed comparison. */
5237 JUMPHERE(jump);
5238 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5239 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
5240 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5241 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5242 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5243 }
5244 
5245 #undef LCC_TABLE
5246 #undef CHAR1
5247 #undef CHAR2
5248 
5249 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5250 
do_utf_caselesscmp(pcre_uchar * src1,jit_arguments * args,pcre_uchar * end1)5251 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
5252 {
5253 /* This function would be ineffective to do in JIT level. */
5254 sljit_u32 c1, c2;
5255 const pcre_uchar *src2 = args->uchar_ptr;
5256 const pcre_uchar *end2 = args->end;
5257 const ucd_record *ur;
5258 const sljit_u32 *pp;
5259 
5260 while (src1 < end1)
5261   {
5262   if (src2 >= end2)
5263     return (pcre_uchar*)1;
5264   GETCHARINC(c1, src1);
5265   GETCHARINC(c2, src2);
5266   ur = GET_UCD(c2);
5267   if (c1 != c2 && c1 != c2 + ur->other_case)
5268     {
5269     pp = PRIV(ucd_caseless_sets) + ur->caseset;
5270     for (;;)
5271       {
5272       if (c1 < *pp) return NULL;
5273       if (c1 == *pp++) break;
5274       }
5275     }
5276   }
5277 return src2;
5278 }
5279 
5280 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5281 
byte_sequence_compare(compiler_common * common,BOOL caseless,pcre_uchar * cc,compare_context * context,jump_list ** backtracks)5282 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
5283     compare_context *context, jump_list **backtracks)
5284 {
     /* Emits the comparison of one literal pattern character (all of its code
     units when common->utf is set and the first unit announces more) with the
     subject, and returns cc advanced past that character.

     The subject is read relative to STR_PTR at the offset -context->length,
     and context->length is reduced by IN_UCHARS(1) for every code unit
     processed. The jumps taken on a mismatch are added to 'backtracks'.

     When 'caseless' is set and char_has_othercase() reports an other case,
     the value returned by char_get_othercase_bit() is split into the code
     unit that differs (othercasechar) and the bit to set (othercasebit). For
     that code unit the bit is OR-ed into both the subject data and the
     expected value before they are compared.

     context->sourcereg alternates between TMP1 and TMP2: the next piece of
     subject data is loaded into one register before the piece held by the
     other register is compared. */
5285 DEFINE_COMPILER;
5286 unsigned int othercasebit = 0;
5287 pcre_uchar *othercasechar = NULL;
5288 #ifdef SUPPORT_UTF
5289 int utflength;
5290 #endif
5291 
5292 if (caseless && char_has_othercase(common, cc))
5293   {
5294   othercasebit = char_get_othercase_bit(common, cc);
5295   SLJIT_ASSERT(othercasebit);
5296   /* Extracting bit difference info. */
5297 #if defined COMPILE_PCRE8
       /* As used here, the bits above the lowest eight give the offset of the
       differing code unit and the lowest eight bits give the bit itself. */
5298   othercasechar = cc + (othercasebit >> 8);
5299   othercasebit &= 0xff;
5300 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5301   /* Note that this code only handles characters in the BMP. If there
5302   ever are characters outside the BMP whose othercase differs in only one
5303   bit from itself (there currently are none), this code will need to be
5304   revised for COMPILE_PCRE32. */
       /* As used here, bit 0x100 tells that the differing bit belongs to the
       upper byte of the code unit, so it is moved there. */
5305   othercasechar = cc + (othercasebit >> 9);
5306   if ((othercasebit & 0x100) != 0)
5307     othercasebit = (othercasebit & 0xff) << 8;
5308   else
5309     othercasebit &= 0xff;
5310 #endif /* COMPILE_PCRE[8|16|32] */
5311   }
5312 
     /* No subject data is loaded yet for this context: load the first piece
     into TMP1. sourcereg is set to TMP2 because the registers are exchanged
     once more before the first compare below. */
5313 if (context->sourcereg == -1)
5314   {
5315 #if defined COMPILE_PCRE8
5316 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5317   if (context->length >= 4)
5318     OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5319   else if (context->length >= 2)
5320     OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5321   else
5322 #endif
5323     OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5324 #elif defined COMPILE_PCRE16
5325 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5326   if (context->length >= 4)
5327     OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5328   else
5329 #endif
5330     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5331 #elif defined COMPILE_PCRE32
5332   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5333 #endif /* COMPILE_PCRE[8|16|32] */
5334   context->sourcereg = TMP2;
5335   }
5336 
5337 #ifdef SUPPORT_UTF
5338 utflength = 1;
5339 if (common->utf && HAS_EXTRALEN(*cc))
5340   utflength += GET_EXTRALEN(*cc);
5341 
5342 do
5343   {
5344 #endif
5345 
5346   context->length -= IN_UCHARS(1);
5347 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5348 
5349   /* Unaligned read is supported. */
       /* Collect the expected code unit in context->c (with the case bit set
       when it applies) and the matching bit mask in context->oc. */
5350   if (othercasebit != 0 && othercasechar == cc)
5351     {
5352     context->c.asuchars[context->ucharptr] = *cc | othercasebit;
5353     context->oc.asuchars[context->ucharptr] = othercasebit;
5354     }
5355   else
5356     {
5357     context->c.asuchars[context->ucharptr] = *cc;
5358     context->oc.asuchars[context->ucharptr] = 0;
5359     }
5360   context->ucharptr++;
5361 
       /* Compare once a group of code units is complete or the sequence
       ends. */
5362 #if defined COMPILE_PCRE8
5363   if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
5364 #else
5365   if (context->ucharptr >= 2 || context->length == 0)
5366 #endif
5367     {
         /* Load the next group first, then exchange the registers so that
         sourcereg names the one holding the group collected above. */
5368     if (context->length >= 4)
5369       OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5370     else if (context->length >= 2)
5371       OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5372 #if defined COMPILE_PCRE8
5373     else if (context->length >= 1)
5374       OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5375 #endif /* COMPILE_PCRE8 */
5376     context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
5377 
         /* The case labels are numbers of code units that make up 4, 2 and
         1 bytes. */
5378     switch(context->ucharptr)
5379       {
5380       case 4 / sizeof(pcre_uchar):
5381       if (context->oc.asint != 0)
5382         OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
5383       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
5384       break;
5385 
5386       case 2 / sizeof(pcre_uchar):
5387       if (context->oc.asushort != 0)
5388         OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
5389       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
5390       break;
5391 
5392 #ifdef COMPILE_PCRE8
5393       case 1:
5394       if (context->oc.asbyte != 0)
5395         OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
5396       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
5397       break;
5398 #endif
5399 
5400       default:
5401       SLJIT_UNREACHABLE();
5402       break;
5403       }
5404     context->ucharptr = 0;
5405     }
5406 
5407 #else
5408 
5409   /* Unaligned read is unsupported or in 32 bit mode. */
       /* Load the next code unit (if one remains) first, then exchange the
       registers and compare the code unit loaded earlier. */
5410   if (context->length >= 1)
5411     OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5412 
5413   context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
5414 
5415   if (othercasebit != 0 && othercasechar == cc)
5416     {
5417     OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
5418     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
5419     }
5420   else
5421     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
5422 
5423 #endif
5424 
5425   cc++;
5426 #ifdef SUPPORT_UTF
5427   utflength--;
5428   }
5429 while (utflength > 0);
5430 #endif
5431 
5432 return cc;
5433 }
5434 
5435 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5436 
     /* SET_TYPE_OFFSET(value): when 'value' differs from typeoffset, emits an
     ADD or SUB of the difference on typereg, then stores 'value' in
     typeoffset. If typereg held the character type minus the old offset, it
     holds the type minus 'value' afterwards. The macro relies on the typereg
     and typeoffset variables of the function that uses it.

     The expansion is an if statement followed by an assignment, and 'value'
     is evaluated more than once: the macro is not a single statement and its
     argument must not have side effects. */
5437 #define SET_TYPE_OFFSET(value) \
5438   if ((value) != typeoffset) \
5439     { \
5440     if ((value) < typeoffset) \
5441       OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
5442     else \
5443       OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
5444     } \
5445   typeoffset = (value);
5446 
     /* SET_CHAR_OFFSET(value): the same adjustment for the character held in
     TMP1, tracked by the charoffset variable of the function that uses it.
     The same restrictions apply. */
5447 #define SET_CHAR_OFFSET(value) \
5448   if ((value) != charoffset) \
5449     { \
5450     if ((value) < charoffset) \
5451       OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
5452     else \
5453       OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
5454     } \
5455   charoffset = (value);
5456 
     /* Forward declaration: compile_xclass_matchingpath() below calls this
     function when it handles PT_ANY. */
5457 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr);
5458 
compile_xclass_matchingpath(compiler_common * common,pcre_uchar * cc,jump_list ** backtracks)5459 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5460 {
5461 DEFINE_COMPILER;
5462 jump_list *found = NULL;
5463 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
5464 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
5465 struct sljit_jump *jump = NULL;
5466 pcre_uchar *ccbegin;
5467 int compares, invertcmp, numberofcmps;
5468 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5469 BOOL utf = common->utf;
5470 #endif
5471 
5472 #ifdef SUPPORT_UCP
5473 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
5474 BOOL charsaved = FALSE;
5475 int typereg = TMP1;
5476 const sljit_u32 *other_cases;
5477 sljit_uw typeoffset;
5478 #endif
5479 
5480 /* Scanning the necessary info. */
5481 cc++;
5482 ccbegin = cc;
5483 compares = 0;
5484 if (cc[-1] & XCL_MAP)
5485   {
5486   min = 0;
5487   cc += 32 / sizeof(pcre_uchar);
5488   }
5489 
5490 while (*cc != XCL_END)
5491   {
5492   compares++;
5493   if (*cc == XCL_SINGLE)
5494     {
5495     cc ++;
5496     GETCHARINCTEST(c, cc);
5497     if (c > max) max = c;
5498     if (c < min) min = c;
5499 #ifdef SUPPORT_UCP
5500     needschar = TRUE;
5501 #endif
5502     }
5503   else if (*cc == XCL_RANGE)
5504     {
5505     cc ++;
5506     GETCHARINCTEST(c, cc);
5507     if (c < min) min = c;
5508     GETCHARINCTEST(c, cc);
5509     if (c > max) max = c;
5510 #ifdef SUPPORT_UCP
5511     needschar = TRUE;
5512 #endif
5513     }
5514 #ifdef SUPPORT_UCP
5515   else
5516     {
5517     SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5518     cc++;
5519     if (*cc == PT_CLIST)
5520       {
5521       other_cases = PRIV(ucd_caseless_sets) + cc[1];
5522       while (*other_cases != NOTACHAR)
5523         {
5524         if (*other_cases > max) max = *other_cases;
5525         if (*other_cases < min) min = *other_cases;
5526         other_cases++;
5527         }
5528       }
5529     else
5530       {
5531       max = READ_CHAR_MAX;
5532       min = 0;
5533       }
5534 
5535     switch(*cc)
5536       {
5537       case PT_ANY:
5538       /* Any either accepts everything or ignored. */
5539       if (cc[-1] == XCL_PROP)
5540         {
5541         compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
5542         if (list == backtracks)
5543           add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5544         return;
5545         }
5546       break;
5547 
5548       case PT_LAMP:
5549       case PT_GC:
5550       case PT_PC:
5551       case PT_ALNUM:
5552       needstype = TRUE;
5553       break;
5554 
5555       case PT_SC:
5556       needsscript = TRUE;
5557       break;
5558 
5559       case PT_SPACE:
5560       case PT_PXSPACE:
5561       case PT_WORD:
5562       case PT_PXGRAPH:
5563       case PT_PXPRINT:
5564       case PT_PXPUNCT:
5565       needstype = TRUE;
5566       needschar = TRUE;
5567       break;
5568 
5569       case PT_CLIST:
5570       case PT_UCNC:
5571       needschar = TRUE;
5572       break;
5573 
5574       default:
5575       SLJIT_UNREACHABLE();
5576       break;
5577       }
5578     cc += 2;
5579     }
5580 #endif
5581   }
5582 SLJIT_ASSERT(compares > 0);
5583 
5584 /* We are not necessary in utf mode even in 8 bit mode. */
5585 cc = ccbegin;
5586 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
5587 
5588 if ((cc[-1] & XCL_HASPROP) == 0)
5589   {
5590   if ((cc[-1] & XCL_MAP) != 0)
5591     {
5592     jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5593     if (!check_class_ranges(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
5594       {
5595       OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5596       OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5597       OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5598       OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5599       OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5600       add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
5601       }
5602 
5603     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5604     JUMPHERE(jump);
5605 
5606     cc += 32 / sizeof(pcre_uchar);
5607     }
5608   else
5609     {
5610     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
5611     add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
5612     }
5613   }
5614 else if ((cc[-1] & XCL_MAP) != 0)
5615   {
5616   OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5617 #ifdef SUPPORT_UCP
5618   charsaved = TRUE;
5619 #endif
5620   if (!check_class_ranges(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
5621     {
5622 #ifdef COMPILE_PCRE8
5623     jump = NULL;
5624     if (common->utf)
5625 #endif
5626       jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5627 
5628     OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5629     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5630     OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5631     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5632     OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5633     add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
5634 
5635 #ifdef COMPILE_PCRE8
5636     if (common->utf)
5637 #endif
5638       JUMPHERE(jump);
5639     }
5640 
5641   OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5642   cc += 32 / sizeof(pcre_uchar);
5643   }
5644 
5645 #ifdef SUPPORT_UCP
5646 if (needstype || needsscript)
5647   {
5648   if (needschar && !charsaved)
5649     OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5650 
5651 #ifdef COMPILE_PCRE32
5652   if (!common->utf)
5653     {
5654     jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10ffff + 1);
5655     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5656     JUMPHERE(jump);
5657     }
5658 #endif
5659 
5660   OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5661   OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5662   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5663   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5664   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5665   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5666   OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5667 
5668   /* Before anything else, we deal with scripts. */
5669   if (needsscript)
5670     {
5671     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
5672     OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5673 
5674     ccbegin = cc;
5675 
5676     while (*cc != XCL_END)
5677       {
5678       if (*cc == XCL_SINGLE)
5679         {
5680         cc ++;
5681         GETCHARINCTEST(c, cc);
5682         }
5683       else if (*cc == XCL_RANGE)
5684         {
5685         cc ++;
5686         GETCHARINCTEST(c, cc);
5687         GETCHARINCTEST(c, cc);
5688         }
5689       else
5690         {
5691         SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5692         cc++;
5693         if (*cc == PT_SC)
5694           {
5695           compares--;
5696           invertcmp = (compares == 0 && list != backtracks);
5697           if (cc[-1] == XCL_NOTPROP)
5698             invertcmp ^= 0x1;
5699           jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
5700           add_jump(compiler, compares > 0 ? list : backtracks, jump);
5701           }
5702         cc += 2;
5703         }
5704       }
5705 
5706     cc = ccbegin;
5707     }
5708 
5709   if (needschar)
5710     {
5711     OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5712     }
5713 
5714   if (needstype)
5715     {
5716     if (!needschar)
5717       {
5718       OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5719       OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5720       }
5721     else
5722       {
5723       OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
5724       OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5725       typereg = RETURN_ADDR;
5726       }
5727     }
5728   }
5729 #endif
5730 
5731 /* Generating code. */
5732 charoffset = 0;
5733 numberofcmps = 0;
5734 #ifdef SUPPORT_UCP
5735 typeoffset = 0;
5736 #endif
5737 
5738 while (*cc != XCL_END)
5739   {
5740   compares--;
5741   invertcmp = (compares == 0 && list != backtracks);
5742   jump = NULL;
5743 
5744   if (*cc == XCL_SINGLE)
5745     {
5746     cc ++;
5747     GETCHARINCTEST(c, cc);
5748 
5749     if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5750       {
5751       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5752       OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5753       numberofcmps++;
5754       }
5755     else if (numberofcmps > 0)
5756       {
5757       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5758       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5759       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5760       numberofcmps = 0;
5761       }
5762     else
5763       {
5764       jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5765       numberofcmps = 0;
5766       }
5767     }
5768   else if (*cc == XCL_RANGE)
5769     {
5770     cc ++;
5771     GETCHARINCTEST(c, cc);
5772     SET_CHAR_OFFSET(c);
5773     GETCHARINCTEST(c, cc);
5774 
5775     if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5776       {
5777       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5778       OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
5779       numberofcmps++;
5780       }
5781     else if (numberofcmps > 0)
5782       {
5783       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5784       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
5785       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5786       numberofcmps = 0;
5787       }
5788     else
5789       {
5790       jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5791       numberofcmps = 0;
5792       }
5793     }
5794 #ifdef SUPPORT_UCP
5795   else
5796     {
5797     SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5798     if (*cc == XCL_NOTPROP)
5799       invertcmp ^= 0x1;
5800     cc++;
5801     switch(*cc)
5802       {
5803       case PT_ANY:
5804       if (!invertcmp)
5805         jump = JUMP(SLJIT_JUMP);
5806       break;
5807 
5808       case PT_LAMP:
5809       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5810       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
5811       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5812       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5813       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5814       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5815       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5816       break;
5817 
5818       case PT_GC:
5819       c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5820       SET_TYPE_OFFSET(c);
5821       jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5822       break;
5823 
5824       case PT_PC:
5825       jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5826       break;
5827 
5828       case PT_SC:
5829       compares++;
5830       /* Do nothing. */
5831       break;
5832 
5833       case PT_SPACE:
5834       case PT_PXSPACE:
5835       SET_CHAR_OFFSET(9);
5836       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5837       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
5838 
5839       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5840       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5841 
5842       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5843       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5844 
5845       SET_TYPE_OFFSET(ucp_Zl);
5846       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
5847       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
5848       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5849       break;
5850 
5851       case PT_WORD:
5852       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5853       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
5854       /* Fall through. */
5855 
5856       case PT_ALNUM:
5857       SET_TYPE_OFFSET(ucp_Ll);
5858       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
5859       OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
5860       SET_TYPE_OFFSET(ucp_Nd);
5861       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5862       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
5863       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5864       break;
5865 
5866       case PT_CLIST:
5867       other_cases = PRIV(ucd_caseless_sets) + cc[1];
5868 
5869       /* At least three characters are required.
5870          Otherwise this case would be handled by the normal code path. */
5871       SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5872       SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5873 
5874       /* Optimizing character pairs, if their difference is power of 2. */
5875       if (is_powerof2(other_cases[1] ^ other_cases[0]))
5876         {
5877         if (charoffset == 0)
5878           OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5879         else
5880           {
5881           OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5882           OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5883           }
5884         OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5885         OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
5886         other_cases += 2;
5887         }
5888       else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5889         {
5890         if (charoffset == 0)
5891           OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5892         else
5893           {
5894           OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5895           OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5896           }
5897         OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5898         OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
5899 
5900         OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5901         OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
5902 
5903         other_cases += 3;
5904         }
5905       else
5906         {
5907         OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5908         OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
5909         }
5910 
5911       while (*other_cases != NOTACHAR)
5912         {
5913         OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5914         OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
5915         }
5916       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5917       break;
5918 
5919       case PT_UCNC:
5920       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5921       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
5922       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5923       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5924       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5925       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5926 
5927       SET_CHAR_OFFSET(0xa0);
5928       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5929       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
5930       SET_CHAR_OFFSET(0);
5931       OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5932       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
5933       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5934       break;
5935 
5936       case PT_PXGRAPH:
5937       /* C and Z groups are the farthest two groups. */
5938       SET_TYPE_OFFSET(ucp_Ll);
5939       OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5940       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
5941 
5942       jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5943 
5944       /* In case of ucp_Cf, we overwrite the result. */
5945       SET_CHAR_OFFSET(0x2066);
5946       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5947       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
5948 
5949       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5950       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5951 
5952       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5953       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5954 
5955       JUMPHERE(jump);
5956       jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5957       break;
5958 
5959       case PT_PXPRINT:
5960       /* C and Z groups are the farthest two groups. */
5961       SET_TYPE_OFFSET(ucp_Ll);
5962       OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5963       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
5964 
5965       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5966       OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);
5967 
5968       jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5969 
5970       /* In case of ucp_Cf, we overwrite the result. */
5971       SET_CHAR_OFFSET(0x2066);
5972       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5973       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
5974 
5975       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5976       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5977 
5978       JUMPHERE(jump);
5979       jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5980       break;
5981 
5982       case PT_PXPUNCT:
5983       SET_TYPE_OFFSET(ucp_Sc);
5984       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5985       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
5986 
5987       SET_CHAR_OFFSET(0);
5988       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
5989       OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
5990 
5991       SET_TYPE_OFFSET(ucp_Pc);
5992       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5993       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
5994       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5995       break;
5996 
5997       default:
5998       SLJIT_UNREACHABLE();
5999       break;
6000       }
6001     cc += 2;
6002     }
6003 #endif
6004 
6005   if (jump != NULL)
6006     add_jump(compiler, compares > 0 ? list : backtracks, jump);
6007   }
6008 
6009 if (found != NULL)
6010   set_jumps(found, LABEL());
6011 }
6012 
6013 #undef SET_TYPE_OFFSET
6014 #undef SET_CHAR_OFFSET
6015 
6016 #endif
6017 
/* Emits the matching-path code for one "simple assertion" opcode.

   The switch handles OP_SOD, OP_SOM, OP_NOT_WORD_BOUNDARY / OP_WORD_BOUNDARY,
   OP_EODN, OP_EOD, OP_DOLL, OP_DOLLM, OP_CIRC, OP_CIRCM and OP_REVERSE. Each
   condition under which the assertion does not hold is emitted as a jump and
   appended to the caller's list with add_jump(); the code that follows the
   emitted sequence is therefore only reached when the assertion holds.

   Arguments:
     common      compiler state; this function reads nltype, newline, nlmin,
                 nlmax, mode, endonly, utf and the wordboundary / anynewline
                 jump lists from it
     type        the opcode to compile
     cc          pattern position; only dereferenced for OP_REVERSE, whose
                 length operand is read with GET(cc, 0)
     backtracks  jump list that receives every failure jump

   Returns cc unchanged for every opcode except OP_REVERSE, which returns
   cc + LINK_SIZE.

   The statements below emit code in the order they are written, so their
   order must not be changed. */
compile_simple_assertion_matchingpath(compiler_common * common,pcre_uchar type,pcre_uchar * cc,jump_list ** backtracks)6018 static pcre_uchar *compile_simple_assertion_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
6019 {
6020 DEFINE_COMPILER;
6021 int length;
     /* Scratch slots for forward jumps; each case reuses them independently. */
6022 struct sljit_jump *jump[4];
6023 #ifdef SUPPORT_UTF
6024 struct sljit_label *label;
6025 #endif /* SUPPORT_UTF */
6026 
6027 switch(type)
6028   {
6029   case OP_SOD:
       /* Backtrack unless STR_PTR equals the 'begin' field of jit_arguments. */
6030   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6031   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6032   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
6033   return cc;
6034 
6035   case OP_SOM:
       /* Same test as OP_SOD, but against the 'str' field of jit_arguments. */
6036   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6037   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6038   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
6039   return cc;
6040 
6041   case OP_NOT_WORD_BOUNDARY:
6042   case OP_WORD_BOUNDARY:
       /* Fast-call the routine collected in common->wordboundary, then branch
          on the zero flag: OP_WORD_BOUNDARY backtracks on zero,
          OP_NOT_WORD_BOUNDARY backtracks on non-zero.
          NOTE(review): sljit_set_current_flags() presumably declares that the
          zero flag is valid after the call - confirm against sljitLir.h. */
6043   add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
6044   sljit_set_current_flags(compiler, SLJIT_SET_Z);
6045   add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6046   return cc;
6047 
6048   case OP_EODN:
6049   /* Requires rather complex checks. */
       /* jump[0]: STR_PTR is already at (or past) STR_END; go straight to the
          check_partial() call at the bottom of this case. Otherwise the rest
          of the subject must be exactly one newline sequence. */
6050   jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6051   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6052     {
         /* Fixed newline made of two code units: TMP2 = STR_PTR + 2 units,
            TMP1 = first unit. */
6053     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6054     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6055     if (common->mode == JIT_COMPILE)
           /* The two units must end exactly at STR_END. */
6056       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
6057     else
6058       {
           /* jump[1]: exactly two units remain; go compare them below.
              Otherwise TMP2 = (more than two units remain) OR (first unit is
              not the first newline unit); a non-zero result backtracks. What
              is left is a single remaining unit that matches the first
              newline unit: call check_partial(common, TRUE) and then
              backtrack unconditionally. */
6059       jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
6060       OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
6061       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
6062       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
6063       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
6064       add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
6065       check_partial(common, TRUE);
6066       add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6067       JUMPHERE(jump[1]);
6068       }
         /* Both units must match: first against bits 8..15 of
            common->newline, second against bits 0..7. */
6069     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6070     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6071     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6072     }
6073   else if (common->nltype == NLTYPE_FIXED)
6074     {
         /* Fixed single-unit newline: exactly one unit must remain and it
            must equal common->newline. */
6075     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6076     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6077     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
6078     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
6079     }
6080   else
6081     {
         /* Remaining newline types. jump[1]: the first unit is not CR. */
6082     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6083     jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
         /* First unit is CR. Compare STR_PTR + 2 units with STR_END:
            greater -> the CR is the last unit, success (jump[2]);
            not equal (i.e. less) -> more than two units remain, backtrack. */
6084     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6085     OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
6086     jump[2] = JUMP(SLJIT_GREATER);
6087     add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
6088     /* Equal. */
         /* Exactly one unit follows the CR: success (jump[3]) only if it is
            NL, otherwise backtrack. */
6089     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6090     jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
6091     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6092 
6093     JUMPHERE(jump[1]);
6094     if (common->nltype == NLTYPE_ANYCRLF)
6095       {
           /* Not CR: backtrack if another unit follows this one, or if this
              unit (still in TMP1) is not NL. */
6096       OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6097       add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
6098       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
6099       }
6100     else
6101       {
           /* Save STR_PTR in LOCALS1, read one character, require that the
              read stopped exactly at STR_END, then fast-call the routine
              collected in common->anynewline and backtrack when it leaves
              the zero flag set. STR_PTR is restored afterwards. */
6102       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
6103       read_char_range(common, common->nlmin, common->nlmax, TRUE);
6104       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
6105       add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
6106       sljit_set_current_flags(compiler, SLJIT_SET_Z);
6107       add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
6108       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
6109       }
6110     JUMPHERE(jump[2]);
6111     JUMPHERE(jump[3]);
6112     }
       /* Every successful path above ends up here. */
6113   JUMPHERE(jump[0]);
6114   check_partial(common, FALSE);
6115   return cc;
6116 
6117   case OP_EOD:
       /* Backtrack while STR_PTR is before STR_END. */
6118   add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
6119   check_partial(common, FALSE);
6120   return cc;
6121 
6122   case OP_DOLL:
       /* Backtrack when the 'noteol' byte of jit_arguments is non-zero. */
6123   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6124   OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
6125   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6126 
       /* Without 'endonly' the OP_EODN code above is reused (its return value
          is cc, so discarding it loses nothing); with 'endonly' the test is
          the same as OP_EOD. */
6127   if (!common->endonly)
6128     compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
6129   else
6130     {
6131     add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
6132     check_partial(common, FALSE);
6133     }
6134   return cc;
6135 
6136   case OP_DOLLM:
       /* jump[1]: STR_PTR is before STR_END, go and look for a newline at the
          current position. Otherwise (at the end): backtrack if 'noteol' is
          non-zero, else call check_partial() and finish through jump[0]. */
6137   jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
6138   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6139   OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
6140   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6141   check_partial(common, FALSE);
6142   jump[0] = JUMP(SLJIT_JUMP);
6143   JUMPHERE(jump[1]);
6144 
6145   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6146     {
         /* Fixed two-unit newline: TMP2 = STR_PTR + 2 units, TMP1 = first
            unit. */
6147     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6148     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6149     if (common->mode == JIT_COMPILE)
           /* Fewer than two units remain: cannot be the newline. */
6150       add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
6151     else
6152       {
           /* jump[1]: two units are available, compare them below. Otherwise
              only one unit remains: backtrack if it is not the first newline
              unit; if it is, call check_partial(common, TRUE) and then
              backtrack unconditionally. */
6153       jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
6154       /* STR_PTR = STR_END - IN_UCHARS(1) */
6155       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6156       check_partial(common, TRUE);
6157       add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6158       JUMPHERE(jump[1]);
6159       }
6160 
         /* Both units must match the two bytes of common->newline. */
6161     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6162     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6163     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6164     }
6165   else
6166     {
         /* Other newline types are delegated to peek_char() and
            check_newlinechar(), which receives the backtrack list.
            NOTE(review): peek_char() presumably loads the character without
            moving STR_PTR - confirm against its definition. */
6167     peek_char(common, common->nlmax);
6168     check_newlinechar(common, common->nltype, backtracks, FALSE);
6169     }
6170   JUMPHERE(jump[0]);
6171   return cc;
6172 
6173   case OP_CIRC:
       /* Backtrack if STR_PTR is past the 'begin' field of jit_arguments, or
          if the 'notbol' byte is non-zero. TMP2 keeps the ARGUMENTS pointer
          until 'notbol' has been loaded. */
6174   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6175   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
6176   add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
6177   OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
6178   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6179   return cc;
6180 
6181   case OP_CIRCM:
       /* jump[1]: STR_PTR is past 'begin', go and inspect what precedes it.
          Otherwise (at 'begin'): backtrack if 'notbol' is non-zero, else
          finish through jump[0]. TMP1 holds 'begin' throughout. */
6182   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6183   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
6184   jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
6185   OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
6186   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6187   jump[0] = JUMP(SLJIT_JUMP);
6188   JUMPHERE(jump[1]);
6189 
       /* Past 'begin': backtrack when STR_PTR has reached STR_END. */
6190   add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6191   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6192     {
         /* Fixed two-unit newline: backtrack if fewer than two units lie
            between 'begin' and STR_PTR, then compare the units at offsets -2
            and -1 with the two bytes of common->newline. */
6193     OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6194     add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
6195     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6196     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6197     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6198     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6199     }
6200   else
6201     {
         /* Step back one character, read it again and let
            check_newlinechar() backtrack unless it is a newline.
            NOTE(review): presumably the read leaves STR_PTR where it was
            before skip_char_back() - confirm against the helpers. */
6202     skip_char_back(common);
6203     read_char_range(common, common->nlmin, common->nlmax, TRUE);
6204     check_newlinechar(common, common->nltype, backtracks, FALSE);
6205     }
6206   JUMPHERE(jump[0]);
6207   return cc;
6208 
6209   case OP_REVERSE:
       /* Moves STR_PTR backwards by the length operand stored at cc. A zero
          length emits no code. */
6210   length = GET(cc, 0);
6211   if (length == 0)
6212     return cc + LINK_SIZE;
6213   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6214 #ifdef SUPPORT_UTF
6215   if (common->utf)
6216     {
         /* UTF: emit a loop with TMP3 = 'begin' and TMP2 = remaining count.
            Each iteration backtracks if STR_PTR is not past 'begin', steps
            back with skip_char_back(), and repeats until TMP2 reaches 0. */
6217     OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6218     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
6219     label = LABEL();
6220     add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
6221     skip_char_back(common);
6222     OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
6223     JUMPTO(SLJIT_NOT_ZERO, label);
6224     }
6225   else
6226 #endif
6227     {
         /* Non-UTF: subtract IN_UCHARS(length) in one step and backtrack if
            the result lies before 'begin'. */
6228     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6229     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
6230     add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
6231     }
6232   check_start_used_ptr(common);
6233   return cc + LINK_SIZE;
6234   }
     /* Every case above returns, so this point is only reached for an opcode
        that the switch does not handle. */
6235 SLJIT_UNREACHABLE();
6236 return cc;
6237 }
6238 
compile_char1_matchingpath(compiler_common * common,pcre_uchar type,pcre_uchar * cc,jump_list ** backtracks,BOOL check_str_ptr)6239 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr)
6240 {
6241 DEFINE_COMPILER;
6242 int length;
6243 unsigned int c, oc, bit;
6244 compare_context context;
6245 struct sljit_jump *jump[3];
6246 jump_list *end_list;
6247 #ifdef SUPPORT_UTF
6248 struct sljit_label *label;
6249 #ifdef SUPPORT_UCP
6250 pcre_uchar propdata[5];
6251 #endif
6252 #endif /* SUPPORT_UTF */
6253 
6254 switch(type)
6255   {
6256   case OP_NOT_DIGIT:
6257   case OP_DIGIT:
6258   /* Digits are usually 0-9, so it is worth optimizing them. */
6259   if (check_str_ptr)
6260     detect_partial_match(common, backtracks);
6261 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6262   if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
6263     read_char7_type(common, type == OP_NOT_DIGIT);
6264   else
6265 #endif
6266     read_char8_type(common, type == OP_NOT_DIGIT);
6267     /* Flip the starting bit in the negative case. */
6268   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
6269   add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6270   return cc;
6271 
6272   case OP_NOT_WHITESPACE:
6273   case OP_WHITESPACE:
6274   if (check_str_ptr)
6275     detect_partial_match(common, backtracks);
6276 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6277   if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
6278     read_char7_type(common, type == OP_NOT_WHITESPACE);
6279   else
6280 #endif
6281     read_char8_type(common, type == OP_NOT_WHITESPACE);
6282   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
6283   add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6284   return cc;
6285 
6286   case OP_NOT_WORDCHAR:
6287   case OP_WORDCHAR:
6288   if (check_str_ptr)
6289     detect_partial_match(common, backtracks);
6290 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6291   if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
6292     read_char7_type(common, type == OP_NOT_WORDCHAR);
6293   else
6294 #endif
6295     read_char8_type(common, type == OP_NOT_WORDCHAR);
6296   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
6297   add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6298   return cc;
6299 
6300   case OP_ANY:
6301   if (check_str_ptr)
6302     detect_partial_match(common, backtracks);
6303   read_char_range(common, common->nlmin, common->nlmax, TRUE);
6304   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6305     {
6306     jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
6307     end_list = NULL;
6308     if (common->mode != JIT_PARTIAL_HARD_COMPILE)
6309       add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6310     else
6311       check_str_end(common, &end_list);
6312 
6313     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6314     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
6315     set_jumps(end_list, LABEL());
6316     JUMPHERE(jump[0]);
6317     }
6318   else
6319     check_newlinechar(common, common->nltype, backtracks, TRUE);
6320   return cc;
6321 
6322   case OP_ALLANY:
6323   if (check_str_ptr)
6324     detect_partial_match(common, backtracks);
6325 #ifdef SUPPORT_UTF
6326   if (common->utf)
6327     {
6328     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6329     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6330 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
6331 #if defined COMPILE_PCRE8
6332     jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
6333     OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
6334     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6335 #elif defined COMPILE_PCRE16
6336     jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
6337     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
6338     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
6339     OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
6340     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6341     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6342 #endif
6343     JUMPHERE(jump[0]);
6344 #endif /* COMPILE_PCRE[8|16] */
6345     return cc;
6346     }
6347 #endif
6348   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6349   return cc;
6350 
6351   case OP_ANYBYTE:
6352   if (check_str_ptr)
6353     detect_partial_match(common, backtracks);
6354   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6355   return cc;
6356 
6357 #ifdef SUPPORT_UTF
6358 #ifdef SUPPORT_UCP
6359   case OP_NOTPROP:
6360   case OP_PROP:
6361   propdata[0] = XCL_HASPROP;
6362   propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
6363   propdata[2] = cc[0];
6364   propdata[3] = cc[1];
6365   propdata[4] = XCL_END;
6366   if (check_str_ptr)
6367     detect_partial_match(common, backtracks);
6368   compile_xclass_matchingpath(common, propdata, backtracks);
6369   return cc + 2;
6370 #endif
6371 #endif
6372 
6373   case OP_ANYNL:
6374   if (check_str_ptr)
6375     detect_partial_match(common, backtracks);
6376   read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
6377   jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6378   /* We don't need to handle soft partial matching case. */
6379   end_list = NULL;
6380   if (common->mode != JIT_PARTIAL_HARD_COMPILE)
6381     add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6382   else
6383     check_str_end(common, &end_list);
6384   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6385   jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
6386   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6387   jump[2] = JUMP(SLJIT_JUMP);
6388   JUMPHERE(jump[0]);
6389   check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
6390   set_jumps(end_list, LABEL());
6391   JUMPHERE(jump[1]);
6392   JUMPHERE(jump[2]);
6393   return cc;
6394 
6395   case OP_NOT_HSPACE:
6396   case OP_HSPACE:
6397   if (check_str_ptr)
6398     detect_partial_match(common, backtracks);
6399   read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
6400   add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
6401   sljit_set_current_flags(compiler, SLJIT_SET_Z);
6402   add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6403   return cc;
6404 
6405   case OP_NOT_VSPACE:
6406   case OP_VSPACE:
6407   if (check_str_ptr)
6408     detect_partial_match(common, backtracks);
6409   read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
6410   add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
6411   sljit_set_current_flags(compiler, SLJIT_SET_Z);
6412   add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6413   return cc;
6414 
6415 #ifdef SUPPORT_UCP
6416   case OP_EXTUNI:
6417   if (check_str_ptr)
6418     detect_partial_match(common, backtracks);
6419   read_char(common);
6420   add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
6421   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
6422   /* Optimize register allocation: use a real register. */
6423   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6424   OP1(SLJIT_MOV_U8, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
6425 
6426   label = LABEL();
6427   jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6428   OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
6429   read_char(common);
6430   add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
6431   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
6432   OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
6433 
6434   OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
6435   OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
6436   OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
6437   OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6438   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6439   JUMPTO(SLJIT_NOT_ZERO, label);
6440 
6441   OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
6442   JUMPHERE(jump[0]);
6443   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6444 
6445   if (common->mode == JIT_PARTIAL_HARD_COMPILE)
6446     {
6447     jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
6448     /* Since we successfully read a char above, partial matching must occure. */
6449     check_partial(common, TRUE);
6450     JUMPHERE(jump[0]);
6451     }
6452   return cc;
6453 #endif
6454 
6455   case OP_CHAR:
6456   case OP_CHARI:
6457   length = 1;
6458 #ifdef SUPPORT_UTF
6459   if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
6460 #endif
6461   if (common->mode == JIT_COMPILE && check_str_ptr
6462       && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
6463     {
6464     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
6465     add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6466 
6467     context.length = IN_UCHARS(length);
6468     context.sourcereg = -1;
6469 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6470     context.ucharptr = 0;
6471 #endif
6472     return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
6473     }
6474 
6475   if (check_str_ptr)
6476     detect_partial_match(common, backtracks);
6477 #ifdef SUPPORT_UTF
6478   if (common->utf)
6479     {
6480     GETCHAR(c, cc);
6481     }
6482   else
6483 #endif
6484     c = *cc;
6485 
6486   if (type == OP_CHAR || !char_has_othercase(common, cc))
6487     {
6488     read_char_range(common, c, c, FALSE);
6489     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6490     return cc + length;
6491     }
6492   oc = char_othercase(common, c);
6493   read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
6494   bit = c ^ oc;
6495   if (is_powerof2(bit))
6496     {
6497     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
6498     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
6499     return cc + length;
6500     }
6501   jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
6502   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
6503   JUMPHERE(jump[0]);
6504   return cc + length;
6505 
6506   case OP_NOT:
6507   case OP_NOTI:
6508   if (check_str_ptr)
6509     detect_partial_match(common, backtracks);
6510   length = 1;
6511 #ifdef SUPPORT_UTF
6512   if (common->utf)
6513     {
6514 #ifdef COMPILE_PCRE8
6515     c = *cc;
6516     if (c < 128)
6517       {
6518       OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6519       if (type == OP_NOT || !char_has_othercase(common, cc))
6520         add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6521       else
6522         {
6523         /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
6524         OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
6525         add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
6526         }
6527       /* Skip the variable-length character. */
6528       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6529       jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
6530       OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
6531       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6532       JUMPHERE(jump[0]);
6533       return cc + 1;
6534       }
6535     else
6536 #endif /* COMPILE_PCRE8 */
6537       {
6538       GETCHARLEN(c, cc, length);
6539       }
6540     }
6541   else
6542 #endif /* SUPPORT_UTF */
6543     c = *cc;
6544 
6545   if (type == OP_NOT || !char_has_othercase(common, cc))
6546     {
6547     read_char_range(common, c, c, TRUE);
6548     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6549     }
6550   else
6551     {
6552     oc = char_othercase(common, c);
6553     read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
6554     bit = c ^ oc;
6555     if (is_powerof2(bit))
6556       {
6557       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
6558       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
6559       }
6560     else
6561       {
6562       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6563       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
6564       }
6565     }
6566   return cc + length;
6567 
6568   case OP_CLASS:
6569   case OP_NCLASS:
6570   if (check_str_ptr)
6571     detect_partial_match(common, backtracks);
6572 
6573 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6574   bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
6575   read_char_range(common, 0, bit, type == OP_NCLASS);
6576 #else
6577   read_char_range(common, 0, 255, type == OP_NCLASS);
6578 #endif
6579 
6580   if (check_class_ranges(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
6581     return cc + 32 / sizeof(pcre_uchar);
6582 
6583 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6584   jump[0] = NULL;
6585   if (common->utf)
6586     {
6587     jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
6588     if (type == OP_CLASS)
6589       {
6590       add_jump(compiler, backtracks, jump[0]);
6591       jump[0] = NULL;
6592       }
6593     }
6594 #elif !defined COMPILE_PCRE8
6595   jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6596   if (type == OP_CLASS)
6597     {
6598     add_jump(compiler, backtracks, jump[0]);
6599     jump[0] = NULL;
6600     }
6601 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
6602 
6603   OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6604   OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6605   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
6606   OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6607   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6608   add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
6609 
6610 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6611   if (jump[0] != NULL)
6612     JUMPHERE(jump[0]);
6613 #endif
6614   return cc + 32 / sizeof(pcre_uchar);
6615 
6616 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6617   case OP_XCLASS:
6618   if (check_str_ptr)
6619     detect_partial_match(common, backtracks);
6620   compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
6621   return cc + GET(cc, 0) - 1;
6622 #endif
6623   }
6624 SLJIT_UNREACHABLE();
6625 return cc;
6626 }
6627 
compile_charn_matchingpath(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,jump_list ** backtracks)6628 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
6629 {
6630 /* This function consumes at least one input character. */
6631 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
6632 DEFINE_COMPILER;
6633 pcre_uchar *ccbegin = cc;
6634 compare_context context;
6635 int size;
6636 
6637 context.length = 0;
6638 do
6639   {
6640   if (cc >= ccend)
6641     break;
6642 
6643   if (*cc == OP_CHAR)
6644     {
6645     size = 1;
6646 #ifdef SUPPORT_UTF
6647     if (common->utf && HAS_EXTRALEN(cc[1]))
6648       size += GET_EXTRALEN(cc[1]);
6649 #endif
6650     }
6651   else if (*cc == OP_CHARI)
6652     {
6653     size = 1;
6654 #ifdef SUPPORT_UTF
6655     if (common->utf)
6656       {
6657       if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6658         size = 0;
6659       else if (HAS_EXTRALEN(cc[1]))
6660         size += GET_EXTRALEN(cc[1]);
6661       }
6662     else
6663 #endif
6664     if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6665       size = 0;
6666     }
6667   else
6668     size = 0;
6669 
6670   cc += 1 + size;
6671   context.length += IN_UCHARS(size);
6672   }
6673 while (size > 0 && context.length <= 128);
6674 
6675 cc = ccbegin;
6676 if (context.length > 0)
6677   {
6678   /* We have a fixed-length byte sequence. */
6679   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
6680   add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6681 
6682   context.sourcereg = -1;
6683 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6684   context.ucharptr = 0;
6685 #endif
6686   do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
6687   return cc;
6688   }
6689 
6690 /* A non-fixed length character will be checked if length == 0. */
6691 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
6692 }
6693 
6694 /* Forward declarations. compile_matchingpath() is called by
compile_recurse_matchingpath() and compile_assert_matchingpath() below. */
6695 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
6696 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
6697 
/* Allocates a backtrack structure of 'size' bytes with sljit_alloc_memory(),
zero-fills it, stores 'ccstart' in its cc field and links it on top of
parent's backtrack list. The macro relies on the local names 'compiler',
'backtrack' and 'parent' of the enclosing function. If the compiler is in
error state after the allocation, the enclosing function returns 'error'. */
6698 #define PUSH_BACKTRACK(size, ccstart, error) \
6699   do \
6700     { \
6701     backtrack = sljit_alloc_memory(compiler, (size)); \
6702     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
6703       return error; \
6704     memset(backtrack, 0, size); \
6705     backtrack->prev = parent->top; \
6706     backtrack->cc = (ccstart); \
6707     parent->top = backtrack; \
6708     } \
6709   while (0)
6710 
/* Same as PUSH_BACKTRACK, for enclosing functions returning void: on
compiler error it executes a plain 'return'. */
6711 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
6712   do \
6713     { \
6714     backtrack = sljit_alloc_memory(compiler, (size)); \
6715     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
6716       return; \
6717     memset(backtrack, 0, size); \
6718     backtrack->prev = parent->top; \
6719     backtrack->cc = (ccstart); \
6720     parent->top = backtrack; \
6721     } \
6722   while (0)
6723 
/* Views the local 'backtrack' pointer as a pointer to the given
backtrack structure type. */
6724 #define BACKTRACK_AS(type) ((type *)backtrack)
6725 
/* Emits the code that selects which group of a duplicate-named back
reference (OP_DNREF / OP_DNREFI) is going to be used.

The candidate groups are read from consecutive name table entries; the first
entry index is GET2(cc, 1) and the number of entries is
GET2(cc, 1 + IMM2_SIZE). For every candidate except the last one the address
of its OVECTOR pair is loaded into TMP2, and the search ends as soon as the
first slot of the pair differs from OVECTOR(1) (presumably: the group is
set). The last candidate is selected unconditionally; if 'backtracks' is not
NULL and jscript_compat is off, a backtrack jump is taken when its first
slot equals OVECTOR(1) too.

In the generated code TMP2 ends up holding the address of the selected
OVECTOR pair and TMP1 holds the value of OVECTOR(1). */
static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
{
DEFINE_COMPILER;
jump_list *selected = NULL;
pcre_uchar *entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
int remaining = GET2(cc, 1 + IMM2_SIZE);
unsigned int ovector_index;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* All candidates but the last: leave the search when the pair looks set. */
for (; remaining > 1; remaining--)
  {
  ovector_index = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovector_index));
  add_jump(compiler, &selected, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index), TMP1, 0));
  entry += common->name_entry_size;
  }

/* Last candidate: always selected, optionally verified. */
ovector_index = GET2(entry, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovector_index));
if (backtracks != NULL && !common->jscript_compat)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index), TMP1, 0));

set_jumps(selected, LABEL());
}
6755 
/* Emits the code that matches the text captured by a back reference.

Handles OP_REF / OP_REFI (numbered group, OVECTOR index read from cc) and
OP_DNREF / OP_DNREFI (duplicate names; TMP2 must already hold the address of
the selected OVECTOR pair, see compile_dnref_search()).

Arguments:
  common      compiler state
  cc          points to the back reference opcode
  backtracks  list receiving the jumps taken when the match fails
  withchecks  when TRUE, also emit the "group is unset" check (numbered
              references only, skipped when jscript_compat is set) and the
              empty-capture check
  emptyfail   only meaningful with withchecks: when TRUE an empty capture
              is a failure, otherwise the comparison is skipped for it

TMP1 and TMP2 are overwritten by the generated code. */
static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
/* Case sensitivity must follow the opcode for both the numbered and the
duplicate-named forms: OP_DNREF is caseful, OP_DNREFI is caseless. Testing
only for OP_REF / OP_REFI would compare OP_DNREF caselessly and would keep
OP_DNREFI away from the UTF aware comparison. */
BOOL caseless = (*cc == OP_REFI || *cc == OP_DNREFI);
int offset = 0;
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;

/* TMP1 = start of the captured text. */
if (ref)
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->jscript_compat)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }
else
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);

#if defined SUPPORT_UTF && defined SUPPORT_UCP
if (common->utf && caseless)
  {
  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
  /* TMP2 = end of the captured text. */
  if (ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  /* Empty capture. */
  if (withchecks)
    jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);

  /* Needed to save important temporary registers. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
  OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
  sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  /* The generated code treats a return value of 0 as a mismatch and a
  return value of 1 as the partial match case; any other value becomes
  the new STR_PTR. */
  if (common->mode == JIT_COMPILE)
    add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
    nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
  }
else
#endif /* SUPPORT_UTF && SUPPORT_UCP */
  {
  /* TMP2 = length of the captured text (end - start). */
  if (ref)
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  /* Empty capture. */
  if (withchecks)
    jump = JUMP(SLJIT_ZERO);

  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == JIT_COMPILE)
    add_jump(compiler, backtracks, partial);

  add_jump(compiler, caseless ? &common->caselesscmp : &common->casefulcmp, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != JIT_COMPILE)
    {
    /* The subject ends inside the referenced text: compare only the part
    that is available, then do the partial match handling. */
    nopartial = JUMP(SLJIT_JUMP);
    JUMPHERE(partial);
    /* TMP2 -= STR_END - STR_PTR */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, caseless ? &common->caselesscmp : &common->casefulcmp, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(partial);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }

/* Resolve the empty-capture jump: fail or continue right here. */
if (jump != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, jump);
  else
    JUMPHERE(jump);
  }
}
6851 
/* Compiles a back reference (OP_REF, OP_REFI, OP_DNREF or OP_DNREFI) that is
followed by a repeat opcode (OP_CRSTAR ... OP_CRMINRANGE).

A ref_iterator_backtrack is pushed on top of parent's backtrack list and a
label is stored in its matchingpath field. The first half of the function
handles the non-minimizing repeats, the second half the minimizing ones.

Returns the address of the opcode following the repeat, or NULL if the
compiler is in error state after allocating the backtrack structure. */
compile_ref_iterator_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)6852 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6853 {
6854 DEFINE_COMPILER;
6855 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6856 backtrack_common *backtrack;
6857 pcre_uchar type;
6858 int offset = 0;
6859 struct sljit_label *label;
6860 struct sljit_jump *zerolength;
6861 struct sljit_jump *jump = NULL;
6862 pcre_uchar *ccbegin = cc;
6863 int min = 0, max = 0;
6864 BOOL minimize;
6865 
6866 PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
6867 
/* The duplicate-name opcodes carry one more IMM2 operand than OP_REF/OP_REFI
(compile_dnref_search() reads both), so cc is advanced over it and the
offsets used below are the same for both forms. ccbegin keeps pointing to
the reference opcode itself. */
6868 if (ref)
6869   offset = GET2(cc, 1) << 1;
6870 else
6871   cc += IMM2_SIZE;
6872 type = cc[1 + IMM2_SIZE];
6873 
/* An odd repeat opcode selects the minimizing code path. */
6874 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
6875 minimize = (type & 0x1) != 0;
/* Decode the repeat limits and step over the repeat opcode.
max == 0 is used for "no upper limit". */
6876 switch(type)
6877   {
6878   case OP_CRSTAR:
6879   case OP_CRMINSTAR:
6880   min = 0;
6881   max = 0;
6882   cc += 1 + IMM2_SIZE + 1;
6883   break;
6884   case OP_CRPLUS:
6885   case OP_CRMINPLUS:
6886   min = 1;
6887   max = 0;
6888   cc += 1 + IMM2_SIZE + 1;
6889   break;
6890   case OP_CRQUERY:
6891   case OP_CRMINQUERY:
6892   min = 0;
6893   max = 1;
6894   cc += 1 + IMM2_SIZE + 1;
6895   break;
6896   case OP_CRRANGE:
6897   case OP_CRMINRANGE:
6898   min = GET2(cc, 1 + IMM2_SIZE + 1);
6899   max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
6900   cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
6901   break;
6902   default:
6903   SLJIT_UNREACHABLE();
6904   break;
6905   }
6906 
/* Non-minimizing repeats. */
6907 if (!minimize)
6908   {
6909   if (min == 0)
6910     {
    /* Two stack slots: STACK(0) receives the current STR_PTR, STACK(1) a
    zero. One more STR_PTR is pushed after every successful repetition
    (see the loops below). */
6911     allocate_stack(common, 2);
6912     if (ref)
6913       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6914     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6915     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6916     /* Temporary release of STR_PTR. */
6917     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6918     /* Handles both invalid and empty cases. Since the minimum repeat,
6919     is zero the invalid case is basically the same as an empty case. */
6920     if (ref)
6921       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6922     else
6923       {
      /* TMP2 = address of the selected OVECTOR pair; it is kept in
      POSSESSIVE1 because it has to be reloaded for every repetition. */
6924       compile_dnref_search(common, ccbegin, NULL);
6925       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6926       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6927       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6928       }
6929     /* Restore if not zero length. */
6930     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6931     }
6932   else
6933     {
    /* At least one repetition is required: a single zero slot is pushed,
    and a reference whose start equals OVECTOR(1) fails immediately. */
6934     allocate_stack(common, 1);
6935     if (ref)
6936       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6937     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6938     if (ref)
6939       {
6940       add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6941       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6942       }
6943     else
6944       {
6945       compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6946       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6947       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6948       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6949       }
6950     }
6951 
  /* A repetition counter is only needed for explicit limits; it is kept
  in POSSESSIVE0. */
6952   if (min > 1 || max > 1)
6953     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
6954 
  /* Start of one repetition. compile_ref_matchingpath() overwrites TMP2,
  so the OVECTOR pair address is reloaded for the duplicate-name forms. */
6955   label = LABEL();
6956   if (!ref)
6957     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
6958   compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
6959 
6960   if (min > 1 || max > 1)
6961     {
    /* Increase the counter; repeat without saving STR_PTR until min is
    reached, then save STR_PTR and repeat until max is reached. */
6962     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
6963     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6964     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
6965     if (min > 1)
6966       CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
6967     if (max > 1)
6968       {
6969       jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
6970       allocate_stack(common, 1);
6971       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6972       JUMPTO(SLJIT_JUMP, label);
6973       JUMPHERE(jump);
6974       }
6975     }
6976 
6977   if (max == 0)
6978     {
6979     /* Includes min > 1 case as well. */
6980     allocate_stack(common, 1);
6981     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6982     JUMPTO(SLJIT_JUMP, label);
6983     }
6984 
  /* A zero length reference skips the whole loop. */
6985   JUMPHERE(zerolength);
6986   BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
6987 
6988   count_match(common);
6989   return cc;
6990   }
6991 
/* Minimizing repeats. Stack slots used by the code below: STACK(0) is
initialized to 0 and later receives STR_PTR, STACK(1) is the repetition
counter (left uninitialized for OP_CRMINSTAR, which does not read it) and
STACK(2), allocated for the duplicate-name forms only, keeps the address of
the selected OVECTOR pair. */
6992 allocate_stack(common, ref ? 2 : 3);
6993 if (ref)
6994   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6995 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6996 if (type != OP_CRMINSTAR)
6997   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6998 
6999 if (min == 0)
7000   {
7001   /* Handles both invalid and empty cases. Since the minimum repeat,
7002   is zero the invalid case is basically the same as an empty case. */
7003   if (ref)
7004     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7005   else
7006     {
7007     compile_dnref_search(common, ccbegin, NULL);
7008     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
7009     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
7010     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
7011     }
7012   /* Length is non-zero, we can match real repeats. */
7013   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  /* Zero repetitions are tried first: jump over the matching code. */
7014   jump = JUMP(SLJIT_JUMP);
7015   }
7016 else
7017   {
7018   if (ref)
7019     {
7020     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
7021     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7022     }
7023   else
7024     {
7025     compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
7026     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
7027     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
7028     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
7029     }
7030   }
7031 
/* Start of one more repetition; fails once the counter has reached max. */
7032 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
7033 if (max > 0)
7034   add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
7035 
7036 if (!ref)
7037   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
7038 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
7039 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7040 
7041 if (min > 1)
7042   {
  /* Keep repeating until the minimum count is reached. */
7043   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
7044   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
7045   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
7046   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
7047   }
7048 else if (max > 0)
7049   OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
7050 
7051 if (jump != NULL)
7052   JUMPHERE(jump);
7053 JUMPHERE(zerolength);
7054 
7055 count_match(common);
7056 return cc;
7057 }
7058 
compile_recurse_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)7059 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7060 {
7061 DEFINE_COMPILER;
7062 backtrack_common *backtrack;
7063 recurse_entry *entry = common->entries;
7064 recurse_entry *prev = NULL;
7065 sljit_sw start = GET(cc, 1);
7066 pcre_uchar *start_cc;
7067 BOOL needs_control_head;
7068 
7069 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
7070 
7071 /* Inlining simple patterns. */
7072 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
7073   {
7074   start_cc = common->start + start;
7075   compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
7076   BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
7077   return cc + 1 + LINK_SIZE;
7078   }
7079 
7080 while (entry != NULL)
7081   {
7082   if (entry->start == start)
7083     break;
7084   prev = entry;
7085   entry = entry->next;
7086   }
7087 
7088 if (entry == NULL)
7089   {
7090   entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
7091   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7092     return NULL;
7093   entry->next = NULL;
7094   entry->entry = NULL;
7095   entry->calls = NULL;
7096   entry->start = start;
7097 
7098   if (prev != NULL)
7099     prev->next = entry;
7100   else
7101     common->entries = entry;
7102   }
7103 
7104 if (common->has_set_som && common->mark_ptr != 0)
7105   {
7106   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
7107   allocate_stack(common, 2);
7108   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
7109   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7110   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
7111   }
7112 else if (common->has_set_som || common->mark_ptr != 0)
7113   {
7114   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
7115   allocate_stack(common, 1);
7116   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7117   }
7118 
7119 if (entry->entry == NULL)
7120   add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
7121 else
7122   JUMPTO(SLJIT_FAST_CALL, entry->entry);
7123 /* Leave if the match is failed. */
7124 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
7125 return cc + 1 + LINK_SIZE;
7126 }
7127 
do_callout(struct jit_arguments * arguments,PUBL (callout_block)* callout_block,pcre_uchar ** jit_ovector)7128 static int SLJIT_CALL do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
7129 {
7130 const pcre_uchar *begin = arguments->begin;
7131 int *offset_vector = arguments->offsets;
7132 int offset_count = arguments->offset_count;
7133 int i;
7134 
7135 if (PUBL(callout) == NULL)
7136   return 0;
7137 
7138 callout_block->version = 2;
7139 callout_block->callout_data = arguments->callout_data;
7140 
7141 /* Offsets in subject. */
7142 callout_block->subject_length = arguments->end - arguments->begin;
7143 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
7144 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
7145 #if defined COMPILE_PCRE8
7146 callout_block->subject = (PCRE_SPTR)begin;
7147 #elif defined COMPILE_PCRE16
7148 callout_block->subject = (PCRE_SPTR16)begin;
7149 #elif defined COMPILE_PCRE32
7150 callout_block->subject = (PCRE_SPTR32)begin;
7151 #endif
7152 
7153 /* Convert and copy the JIT offset vector to the offset_vector array. */
7154 callout_block->capture_top = 0;
7155 callout_block->offset_vector = offset_vector;
7156 for (i = 2; i < offset_count; i += 2)
7157   {
7158   offset_vector[i] = jit_ovector[i] - begin;
7159   offset_vector[i + 1] = jit_ovector[i + 1] - begin;
7160   if (jit_ovector[i] >= begin)
7161     callout_block->capture_top = i;
7162   }
7163 
7164 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
7165 if (offset_count > 0)
7166   offset_vector[0] = -1;
7167 if (offset_count > 1)
7168   offset_vector[1] = -1;
7169 return (*PUBL(callout))(callout_block);
7170 }
7171 
7172 /* Size of the callout block in bytes, rounded up to a multiple of 8. */
7173 #define CALLOUT_ARG_SIZE \
7174     (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
7175 
/* Byte offset of the field 'arg' inside the callout block. */
7176 #define CALLOUT_ARG_OFFSET(arg) \
7177     SLJIT_OFFSETOF(PUBL(callout_block), arg)
7178 
/* Compiles an OP_CALLOUT item.

The generated code builds a callout block on the top of the stack, calls
do_callout() with it and then releases the block again. A positive return
value makes the match backtrack, any other non-zero value jumps to the
forced quit path, and zero lets matching continue.

Returns the address of the next opcode (cc + 2 + 2 * LINK_SIZE), or NULL if
the compiler is in error state after allocating the backtrack structure. */
compile_callout_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)7179 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7180 {
7181 DEFINE_COMPILER;
7182 backtrack_common *backtrack;
7183 
7184 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
7185 
/* Reserve stack space for the callout block. */
7186 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
7187 
7188 SLJIT_ASSERT(common->capture_last_ptr != 0);
7189 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
7190 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7191 OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
7192 OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
7193 
7194 /* These pointer sized fields temporarily store internal variables:
do_callout() converts them to current_position and start_match before it
assigns the real offset_vector and subject values. */
7195 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
7196 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
7197 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
7198 
/* The mark field is loaded from the jit_arguments structure when marks are
in use, otherwise it is set to 0. */
7199 if (common->mark_ptr != 0)
7200   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
7201 OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
7202 OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
7203 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
7204 
7205 /* Needed to save important temporary registers. */
7206 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
7207 /* SLJIT_R0 = arguments */
/* SLJIT_R1 = callout block, SLJIT_R2 = address of the JIT offset vector. */
7208 OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
7209 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
7210 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
7211 OP1(SLJIT_MOV_S32, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
7212 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7213 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
7214 
7215 /* Check return value. */
7216 OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
7217 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
/* Positive values were handled above, so "not equal" means negative here. */
7218 if (common->forced_quit_label == NULL)
7219   add_jump(compiler, &common->forced_quit, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
7220 else
7221   JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->forced_quit_label);
7222 return cc + 2 + 2 * LINK_SIZE;
7223 }
7224 
/* The callout helper macros are not needed after this point. */
7225 #undef CALLOUT_ARG_SIZE
7226 #undef CALLOUT_ARG_OFFSET
7227 
assert_needs_str_ptr_saving(pcre_uchar * cc)7228 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(pcre_uchar *cc)
7229 {
7230 while (TRUE)
7231   {
7232   switch (*cc)
7233     {
7234     case OP_NOT_WORD_BOUNDARY:
7235     case OP_WORD_BOUNDARY:
7236     case OP_CIRC:
7237     case OP_CIRCM:
7238     case OP_DOLL:
7239     case OP_DOLLM:
7240     case OP_CALLOUT:
7241     case OP_ALT:
7242     cc += PRIV(OP_lengths)[*cc];
7243     break;
7244 
7245     case OP_KET:
7246     return FALSE;
7247 
7248     default:
7249     return TRUE;
7250     }
7251   }
7252 }
7253 
compile_assert_matchingpath(compiler_common * common,pcre_uchar * cc,assert_backtrack * backtrack,BOOL conditional)7254 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
7255 {
7256 DEFINE_COMPILER;
7257 int framesize;
7258 int extrasize;
7259 BOOL needs_control_head;
7260 int private_data_ptr;
7261 backtrack_common altbacktrack;
7262 pcre_uchar *ccbegin;
7263 pcre_uchar opcode;
7264 pcre_uchar bra = OP_BRA;
7265 jump_list *tmp = NULL;
7266 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
7267 jump_list **found;
7268 /* Saving previous accept variables. */
7269 BOOL save_local_exit = common->local_exit;
7270 BOOL save_positive_assert = common->positive_assert;
7271 then_trap_backtrack *save_then_trap = common->then_trap;
7272 struct sljit_label *save_quit_label = common->quit_label;
7273 struct sljit_label *save_accept_label = common->accept_label;
7274 jump_list *save_quit = common->quit;
7275 jump_list *save_positive_assert_quit = common->positive_assert_quit;
7276 jump_list *save_accept = common->accept;
7277 struct sljit_jump *jump;
7278 struct sljit_jump *brajump = NULL;
7279 
7280 /* Assert captures then. */
7281 common->then_trap = NULL;
7282 
7283 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
7284   {
7285   SLJIT_ASSERT(!conditional);
7286   bra = *cc;
7287   cc++;
7288   }
7289 private_data_ptr = PRIVATE_DATA(cc);
7290 SLJIT_ASSERT(private_data_ptr != 0);
7291 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
7292 backtrack->framesize = framesize;
7293 backtrack->private_data_ptr = private_data_ptr;
7294 opcode = *cc;
7295 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
7296 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
7297 ccbegin = cc;
7298 cc += GET(cc, 1);
7299 
7300 if (bra == OP_BRAMINZERO)
7301   {
7302   /* This is a braminzero backtrack path. */
7303   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7304   free_stack(common, 1);
7305   brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7306   }
7307 
7308 if (framesize < 0)
7309   {
7310   extrasize = 1;
7311   if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
7312     extrasize = 0;
7313 
7314   if (needs_control_head)
7315     extrasize++;
7316 
7317   if (framesize == no_frame)
7318     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
7319 
7320   if (extrasize > 0)
7321     allocate_stack(common, extrasize);
7322 
7323   if (needs_control_head)
7324     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
7325 
7326   if (extrasize > 0)
7327     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7328 
7329   if (needs_control_head)
7330     {
7331     SLJIT_ASSERT(extrasize == 2);
7332     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
7333     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
7334     }
7335   }
7336 else
7337   {
7338   extrasize = needs_control_head ? 3 : 2;
7339   allocate_stack(common, framesize + extrasize);
7340 
7341   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7342   OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
7343   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
7344   if (needs_control_head)
7345     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
7346   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7347 
7348   if (needs_control_head)
7349     {
7350     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
7351     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
7352     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
7353     }
7354   else
7355     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
7356 
7357   init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
7358   }
7359 
7360 memset(&altbacktrack, 0, sizeof(backtrack_common));
7361 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
7362   {
7363   /* Negative assert is stronger than positive assert. */
7364   common->local_exit = TRUE;
7365   common->quit_label = NULL;
7366   common->quit = NULL;
7367   common->positive_assert = FALSE;
7368   }
7369 else
7370   common->positive_assert = TRUE;
7371 common->positive_assert_quit = NULL;
7372 
7373 while (1)
7374   {
7375   common->accept_label = NULL;
7376   common->accept = NULL;
7377   altbacktrack.top = NULL;
7378   altbacktrack.topbacktracks = NULL;
7379 
7380   if (*ccbegin == OP_ALT && extrasize > 0)
7381     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7382 
7383   altbacktrack.cc = ccbegin;
7384   compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
7385   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7386     {
7387     if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
7388       {
7389       common->local_exit = save_local_exit;
7390       common->quit_label = save_quit_label;
7391       common->quit = save_quit;
7392       }
7393     common->positive_assert = save_positive_assert;
7394     common->then_trap = save_then_trap;
7395     common->accept_label = save_accept_label;
7396     common->positive_assert_quit = save_positive_assert_quit;
7397     common->accept = save_accept;
7398     return NULL;
7399     }
7400   common->accept_label = LABEL();
7401   if (common->accept != NULL)
7402     set_jumps(common->accept, common->accept_label);
7403 
7404   /* Reset stack. */
7405   if (framesize < 0)
7406     {
7407     if (framesize == no_frame)
7408       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7409     else if (extrasize > 0)
7410       free_stack(common, extrasize);
7411 
7412     if (needs_control_head)
7413       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
7414     }
7415   else
7416     {
7417     if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
7418       {
7419       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
7420       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
7421       if (needs_control_head)
7422         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
7423       }
7424     else
7425       {
7426       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7427       if (needs_control_head)
7428         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
7429       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
7430       }
7431     }
7432 
7433   if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
7434     {
7435     /* We know that STR_PTR was stored on the top of the stack. */
7436     if (conditional)
7437       {
7438       if (extrasize > 0)
7439         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
7440       }
7441     else if (bra == OP_BRAZERO)
7442       {
7443       if (framesize < 0)
7444         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
7445       else
7446         {
7447         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
7448         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
7449         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
7450         }
7451       OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
7452       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7453       }
7454     else if (framesize >= 0)
7455       {
7456       /* For OP_BRA and OP_BRAMINZERO. */
7457       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
7458       }
7459     }
7460   add_jump(compiler, found, JUMP(SLJIT_JUMP));
7461 
7462   compile_backtrackingpath(common, altbacktrack.top);
7463   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7464     {
7465     if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
7466       {
7467       common->local_exit = save_local_exit;
7468       common->quit_label = save_quit_label;
7469       common->quit = save_quit;
7470       }
7471     common->positive_assert = save_positive_assert;
7472     common->then_trap = save_then_trap;
7473     common->accept_label = save_accept_label;
7474     common->positive_assert_quit = save_positive_assert_quit;
7475     common->accept = save_accept;
7476     return NULL;
7477     }
7478   set_jumps(altbacktrack.topbacktracks, LABEL());
7479 
7480   if (*cc != OP_ALT)
7481     break;
7482 
7483   ccbegin = cc;
7484   cc += GET(cc, 1);
7485   }
7486 
7487 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
7488   {
7489   SLJIT_ASSERT(common->positive_assert_quit == NULL);
7490   /* Makes the check less complicated below. */
7491   common->positive_assert_quit = common->quit;
7492   }
7493 
7494 /* None of them matched. */
7495 if (common->positive_assert_quit != NULL)
7496   {
7497   jump = JUMP(SLJIT_JUMP);
7498   set_jumps(common->positive_assert_quit, LABEL());
7499   SLJIT_ASSERT(framesize != no_stack);
7500   if (framesize < 0)
7501     OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
7502   else
7503     {
7504     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7505     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
7506     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
7507     }
7508   JUMPHERE(jump);
7509   }
7510 
7511 if (needs_control_head)
7512   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
7513 
7514 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
7515   {
7516   /* Assert is failed. */
7517   if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
7518     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7519 
7520   if (framesize < 0)
7521     {
7522     /* The topmost item should be 0. */
7523     if (bra == OP_BRAZERO)
7524       {
7525       if (extrasize == 2)
7526         free_stack(common, 1);
7527       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7528       }
7529     else if (extrasize > 0)
7530       free_stack(common, extrasize);
7531     }
7532   else
7533     {
7534     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
7535     /* The topmost item should be 0. */
7536     if (bra == OP_BRAZERO)
7537       {
7538       free_stack(common, framesize + extrasize - 1);
7539       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7540       }
7541     else
7542       free_stack(common, framesize + extrasize);
7543     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
7544     }
7545   jump = JUMP(SLJIT_JUMP);
7546   if (bra != OP_BRAZERO)
7547     add_jump(compiler, target, jump);
7548 
7549   /* Assert is successful. */
7550   set_jumps(tmp, LABEL());
7551   if (framesize < 0)
7552     {
7553     /* We know that STR_PTR was stored on the top of the stack. */
7554     if (extrasize > 0)
7555       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
7556 
7557     /* Keep the STR_PTR on the top of the stack. */
7558     if (bra == OP_BRAZERO)
7559       {
7560       OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
7561       if (extrasize == 2)
7562         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7563       }
7564     else if (bra == OP_BRAMINZERO)
7565       {
7566       OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
7567       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7568       }
7569     }
7570   else
7571     {
7572     if (bra == OP_BRA)
7573       {
7574       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
7575       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
7576       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
7577       }
7578     else
7579       {
7580       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
7581       OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
7582       if (extrasize == 2)
7583         {
7584         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7585         if (bra == OP_BRAMINZERO)
7586           OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7587         }
7588       else
7589         {
7590         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
7591         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
7592         }
7593       }
7594     }
7595 
7596   if (bra == OP_BRAZERO)
7597     {
7598     backtrack->matchingpath = LABEL();
7599     SET_LABEL(jump, backtrack->matchingpath);
7600     }
7601   else if (bra == OP_BRAMINZERO)
7602     {
7603     JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
7604     JUMPHERE(brajump);
7605     if (framesize >= 0)
7606       {
7607       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7608       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
7609       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
7610       }
7611     set_jumps(backtrack->common.topbacktracks, LABEL());
7612     }
7613   }
7614 else
7615   {
7616   /* AssertNot is successful. */
7617   if (framesize < 0)
7618     {
7619     if (extrasize > 0)
7620       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7621 
7622     if (bra != OP_BRA)
7623       {
7624       if (extrasize == 2)
7625         free_stack(common, 1);
7626       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7627       }
7628     else if (extrasize > 0)
7629       free_stack(common, extrasize);
7630     }
7631   else
7632     {
7633     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7634     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
7635     /* The topmost item should be 0. */
7636     if (bra != OP_BRA)
7637       {
7638       free_stack(common, framesize + extrasize - 1);
7639       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7640       }
7641     else
7642       free_stack(common, framesize + extrasize);
7643     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
7644     }
7645 
7646   if (bra == OP_BRAZERO)
7647     backtrack->matchingpath = LABEL();
7648   else if (bra == OP_BRAMINZERO)
7649     {
7650     JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
7651     JUMPHERE(brajump);
7652     }
7653 
7654   if (bra != OP_BRA)
7655     {
7656     SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
7657     set_jumps(backtrack->common.topbacktracks, LABEL());
7658     backtrack->common.topbacktracks = NULL;
7659     }
7660   }
7661 
7662 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
7663   {
7664   common->local_exit = save_local_exit;
7665   common->quit_label = save_quit_label;
7666   common->quit = save_quit;
7667   }
7668 common->positive_assert = save_positive_assert;
7669 common->then_trap = save_then_trap;
7670 common->accept_label = save_accept_label;
7671 common->positive_assert_quit = save_positive_assert_quit;
7672 common->accept = save_accept;
7673 return cc + 1 + LINK_SIZE;
7674 }
7675 
match_once_common(compiler_common * common,pcre_uchar ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)7676 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
7677 {
7678 DEFINE_COMPILER;
7679 int stacksize;
7680 
7681 if (framesize < 0)
7682   {
7683   if (framesize == no_frame)
7684     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7685   else
7686     {
7687     stacksize = needs_control_head ? 1 : 0;
7688     if (ket != OP_KET || has_alternatives)
7689       stacksize++;
7690 
7691     if (stacksize > 0)
7692       free_stack(common, stacksize);
7693     }
7694 
7695   if (needs_control_head)
7696     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
7697 
7698   /* TMP2 which is set here used by OP_KETRMAX below. */
7699   if (ket == OP_KETRMAX)
7700     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
7701   else if (ket == OP_KETRMIN)
7702     {
7703     /* Move the STR_PTR to the private_data_ptr. */
7704     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
7705     }
7706   }
7707 else
7708   {
7709   stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
7710   OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
7711   if (needs_control_head)
7712     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
7713 
7714   if (ket == OP_KETRMAX)
7715     {
7716     /* TMP2 which is set here used by OP_KETRMAX below. */
7717     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7718     }
7719   }
7720 if (needs_control_head)
7721   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
7722 }
7723 
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)7724 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
7725 {
7726 DEFINE_COMPILER;
7727 
7728 if (common->capture_last_ptr != 0)
7729   {
7730   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
7731   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
7732   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7733   stacksize++;
7734   }
7735 if (common->optimized_cbracket[offset >> 1] == 0)
7736   {
7737   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
7738   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7739   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7740   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7741   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
7742   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
7743   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
7744   stacksize += 2;
7745   }
7746 return stacksize;
7747 }
7748 
7749 /*
7750   Handling bracketed expressions is probably the most complex part.
7751 
7752   Stack layout naming characters:
7753     S - Push the current STR_PTR
7754     0 - Push a 0 (NULL)
7755     A - Push the current STR_PTR. Needed for restoring the STR_PTR
7756         before the next alternative. Not pushed if there are no alternatives.
7757     M - Any values pushed by the current alternative. Can be empty, or anything.
7758     C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
7759     L - Push the previous local (pointed by localptr) to the stack
7760    () - optional values stored on the stack
7761   ()* - optional, can be stored multiple times
7762 
7763   The following list shows the regular expression templates, their PCRE byte codes
7764   and stack layout supported by pcre-sljit.
7765 
7766   (?:)                     OP_BRA     | OP_KET                A M
7767   ()                       OP_CBRA    | OP_KET                C M
7768   (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
7769                            OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
7770   (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
7771                            OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
7772   ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
7773                            OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
7774   ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
7775                            OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
7776   (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
7777   (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
7778   ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
7779   ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
7780   (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
7781            OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
7782   (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
7783            OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
7784   ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
7785            OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
7786   ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
7787            OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*
7788 
7789 
7790   Stack layout naming characters:
7791     A - Push the alternative index (starting from 0) on the stack.
7792         Not pushed if there are no alternatives.
7793     M - Any values pushed by the current alternative. Can be empty, or anything.
7794 
7795   The next list shows the possible content of a bracket:
7796   (|)     OP_*BRA    | OP_ALT ...         M A
7797   (?()|)  OP_*COND   | OP_ALT             M A
7798   (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
7799   (?>|)   OP_ONCE_NC | OP_ALT ...         [stack trace] M A
7800                                           Or nothing, if trace is unnecessary
7801 */
7802 
compile_bracket_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)7803 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7804 {
7805 DEFINE_COMPILER;
7806 backtrack_common *backtrack;
7807 pcre_uchar opcode;
7808 int private_data_ptr = 0;
7809 int offset = 0;
7810 int i, stacksize;
7811 int repeat_ptr = 0, repeat_length = 0;
7812 int repeat_type = 0, repeat_count = 0;
7813 pcre_uchar *ccbegin;
7814 pcre_uchar *matchingpath;
7815 pcre_uchar *slot;
7816 pcre_uchar bra = OP_BRA;
7817 pcre_uchar ket;
7818 assert_backtrack *assert;
7819 BOOL has_alternatives;
7820 BOOL needs_control_head = FALSE;
7821 struct sljit_jump *jump;
7822 struct sljit_jump *skip;
7823 struct sljit_label *rmax_label = NULL;
7824 struct sljit_jump *braminzero = NULL;
7825 
7826 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
7827 
7828 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
7829   {
7830   bra = *cc;
7831   cc++;
7832   opcode = *cc;
7833   }
7834 
7835 opcode = *cc;
7836 ccbegin = cc;
7837 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
7838 ket = *matchingpath;
7839 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
7840   {
7841   repeat_ptr = PRIVATE_DATA(matchingpath);
7842   repeat_length = PRIVATE_DATA(matchingpath + 1);
7843   repeat_type = PRIVATE_DATA(matchingpath + 2);
7844   repeat_count = PRIVATE_DATA(matchingpath + 3);
7845   SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
7846   if (repeat_type == OP_UPTO)
7847     ket = OP_KETRMAX;
7848   if (repeat_type == OP_MINUPTO)
7849     ket = OP_KETRMIN;
7850   }
7851 
7852 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
7853   {
7854   /* Drop this bracket_backtrack. */
7855   parent->top = backtrack->prev;
7856   return matchingpath + 1 + LINK_SIZE + repeat_length;
7857   }
7858 
7859 matchingpath = ccbegin + 1 + LINK_SIZE;
7860 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
7861 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
7862 cc += GET(cc, 1);
7863 
7864 has_alternatives = *cc == OP_ALT;
7865 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
7866   has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL) ? FALSE : TRUE;
7867 
7868 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
7869   opcode = OP_SCOND;
7870 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
7871   opcode = OP_ONCE;
7872 
7873 if (opcode == OP_CBRA || opcode == OP_SCBRA)
7874   {
7875   /* Capturing brackets has a pre-allocated space. */
7876   offset = GET2(ccbegin, 1 + LINK_SIZE);
7877   if (common->optimized_cbracket[offset] == 0)
7878     {
7879     private_data_ptr = OVECTOR_PRIV(offset);
7880     offset <<= 1;
7881     }
7882   else
7883     {
7884     offset <<= 1;
7885     private_data_ptr = OVECTOR(offset);
7886     }
7887   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
7888   matchingpath += IMM2_SIZE;
7889   }
7890 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
7891   {
7892   /* Other brackets simply allocate the next entry. */
7893   private_data_ptr = PRIVATE_DATA(ccbegin);
7894   SLJIT_ASSERT(private_data_ptr != 0);
7895   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
7896   if (opcode == OP_ONCE)
7897     BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
7898   }
7899 
7900 /* Instructions before the first alternative. */
7901 stacksize = 0;
7902 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
7903   stacksize++;
7904 if (bra == OP_BRAZERO)
7905   stacksize++;
7906 
7907 if (stacksize > 0)
7908   allocate_stack(common, stacksize);
7909 
7910 stacksize = 0;
7911 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
7912   {
7913   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
7914   stacksize++;
7915   }
7916 
7917 if (bra == OP_BRAZERO)
7918   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7919 
7920 if (bra == OP_BRAMINZERO)
7921   {
7922   /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
7923   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7924   if (ket != OP_KETRMIN)
7925     {
7926     free_stack(common, 1);
7927     braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7928     }
7929   else
7930     {
7931     if (opcode == OP_ONCE || opcode >= OP_SBRA)
7932       {
7933       jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7934       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
7935       /* Nothing stored during the first run. */
7936       skip = JUMP(SLJIT_JUMP);
7937       JUMPHERE(jump);
7938       /* Checking zero-length iteration. */
7939       if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
7940         {
7941         /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
7942         braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7943         }
7944       else
7945         {
7946         /* Except when the whole stack frame must be saved. */
7947         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7948         braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
7949         }
7950       JUMPHERE(skip);
7951       }
7952     else
7953       {
7954       jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7955       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
7956       JUMPHERE(jump);
7957       }
7958     }
7959   }
7960 
7961 if (repeat_type != 0)
7962   {
7963   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
7964   if (repeat_type == OP_EXACT)
7965     rmax_label = LABEL();
7966   }
7967 
7968 if (ket == OP_KETRMIN)
7969   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
7970 
7971 if (ket == OP_KETRMAX)
7972   {
7973   rmax_label = LABEL();
7974   if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
7975     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
7976   }
7977 
7978 /* Handling capturing brackets and alternatives. */
7979 if (opcode == OP_ONCE)
7980   {
7981   stacksize = 0;
7982   if (needs_control_head)
7983     {
7984     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
7985     stacksize++;
7986     }
7987 
7988   if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
7989     {
7990     /* Neither capturing brackets nor recursions are found in the block. */
7991     if (ket == OP_KETRMIN)
7992       {
7993       stacksize += 2;
7994       if (!needs_control_head)
7995         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7996       }
7997     else
7998       {
7999       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
8000         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
8001       if (ket == OP_KETRMAX || has_alternatives)
8002         stacksize++;
8003       }
8004 
8005     if (stacksize > 0)
8006       allocate_stack(common, stacksize);
8007 
8008     stacksize = 0;
8009     if (needs_control_head)
8010       {
8011       stacksize++;
8012       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8013       }
8014 
8015     if (ket == OP_KETRMIN)
8016       {
8017       if (needs_control_head)
8018         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8019       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8020       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
8021         OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
8022       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
8023       }
8024     else if (ket == OP_KETRMAX || has_alternatives)
8025       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8026     }
8027   else
8028     {
8029     if (ket != OP_KET || has_alternatives)
8030       stacksize++;
8031 
8032     stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
8033     allocate_stack(common, stacksize);
8034 
8035     if (needs_control_head)
8036       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8037 
8038     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8039     OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
8040 
8041     stacksize = needs_control_head ? 1 : 0;
8042     if (ket != OP_KET || has_alternatives)
8043       {
8044       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8045       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
8046       stacksize++;
8047       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
8048       }
8049     else
8050       {
8051       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
8052       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
8053       }
8054     init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
8055     }
8056   }
8057 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
8058   {
8059   /* Saving the previous values. */
8060   if (common->optimized_cbracket[offset >> 1] != 0)
8061     {
8062     SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
8063     allocate_stack(common, 2);
8064     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8065     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
8066     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
8067     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
8068     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
8069     }
8070   else
8071     {
8072     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8073     allocate_stack(common, 1);
8074     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
8075     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8076     }
8077   }
8078 else if (opcode == OP_SBRA || opcode == OP_SCOND)
8079   {
8080   /* Saving the previous value. */
8081   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8082   allocate_stack(common, 1);
8083   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
8084   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8085   }
8086 else if (has_alternatives)
8087   {
8088   /* Pushing the starting string pointer. */
8089   allocate_stack(common, 1);
8090   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8091   }
8092 
8093 /* Generating code for the first alternative. */
8094 if (opcode == OP_COND || opcode == OP_SCOND)
8095   {
8096   if (*matchingpath == OP_CREF)
8097     {
8098     SLJIT_ASSERT(has_alternatives);
8099     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
8100       CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
8101     matchingpath += 1 + IMM2_SIZE;
8102     }
8103   else if (*matchingpath == OP_DNCREF)
8104     {
8105     SLJIT_ASSERT(has_alternatives);
8106 
8107     i = GET2(matchingpath, 1 + IMM2_SIZE);
8108     slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
8109     OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
8110     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
8111     OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
8112     slot += common->name_entry_size;
8113     i--;
8114     while (i-- > 0)
8115       {
8116       OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
8117       OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
8118       slot += common->name_entry_size;
8119       }
8120     OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
8121     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
8122     matchingpath += 1 + 2 * IMM2_SIZE;
8123     }
8124   else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL)
8125     {
8126     /* Never has other case. */
8127     BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
8128     SLJIT_ASSERT(!has_alternatives);
8129 
8130     if (*matchingpath == OP_FAIL)
8131       stacksize = 0;
8132     else if (*matchingpath == OP_RREF)
8133       {
8134       stacksize = GET2(matchingpath, 1);
8135       if (common->currententry == NULL)
8136         stacksize = 0;
8137       else if (stacksize == RREF_ANY)
8138         stacksize = 1;
8139       else if (common->currententry->start == 0)
8140         stacksize = stacksize == 0;
8141       else
8142         stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
8143 
8144       if (stacksize != 0)
8145         matchingpath += 1 + IMM2_SIZE;
8146       }
8147     else
8148       {
8149       if (common->currententry == NULL || common->currententry->start == 0)
8150         stacksize = 0;
8151       else
8152         {
8153         stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
8154         slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
8155         i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
8156         while (stacksize > 0)
8157           {
8158           if ((int)GET2(slot, 0) == i)
8159             break;
8160           slot += common->name_entry_size;
8161           stacksize--;
8162           }
8163         }
8164 
8165       if (stacksize != 0)
8166         matchingpath += 1 + 2 * IMM2_SIZE;
8167       }
8168 
8169       /* The stacksize == 0 is a common "else" case. */
8170       if (stacksize == 0)
8171         {
8172         if (*cc == OP_ALT)
8173           {
8174           matchingpath = cc + 1 + LINK_SIZE;
8175           cc += GET(cc, 1);
8176           }
8177         else
8178           matchingpath = cc;
8179         }
8180     }
8181   else
8182     {
8183     SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
8184     /* Similar code as PUSH_BACKTRACK macro. */
8185     assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
8186     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8187       return NULL;
8188     memset(assert, 0, sizeof(assert_backtrack));
8189     assert->common.cc = matchingpath;
8190     BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
8191     matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
8192     }
8193   }
8194 
8195 compile_matchingpath(common, matchingpath, cc, backtrack);
8196 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8197   return NULL;
8198 
8199 if (opcode == OP_ONCE)
8200   match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
8201 
8202 stacksize = 0;
8203 if (repeat_type == OP_MINUPTO)
8204   {
8205   /* We need to preserve the counter. TMP2 will be used below. */
8206   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
8207   stacksize++;
8208   }
8209 if (ket != OP_KET || bra != OP_BRA)
8210   stacksize++;
8211 if (offset != 0)
8212   {
8213   if (common->capture_last_ptr != 0)
8214     stacksize++;
8215   if (common->optimized_cbracket[offset >> 1] == 0)
8216     stacksize += 2;
8217   }
8218 if (has_alternatives && opcode != OP_ONCE)
8219   stacksize++;
8220 
8221 if (stacksize > 0)
8222   allocate_stack(common, stacksize);
8223 
8224 stacksize = 0;
8225 if (repeat_type == OP_MINUPTO)
8226   {
8227   /* TMP2 was set above. */
8228   OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
8229   stacksize++;
8230   }
8231 
8232 if (ket != OP_KET || bra != OP_BRA)
8233   {
8234   if (ket != OP_KET)
8235     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8236   else
8237     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
8238   stacksize++;
8239   }
8240 
8241 if (offset != 0)
8242   stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
8243 
8244 if (has_alternatives)
8245   {
8246   if (opcode != OP_ONCE)
8247     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
8248   if (ket != OP_KETRMAX)
8249     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
8250   }
8251 
8252 /* Must be after the matchingpath label. */
8253 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
8254   {
8255   SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
8256   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
8257   }
8258 
8259 if (ket == OP_KETRMAX)
8260   {
8261   if (repeat_type != 0)
8262     {
8263     if (has_alternatives)
8264       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
8265     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
8266     JUMPTO(SLJIT_NOT_ZERO, rmax_label);
8267     /* Drop STR_PTR for greedy plus quantifier. */
8268     if (opcode != OP_ONCE)
8269       free_stack(common, 1);
8270     }
8271   else if (opcode == OP_ONCE || opcode >= OP_SBRA)
8272     {
8273     if (has_alternatives)
8274       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
8275     /* Checking zero-length iteration. */
8276     if (opcode != OP_ONCE)
8277       {
8278       CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
8279       /* Drop STR_PTR for greedy plus quantifier. */
8280       if (bra != OP_BRAZERO)
8281         free_stack(common, 1);
8282       }
8283     else
8284       /* TMP2 must contain the starting STR_PTR. */
8285       CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
8286     }
8287   else
8288     JUMPTO(SLJIT_JUMP, rmax_label);
8289   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
8290   }
8291 
8292 if (repeat_type == OP_EXACT)
8293   {
8294   count_match(common);
8295   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
8296   JUMPTO(SLJIT_NOT_ZERO, rmax_label);
8297   }
8298 else if (repeat_type == OP_UPTO)
8299   {
8300   /* We need to preserve the counter. */
8301   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
8302   allocate_stack(common, 1);
8303   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8304   }
8305 
8306 if (bra == OP_BRAZERO)
8307   BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
8308 
8309 if (bra == OP_BRAMINZERO)
8310   {
8311   /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
8312   JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
8313   if (braminzero != NULL)
8314     {
8315     JUMPHERE(braminzero);
8316     /* We need to release the end pointer to perform the
8317     backtrack for the zero-length iteration. When
8318     framesize is < 0, OP_ONCE will do the release itself. */
8319     if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
8320       {
8321       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8322       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
8323       }
8324     else if (ket == OP_KETRMIN && opcode != OP_ONCE)
8325       free_stack(common, 1);
8326     }
8327   /* Continue to the normal backtrack. */
8328   }
8329 
8330 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
8331   count_match(common);
8332 
8333 /* Skip the other alternatives. */
8334 while (*cc == OP_ALT)
8335   cc += GET(cc, 1);
8336 cc += 1 + LINK_SIZE;
8337 
8338 if (opcode == OP_ONCE)
8339   {
8340   /* We temporarily encode the needs_control_head in the lowest bit.
8341      Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
8342      the same value for small signed numbers (including negative numbers). */
8343   BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
8344   }
8345 return cc + repeat_length;
8346 }
8347 
/* Emits the matching path of a repeated bracket that is terminated by
OP_KETRPOS: OP_BRAPOS, OP_SBRAPOS, OP_CBRAPOS or OP_SCBRAPOS, optionally
preceded by OP_BRAPOSZERO.

Arguments:
  common   compiler state; capture_last_ptr, control_head_ptr and
           optimized_cbracket are read here
  cc       points to OP_BRAPOSZERO or to one of the four bracket opcodes
  parent   not referenced directly in this body; presumably consumed by
           the PUSH_BACKTRACK macro (confirm against its definition)

Returns:   pointer to the byte code following the OP_KETRPOS, or NULL
           when the sljit compiler reports an error

Unless OP_BRAPOSZERO was present, one stack slot holds a flag which is
initialized to 1 and cleared to 0 after every iteration that does not
take the empty match exit. The generated code backtracks if the flag is
still non-zero when all alternatives have failed. */

static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
pcre_uchar opcode;
int private_data_ptr;
int cbraprivptr = 0;
BOOL needs_control_head;
int framesize;
int stacksize;
int offset = 0;
BOOL zero = FALSE;
pcre_uchar *ccbegin = NULL;
int stack; /* Also contains the offset of control head. */
struct sljit_label *loop = NULL;
struct jump_list *emptymatch = NULL;

/* NOTE(review): backtrack is never assigned explicitly in this function;
presumably PUSH_BACKTRACK sets it - confirm against the macro. */
PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
if (*cc == OP_BRAPOSZERO)
  {
  zero = TRUE;
  cc++;
  }

opcode = *cc;
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
switch(opcode)
  {
  case OP_BRAPOS:
  case OP_SBRAPOS:
  ccbegin = cc + 1 + LINK_SIZE;
  break;

  case OP_CBRAPOS:
  case OP_SCBRAPOS:
  offset = GET2(cc, 1 + LINK_SIZE);
  /* This case cannot be optimized in the same way as
  normal capturing brackets. */
  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
  cbraprivptr = OVECTOR_PRIV(offset);
  /* From here on offset is the index of the ovector pair. */
  offset <<= 1;
  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
if (framesize < 0)
  {
  /* No frame is pushed. Stack layout (from STACK(0)):
       capturing:      ovector start, ovector end, [capture_last]
       non-capturing:  STR_PTR
     followed by [control head] and, unless zero is set, the flag. */
  if (offset != 0)
    {
    stacksize = 2;
    if (common->capture_last_ptr != 0)
      stacksize++;
    }
  else
    stacksize = 1;

  if (needs_control_head)
    stacksize++;
  if (!zero)
    stacksize++;

  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
  allocate_stack(common, stacksize);
  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  stack = 0;
  if (offset != 0)
    {
    stack = 2;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    if (common->capture_last_ptr != 0)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
      stack = 3;
      }
    }
  else
    {
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    stack = 1;
    }

  /* The flag is stored above the control head slot; afterwards stack
  is moved back so that it indexes the control head slot itself. */
  if (needs_control_head)
    stack++;
  if (!zero)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
  if (needs_control_head)
    {
    stack--;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    }
  }
else
  {
  /* A frame is pushed. Stack layout (from STACK(0)):
       [flag], [control head], [STR_PTR when non-capturing],
       previous value of private_data_ptr, frame data. */
  stacksize = framesize + 1;
  if (!zero)
    stacksize++;
  if (needs_control_head)
    stacksize++;
  if (offset == 0)
    stacksize++;
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;

  allocate_stack(common, stacksize);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

  stack = 0;
  if (!zero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
    stack = 1;
    }
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    stack++;
    }
  if (offset == 0)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
    stack++;
    }
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
  /* Step back over the slots written after the control head. */
  stack -= 1 + (offset == 0);
  }

if (offset != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);

/* Every successful iteration jumps back to this label. */
loop = LABEL();
while (*cc != OP_KETRPOS)
  {
  backtrack->top = NULL;
  backtrack->topbacktracks = NULL;
  cc += GET(cc, 1);

  compile_matchingpath(common, ccbegin, cc, backtrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

    if (offset != 0)
      {
      /* TMP1 is the start of this iteration; it is compared with
      STR_PTR below to detect an empty match. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* An iteration has matched: clear the flag. */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
    }
  else
    {
    if (offset != 0)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* An iteration has matched: clear the flag. In this branch
    framesize >= 0 always holds, so the flag is in STACK(0). */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    }

  JUMPTO(SLJIT_JUMP, loop);
  flush_stubs(common);

  compile_backtrackingpath(common, backtrack->top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;
  set_jumps(backtrack->topbacktracks, LABEL());

  /* This alternative has failed: reload the saved STR_PTR. */
  if (framesize < 0)
    {
    if (offset != 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
    else
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    }
  else
    {
    if (offset != 0)
      {
      /* Last alternative. */
      if (*cc == OP_KETRPOS)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      }
    }

  if (*cc == OP_KETRPOS)
    break;
  ccbegin = cc + 1 + LINK_SIZE;
  }

/* We don't have to restore the control head in case of a failed match. */

backtrack->topbacktracks = NULL;
if (!zero)
  {
  /* Backtrack when the flag is still set (no iteration has matched). */
  if (framesize < 0)
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
  else /* TMP2 is set to [private_data_ptr] above. */
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
  }

/* None of them matched. */
set_jumps(emptymatch, LABEL());
count_match(common);
return cc + 1 + LINK_SIZE;
}
8630 
get_iterator_parameters(compiler_common * common,pcre_uchar * cc,pcre_uchar * opcode,pcre_uchar * type,sljit_u32 * max,sljit_u32 * exact,pcre_uchar ** end)8631 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, sljit_u32 *max, sljit_u32 *exact, pcre_uchar **end)
8632 {
8633 int class_len;
8634 
8635 *opcode = *cc;
8636 *exact = 0;
8637 
8638 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
8639   {
8640   cc++;
8641   *type = OP_CHAR;
8642   }
8643 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
8644   {
8645   cc++;
8646   *type = OP_CHARI;
8647   *opcode -= OP_STARI - OP_STAR;
8648   }
8649 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
8650   {
8651   cc++;
8652   *type = OP_NOT;
8653   *opcode -= OP_NOTSTAR - OP_STAR;
8654   }
8655 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
8656   {
8657   cc++;
8658   *type = OP_NOTI;
8659   *opcode -= OP_NOTSTARI - OP_STAR;
8660   }
8661 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
8662   {
8663   cc++;
8664   *opcode -= OP_TYPESTAR - OP_STAR;
8665   *type = OP_END;
8666   }
8667 else
8668   {
8669   SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
8670   *type = *opcode;
8671   cc++;
8672   class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
8673   *opcode = cc[class_len - 1];
8674 
8675   if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
8676     {
8677     *opcode -= OP_CRSTAR - OP_STAR;
8678     *end = cc + class_len;
8679 
8680     if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
8681       {
8682       *exact = 1;
8683       *opcode -= OP_PLUS - OP_STAR;
8684       }
8685     }
8686   else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
8687     {
8688     *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
8689     *end = cc + class_len;
8690 
8691     if (*opcode == OP_POSPLUS)
8692       {
8693       *exact = 1;
8694       *opcode = OP_POSSTAR;
8695       }
8696     }
8697   else
8698     {
8699     SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
8700     *max = GET2(cc, (class_len + IMM2_SIZE));
8701     *exact = GET2(cc, class_len);
8702 
8703     if (*max == 0)
8704       {
8705       if (*opcode == OP_CRPOSRANGE)
8706         *opcode = OP_POSSTAR;
8707       else
8708         *opcode -= OP_CRRANGE - OP_STAR;
8709       }
8710     else
8711       {
8712       *max -= *exact;
8713       if (*max == 0)
8714         *opcode = OP_EXACT;
8715       else if (*max == 1)
8716         {
8717         if (*opcode == OP_CRPOSRANGE)
8718           *opcode = OP_POSQUERY;
8719         else
8720           *opcode -= OP_CRRANGE - OP_QUERY;
8721         }
8722       else
8723         {
8724         if (*opcode == OP_CRPOSRANGE)
8725           *opcode = OP_POSUPTO;
8726         else
8727           *opcode -= OP_CRRANGE - OP_UPTO;
8728         }
8729       }
8730     *end = cc + class_len + 2 * IMM2_SIZE;
8731     }
8732   return cc;
8733   }
8734 
8735 switch(*opcode)
8736   {
8737   case OP_EXACT:
8738   *exact = GET2(cc, 0);
8739   cc += IMM2_SIZE;
8740   break;
8741 
8742   case OP_PLUS:
8743   case OP_MINPLUS:
8744   *exact = 1;
8745   *opcode -= OP_PLUS - OP_STAR;
8746   break;
8747 
8748   case OP_POSPLUS:
8749   *exact = 1;
8750   *opcode = OP_POSSTAR;
8751   break;
8752 
8753   case OP_UPTO:
8754   case OP_MINUPTO:
8755   case OP_POSUPTO:
8756   *max = GET2(cc, 0);
8757   cc += IMM2_SIZE;
8758   break;
8759   }
8760 
8761 if (*type == OP_END)
8762   {
8763   *type = *cc;
8764   *end = next_opcode(common, cc);
8765   cc++;
8766   return cc;
8767   }
8768 
8769 *end = cc + 1;
8770 #ifdef SUPPORT_UTF
8771 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
8772 #endif
8773 return cc;
8774 }
8775 
compile_iterator_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)8776 static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
8777 {
8778 DEFINE_COMPILER;
8779 backtrack_common *backtrack;
8780 pcre_uchar opcode;
8781 pcre_uchar type;
8782 sljit_u32 max = 0, exact;
8783 BOOL fast_fail;
8784 sljit_s32 fast_str_ptr;
8785 BOOL charpos_enabled;
8786 pcre_uchar charpos_char;
8787 unsigned int charpos_othercasebit;
8788 pcre_uchar *end;
8789 jump_list *no_match = NULL;
8790 jump_list *no_char1_match = NULL;
8791 struct sljit_jump *jump = NULL;
8792 struct sljit_label *label;
8793 int private_data_ptr = PRIVATE_DATA(cc);
8794 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
8795 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
8796 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
8797 int tmp_base, tmp_offset;
8798 
8799 PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
8800 
8801 fast_str_ptr = PRIVATE_DATA(cc + 1);
8802 fast_fail = TRUE;
8803 
8804 SLJIT_ASSERT(common->fast_forward_bc_ptr == NULL || fast_str_ptr == 0 || cc == common->fast_forward_bc_ptr);
8805 
8806 if (cc == common->fast_forward_bc_ptr)
8807   fast_fail = FALSE;
8808 else if (common->fast_fail_start_ptr == 0)
8809   fast_str_ptr = 0;
8810 
8811 SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || fast_str_ptr == 0
8812   || (fast_str_ptr >= common->fast_fail_start_ptr && fast_str_ptr <= common->fast_fail_end_ptr));
8813 
8814 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
8815 
8816 if (type != OP_EXTUNI)
8817   {
8818   tmp_base = TMP3;
8819   tmp_offset = 0;
8820   }
8821 else
8822   {
8823   tmp_base = SLJIT_MEM1(SLJIT_SP);
8824   tmp_offset = POSSESSIVE0;
8825   }
8826 
8827 if (fast_fail && fast_str_ptr != 0)
8828   add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr));
8829 
8830 /* Handle fixed part first. */
8831 if (exact > 1)
8832   {
8833   SLJIT_ASSERT(fast_str_ptr == 0);
8834   if (common->mode == JIT_COMPILE
8835 #ifdef SUPPORT_UTF
8836       && !common->utf
8837 #endif
8838       )
8839     {
8840     OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
8841     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
8842     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
8843     label = LABEL();
8844     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
8845     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8846     JUMPTO(SLJIT_NOT_ZERO, label);
8847     }
8848   else
8849     {
8850     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
8851     label = LABEL();
8852     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
8853     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8854     JUMPTO(SLJIT_NOT_ZERO, label);
8855     }
8856   }
8857 else if (exact == 1)
8858   compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
8859 
8860 switch(opcode)
8861   {
8862   case OP_STAR:
8863   case OP_UPTO:
8864   SLJIT_ASSERT(fast_str_ptr == 0 || opcode == OP_STAR);
8865 
8866   if (type == OP_ANYNL || type == OP_EXTUNI)
8867     {
8868     SLJIT_ASSERT(private_data_ptr == 0);
8869     SLJIT_ASSERT(fast_str_ptr == 0);
8870 
8871     allocate_stack(common, 2);
8872     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8873     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
8874 
8875     if (opcode == OP_UPTO)
8876       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
8877 
8878     label = LABEL();
8879     compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
8880     if (opcode == OP_UPTO)
8881       {
8882       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
8883       OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
8884       jump = JUMP(SLJIT_ZERO);
8885       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
8886       }
8887 
8888     /* We cannot use TMP3 because of this allocate_stack. */
8889     allocate_stack(common, 1);
8890     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8891     JUMPTO(SLJIT_JUMP, label);
8892     if (jump != NULL)
8893       JUMPHERE(jump);
8894     }
8895   else
8896     {
8897     charpos_enabled = FALSE;
8898     charpos_char = 0;
8899     charpos_othercasebit = 0;
8900 
8901     if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
8902       {
8903       charpos_enabled = TRUE;
8904 #ifdef SUPPORT_UTF
8905       charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
8906 #endif
8907       if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
8908         {
8909         charpos_othercasebit = char_get_othercase_bit(common, end + 1);
8910         if (charpos_othercasebit == 0)
8911           charpos_enabled = FALSE;
8912         }
8913 
8914       if (charpos_enabled)
8915         {
8916         charpos_char = end[1];
8917         /* Consumpe the OP_CHAR opcode. */
8918         end += 2;
8919 #if defined COMPILE_PCRE8
8920         SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
8921 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
8922         SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
8923         if ((charpos_othercasebit & 0x100) != 0)
8924           charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
8925 #endif
8926         if (charpos_othercasebit != 0)
8927           charpos_char |= charpos_othercasebit;
8928 
8929         BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
8930         BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
8931         BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
8932         }
8933       }
8934 
8935     if (charpos_enabled)
8936       {
8937       if (opcode == OP_UPTO)
8938         OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
8939 
8940       /* Search the first instance of charpos_char. */
8941       jump = JUMP(SLJIT_JUMP);
8942       label = LABEL();
8943       if (opcode == OP_UPTO)
8944         {
8945         OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8946         add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
8947         }
8948       compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
8949       if (fast_str_ptr != 0)
8950         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
8951       JUMPHERE(jump);
8952 
8953       detect_partial_match(common, &backtrack->topbacktracks);
8954       OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8955       if (charpos_othercasebit != 0)
8956         OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
8957       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
8958 
8959       if (private_data_ptr == 0)
8960         allocate_stack(common, 2);
8961       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8962       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
8963       if (opcode == OP_UPTO)
8964         {
8965         OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8966         add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
8967         }
8968 
8969       /* Search the last instance of charpos_char. */
8970       label = LABEL();
8971       compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
8972       if (fast_str_ptr != 0)
8973         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
8974       detect_partial_match(common, &no_match);
8975       OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8976       if (charpos_othercasebit != 0)
8977         OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
8978       if (opcode == OP_STAR)
8979         {
8980         CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
8981         OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8982         }
8983       else
8984         {
8985         jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
8986         OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8987         JUMPHERE(jump);
8988         }
8989 
8990       if (opcode == OP_UPTO)
8991         {
8992         OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8993         JUMPTO(SLJIT_NOT_ZERO, label);
8994         }
8995       else
8996         JUMPTO(SLJIT_JUMP, label);
8997 
8998       set_jumps(no_match, LABEL());
8999       OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9000       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9001       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9002       }
9003 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
9004     else if (common->utf)
9005       {
9006       if (private_data_ptr == 0)
9007         allocate_stack(common, 2);
9008 
9009       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9010       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
9011 
9012       if (opcode == OP_UPTO)
9013         OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9014 
9015       label = LABEL();
9016       compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9017       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9018 
9019       if (opcode == OP_UPTO)
9020         {
9021         OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9022         JUMPTO(SLJIT_NOT_ZERO, label);
9023         }
9024       else
9025         JUMPTO(SLJIT_JUMP, label);
9026 
9027       set_jumps(no_match, LABEL());
9028       OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9029       if (fast_str_ptr != 0)
9030         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9031       }
9032 #endif
9033     else
9034       {
9035       if (private_data_ptr == 0)
9036         allocate_stack(common, 2);
9037 
9038       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
9039       if (opcode == OP_UPTO)
9040         OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9041 
9042       label = LABEL();
9043       detect_partial_match(common, &no_match);
9044       compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
9045       if (opcode == OP_UPTO)
9046         {
9047         OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9048         JUMPTO(SLJIT_NOT_ZERO, label);
9049         OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9050         }
9051       else
9052         JUMPTO(SLJIT_JUMP, label);
9053 
9054       set_jumps(no_char1_match, LABEL());
9055       OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9056       set_jumps(no_match, LABEL());
9057       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9058       if (fast_str_ptr != 0)
9059         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9060       }
9061     }
9062   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9063   break;
9064 
9065   case OP_MINSTAR:
9066   if (private_data_ptr == 0)
9067     allocate_stack(common, 1);
9068   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9069   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9070   if (fast_str_ptr != 0)
9071     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9072   break;
9073 
9074   case OP_MINUPTO:
9075   SLJIT_ASSERT(fast_str_ptr == 0);
9076   if (private_data_ptr == 0)
9077     allocate_stack(common, 2);
9078   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9079   OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
9080   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9081   break;
9082 
9083   case OP_QUERY:
9084   case OP_MINQUERY:
9085   SLJIT_ASSERT(fast_str_ptr == 0);
9086   if (private_data_ptr == 0)
9087     allocate_stack(common, 1);
9088   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9089   if (opcode == OP_QUERY)
9090     compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
9091   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9092   break;
9093 
9094   case OP_EXACT:
9095   break;
9096 
9097   case OP_POSSTAR:
9098 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
9099   if (common->utf)
9100     {
9101     OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9102     label = LABEL();
9103     compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9104     OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9105     JUMPTO(SLJIT_JUMP, label);
9106     set_jumps(no_match, LABEL());
9107     OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
9108     if (fast_str_ptr != 0)
9109       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9110     break;
9111     }
9112 #endif
9113   label = LABEL();
9114   detect_partial_match(common, &no_match);
9115   compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
9116   JUMPTO(SLJIT_JUMP, label);
9117   set_jumps(no_char1_match, LABEL());
9118   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9119   set_jumps(no_match, LABEL());
9120   if (fast_str_ptr != 0)
9121     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9122   break;
9123 
9124   case OP_POSUPTO:
9125   SLJIT_ASSERT(fast_str_ptr == 0);
9126 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
9127   if (common->utf)
9128     {
9129     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
9130     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9131     label = LABEL();
9132     compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9133     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
9134     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9135     JUMPTO(SLJIT_NOT_ZERO, label);
9136     set_jumps(no_match, LABEL());
9137     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
9138     break;
9139     }
9140 #endif
9141   OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9142   label = LABEL();
9143   detect_partial_match(common, &no_match);
9144   compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
9145   OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9146   JUMPTO(SLJIT_NOT_ZERO, label);
9147   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9148   set_jumps(no_char1_match, LABEL());
9149   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9150   set_jumps(no_match, LABEL());
9151   break;
9152 
9153   case OP_POSQUERY:
9154   SLJIT_ASSERT(fast_str_ptr == 0);
9155   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9156   compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9157   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9158   set_jumps(no_match, LABEL());
9159   OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
9160   break;
9161 
9162   default:
9163   SLJIT_UNREACHABLE();
9164   break;
9165   }
9166 
9167 count_match(common);
9168 return end;
9169 }
9170 
/* Emits the matching path for OP_FAIL, OP_ACCEPT and OP_ASSERT_ACCEPT.

   A backtrack_common record is pushed for the opcode first (PUSH_BACKTRACK is
   defined outside this view; its NULL argument is presumably the value
   returned when the allocation fails -- confirm).

   OP_FAIL emits an unconditional jump collected in backtrack->topbacktracks.
   The accept opcodes jump to common->accept_label when that label is already
   set; otherwise the jump is collected in the common->accept list.

   When the opcode is OP_ACCEPT, no recursion entry is being compiled
   (common->currententry == NULL) and common->might_be_empty is set, the
   accept is guarded by run-time checks of the notempty and notempty_atstart
   fields of jit_arguments.

   Returns cc + 1 on every path that reaches a return statement here. */
compile_fail_accept_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)9171 static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
9172 {
9173 DEFINE_COMPILER;
9174 backtrack_common *backtrack;
9175 
9176 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9177 
9178 if (*cc == OP_FAIL)
9179   {
/* Always fails: the jump is bound later through backtrack->topbacktracks. */
9180   add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
9181   return cc + 1;
9182   }
9183 
9184 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
9185   {
9186   /* No need to check notempty conditions. */
9187   if (common->accept_label == NULL)
9188     add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
9189   else
9190     JUMPTO(SLJIT_JUMP, common->accept_label);
9191   return cc + 1;
9192   }
9193 
/* Run-time check 1: accept at once when STR_PTR differs from the value stored
   at OVECTOR(0) (presumably the match start, i.e. the match is not empty --
   confirm). */
9194 if (common->accept_label == NULL)
9195   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
9196 else
9197   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
/* Run-time check 2: STR_PTR equals OVECTOR(0); backtrack when the notempty
   byte of jit_arguments is non-zero. TMP1 keeps the arguments pointer for the
   following loads. */
9198 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9199 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
9200 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
/* Run-time check 3: accept when notempty_atstart is zero as well. */
9201 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
9202 if (common->accept_label == NULL)
9203   add_jump(compiler, &common->accept, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9204 else
9205   CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label);
/* Run-time check 4: notempty_atstart is set; accept only when STR_PTR differs
   from the str field of jit_arguments, otherwise fall through and backtrack. */
9206 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
9207 if (common->accept_label == NULL)
9208   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
9209 else
9210   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
9211 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
9212 return cc + 1;
9213 }
9214 
compile_close_matchingpath(compiler_common * common,pcre_uchar * cc)9215 static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
9216 {
9217 DEFINE_COMPILER;
9218 int offset = GET2(cc, 1);
9219 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
9220 
9221 /* Data will be discarded anyway... */
9222 if (common->currententry != NULL)
9223   return cc + 1 + IMM2_SIZE;
9224 
9225 if (!optimized_cbracket)
9226   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
9227 offset <<= 1;
9228 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
9229 if (!optimized_cbracket)
9230   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
9231 return cc + 1 + IMM2_SIZE;
9232 }
9233 
compile_control_verb_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)9234 static SLJIT_INLINE pcre_uchar *compile_control_verb_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
9235 {
9236 DEFINE_COMPILER;
9237 backtrack_common *backtrack;
9238 pcre_uchar opcode = *cc;
9239 pcre_uchar *ccend = cc + 1;
9240 
9241 if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
9242   ccend += 2 + cc[1];
9243 
9244 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9245 
9246 if (opcode == OP_SKIP)
9247   {
9248   allocate_stack(common, 1);
9249   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9250   return ccend;
9251   }
9252 
9253 if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
9254   {
9255   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9256   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
9257   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
9258   OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
9259   }
9260 
9261 return ccend;
9262 }
9263 
/* One-element opcode buffer holding OP_THEN_TRAP. The then-trap backtrack
   records created in this file point their common.cc field at it. */
9264 static pcre_uchar then_trap_opcode[1] = { OP_THEN_TRAP };
9265 
/* Emits the code that installs a "then trap" in front of the opcode range
   [cc, ccend).

   A then_trap_backtrack record is pushed and remembered in common->then_trap.
   Its common.cc is redirected to then_trap_opcode, its start is the offset of
   cc from common->start, and its framesize comes from get_framesize().

   The emitted code reserves 3 + max(framesize, 0) stack slots and fills the
   last three of them (STACK(size - 1), STACK(size - 2), STACK(size - 3)) with
   the start offset, the type_then_trap tag and the previous value of the
   control head. The control head slot (common->control_head_ptr) is updated
   to point at this three-slot record. When framesize is non-negative,
   init_frame() is called for the remaining slots. */
compile_then_trap_matchingpath(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,backtrack_common * parent)9266 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
9267 {
9268 DEFINE_COMPILER;
9269 backtrack_common *backtrack;
9270 BOOL needs_control_head;
9271 int size;
9272 
9273 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
9274 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
9275 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
9276 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
/* needs_control_head is only an output argument here; its value is not used. */
9277 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
9278 
/* Three slots for the trap record plus the frame (a negative framesize
   contributes nothing). */
9279 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
9280 size = 3 + (size < 0 ? 0 : size);
9281 
/* Keep the old control head in TMP2 while the stack is extended. */
9282 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9283 allocate_stack(common, size);
/* The new control head is STACK_TOP advanced by the size of the frame part,
   or STACK_TOP itself when there is no frame part. */
9284 if (size > 3)
9285   OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
9286 else
9287   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
9288 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
9289 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
9290 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
9291 
9292 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
9293 if (size >= 0)
9294   init_frame(common, cc, ccend, size - 1, 0, FALSE);
9295 }
9296 
/* Generates the matching path for the opcode range [cc, ccend).

   The loop dispatches on the opcode at cc. Most opcodes are forwarded to a
   dedicated compile_*_matchingpath() helper that returns the address of the
   next opcode; OP_SET_SOM, OP_BRAMINZERO, OP_MARK and OP_SKIPZERO are handled
   inline. The function returns early when a helper hands back NULL
   (presumably after a compiler error -- confirm) or when one of the
   PUSH_BACKTRACK_NOVALUE invocations bails out.

   Items that do not push their own backtrack record receive a jump list for
   their failure jumps: the nextbacktracks list of parent->top when parent->top
   is not NULL, otherwise parent->topbacktracks.

   When common->has_then is set and common->then_offsets has a non-zero entry
   for cc, the whole range is bracketed by two then_trap_backtrack records:
   one pushed before the loop by compile_then_trap_matchingpath() and one
   pushed after it. common->then_trap is saved before and restored after.

   ccend must point at OP_END or at an opcode in the OP_ALT..OP_KETRPOS
   range (asserted below). */
compile_matchingpath(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,backtrack_common * parent)9297 static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
9298 {
9299 DEFINE_COMPILER;
9300 backtrack_common *backtrack;
9301 BOOL has_then_trap = FALSE;
9302 then_trap_backtrack *save_then_trap = NULL;
9303 
9304 SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
9305 
9306 if (common->has_then && common->then_offsets[cc - common->start] != 0)
9307   {
9308   SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
9309   has_then_trap = TRUE;
9310   save_then_trap = common->then_trap;
9311   /* Tail item on backtrack. */
9312   compile_then_trap_matchingpath(common, cc, ccend, parent);
9313   }
9314 
9315 while (cc < ccend)
9316   {
9317   switch(*cc)
9318     {
    /* Simple assertions. */
9319     case OP_SOD:
9320     case OP_SOM:
9321     case OP_NOT_WORD_BOUNDARY:
9322     case OP_WORD_BOUNDARY:
9323     case OP_EODN:
9324     case OP_EOD:
9325     case OP_DOLL:
9326     case OP_DOLLM:
9327     case OP_CIRC:
9328     case OP_CIRCM:
9329     case OP_REVERSE:
9330     cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
9331     break;
9332 
    /* Single character type matchers. */
9333     case OP_NOT_DIGIT:
9334     case OP_DIGIT:
9335     case OP_NOT_WHITESPACE:
9336     case OP_WHITESPACE:
9337     case OP_NOT_WORDCHAR:
9338     case OP_WORDCHAR:
9339     case OP_ANY:
9340     case OP_ALLANY:
9341     case OP_ANYBYTE:
9342     case OP_NOTPROP:
9343     case OP_PROP:
9344     case OP_ANYNL:
9345     case OP_NOT_HSPACE:
9346     case OP_HSPACE:
9347     case OP_NOT_VSPACE:
9348     case OP_VSPACE:
9349     case OP_EXTUNI:
9350     case OP_NOT:
9351     case OP_NOTI:
9352     cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
9353     break;
9354 
    /* The previous OVECTOR(0) value is saved in one stack slot before
       OVECTOR(0) is overwritten with STR_PTR. */
9355     case OP_SET_SOM:
9356     PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
9357     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
9358     allocate_stack(common, 1);
9359     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
9360     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9361     cc++;
9362     break;
9363 
    /* Literal characters: the multi-character helper is used only when
       common->mode is JIT_COMPILE. */
9364     case OP_CHAR:
9365     case OP_CHARI:
9366     if (common->mode == JIT_COMPILE)
9367       cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
9368     else
9369       cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
9370     break;
9371 
    /* Repeated single characters and character types. */
9372     case OP_STAR:
9373     case OP_MINSTAR:
9374     case OP_PLUS:
9375     case OP_MINPLUS:
9376     case OP_QUERY:
9377     case OP_MINQUERY:
9378     case OP_UPTO:
9379     case OP_MINUPTO:
9380     case OP_EXACT:
9381     case OP_POSSTAR:
9382     case OP_POSPLUS:
9383     case OP_POSQUERY:
9384     case OP_POSUPTO:
9385     case OP_STARI:
9386     case OP_MINSTARI:
9387     case OP_PLUSI:
9388     case OP_MINPLUSI:
9389     case OP_QUERYI:
9390     case OP_MINQUERYI:
9391     case OP_UPTOI:
9392     case OP_MINUPTOI:
9393     case OP_EXACTI:
9394     case OP_POSSTARI:
9395     case OP_POSPLUSI:
9396     case OP_POSQUERYI:
9397     case OP_POSUPTOI:
9398     case OP_NOTSTAR:
9399     case OP_NOTMINSTAR:
9400     case OP_NOTPLUS:
9401     case OP_NOTMINPLUS:
9402     case OP_NOTQUERY:
9403     case OP_NOTMINQUERY:
9404     case OP_NOTUPTO:
9405     case OP_NOTMINUPTO:
9406     case OP_NOTEXACT:
9407     case OP_NOTPOSSTAR:
9408     case OP_NOTPOSPLUS:
9409     case OP_NOTPOSQUERY:
9410     case OP_NOTPOSUPTO:
9411     case OP_NOTSTARI:
9412     case OP_NOTMINSTARI:
9413     case OP_NOTPLUSI:
9414     case OP_NOTMINPLUSI:
9415     case OP_NOTQUERYI:
9416     case OP_NOTMINQUERYI:
9417     case OP_NOTUPTOI:
9418     case OP_NOTMINUPTOI:
9419     case OP_NOTEXACTI:
9420     case OP_NOTPOSSTARI:
9421     case OP_NOTPOSPLUSI:
9422     case OP_NOTPOSQUERYI:
9423     case OP_NOTPOSUPTOI:
9424     case OP_TYPESTAR:
9425     case OP_TYPEMINSTAR:
9426     case OP_TYPEPLUS:
9427     case OP_TYPEMINPLUS:
9428     case OP_TYPEQUERY:
9429     case OP_TYPEMINQUERY:
9430     case OP_TYPEUPTO:
9431     case OP_TYPEMINUPTO:
9432     case OP_TYPEEXACT:
9433     case OP_TYPEPOSSTAR:
9434     case OP_TYPEPOSPLUS:
9435     case OP_TYPEPOSQUERY:
9436     case OP_TYPEPOSUPTO:
9437     cc = compile_iterator_matchingpath(common, cc, parent);
9438     break;
9439 
    /* Character classes: the opcode that follows the 32-byte class bitmap
       decides whether the class is repeated. */
9440     case OP_CLASS:
9441     case OP_NCLASS:
9442     if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE)
9443       cc = compile_iterator_matchingpath(common, cc, parent);
9444     else
9445       cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
9446     break;
9447 
9448 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
    /* Extended classes: GET(cc, 1) gives the offset of the opcode that
       follows the class data. */
9449     case OP_XCLASS:
9450     if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
9451       cc = compile_iterator_matchingpath(common, cc, parent);
9452     else
9453       cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
9454     break;
9455 #endif
9456 
    /* Back references, optionally followed by a repeat opcode. */
9457     case OP_REF:
9458     case OP_REFI:
9459     if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
9460       cc = compile_ref_iterator_matchingpath(common, cc, parent);
9461     else
9462       {
9463       compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
9464       cc += 1 + IMM2_SIZE;
9465       }
9466     break;
9467 
    /* Same as above for OP_DNREF/OP_DNREFI, which carry two IMM2 operands
       and need compile_dnref_search() first. */
9468     case OP_DNREF:
9469     case OP_DNREFI:
9470     if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
9471       cc = compile_ref_iterator_matchingpath(common, cc, parent);
9472     else
9473       {
9474       compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
9475       compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
9476       cc += 1 + 2 * IMM2_SIZE;
9477       }
9478     break;
9479 
9480     case OP_RECURSE:
9481     cc = compile_recurse_matchingpath(common, cc, parent);
9482     break;
9483 
9484     case OP_CALLOUT:
9485     cc = compile_callout_matchingpath(common, cc, parent);
9486     break;
9487 
9488     case OP_ASSERT:
9489     case OP_ASSERT_NOT:
9490     case OP_ASSERTBACK:
9491     case OP_ASSERTBACK_NOT:
9492     PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
9493     cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
9494     break;
9495 
    /* The bracket itself is skipped on the matching path; only STR_PTR is
       saved (one slot, or two slots with a leading zero when the bracket
       ends with OP_KETRMIN). */
9496     case OP_BRAMINZERO:
9497     PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
9498     cc = bracketend(cc + 1);
9499     if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
9500       {
9501       allocate_stack(common, 1);
9502       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9503       }
9504     else
9505       {
9506       allocate_stack(common, 2);
9507       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9508       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
9509       }
9510     BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
9511     count_match(common);
9512     break;
9513 
9514     case OP_ONCE:
9515     case OP_ONCE_NC:
9516     case OP_BRA:
9517     case OP_CBRA:
9518     case OP_COND:
9519     case OP_SBRA:
9520     case OP_SCBRA:
9521     case OP_SCOND:
9522     cc = compile_bracket_matchingpath(common, cc, parent);
9523     break;
9524 
    /* OP_BRAZERO is followed either by a bracket or by an assertion
       (opcodes up to OP_ASSERTBACK_NOT). */
9525     case OP_BRAZERO:
9526     if (cc[1] > OP_ASSERTBACK_NOT)
9527       cc = compile_bracket_matchingpath(common, cc, parent);
9528     else
9529       {
9530       PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
9531       cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
9532       }
9533     break;
9534 
9535     case OP_BRAPOS:
9536     case OP_CBRAPOS:
9537     case OP_SBRAPOS:
9538     case OP_SCBRAPOS:
9539     case OP_BRAPOSZERO:
9540     cc = compile_bracketpos_matchingpath(common, cc, parent);
9541     break;
9542 
    /* The previous mark is saved on the stack and cc + 2 becomes the new
       mark, both in the local mark slot and in jit_arguments. With
       common->has_skip_arg set, five slots are used: STACK(0) holds the old
       control head, STACK(1) type_mark, STACK(2) the mark address, STACK(3)
       STR_PTR and STACK(4) the previous mark; the control head then points
       at this record. */
9543     case OP_MARK:
9544     PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
9545     SLJIT_ASSERT(common->mark_ptr != 0);
9546     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
9547     allocate_stack(common, common->has_skip_arg ? 5 : 1);
9548     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9549     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
9550     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
9551     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
9552     OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
9553     if (common->has_skip_arg)
9554       {
9555       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9556       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
9557       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
9558       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
9559       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
9560       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
9561       }
9562     cc += 1 + 2 + cc[1];
9563     break;
9564 
9565     case OP_PRUNE:
9566     case OP_PRUNE_ARG:
9567     case OP_SKIP:
9568     case OP_SKIP_ARG:
9569     case OP_THEN:
9570     case OP_THEN_ARG:
9571     case OP_COMMIT:
9572     cc = compile_control_verb_matchingpath(common, cc, parent);
9573     break;
9574 
9575     case OP_FAIL:
9576     case OP_ACCEPT:
9577     case OP_ASSERT_ACCEPT:
9578     cc = compile_fail_accept_matchingpath(common, cc, parent);
9579     break;
9580 
9581     case OP_CLOSE:
9582     cc = compile_close_matchingpath(common, cc);
9583     break;
9584 
    /* No code is emitted for the bracket that follows OP_SKIPZERO. */
9585     case OP_SKIPZERO:
9586     cc = bracketend(cc + 1);
9587     break;
9588 
9589     default:
9590     SLJIT_UNREACHABLE();
9591     return;
9592     }
  /* A helper returned NULL: give up on this range. */
9593   if (cc == NULL)
9594     return;
9595   }
9596 
9597 if (has_then_trap)
9598   {
9599   /* Head item on backtrack. */
9600   PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
9601   BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
9602   BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
9603   common->then_trap = save_then_trap;
9604   }
9605 SLJIT_ASSERT(cc == ccend);
9606 }
9607 
/* The helper macros of the matching path generators are retired here. */
9608 #undef PUSH_BACKTRACK
9609 #undef PUSH_BACKTRACK_NOVALUE
9610 #undef BACKTRACK_AS
9611 
/* Calls compile_backtrackingpath() for the given record and returns from the
   enclosing function (without a value) when the sljit compiler reports an
   error. Expects the names common and compiler to be in scope at the point
   of use. */
9612 #define COMPILE_BACKTRACKINGPATH(current) \
9613   do \
9614     { \
9615     compile_backtrackingpath(common, (current)); \
9616     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
9617       return; \
9618     } \
9619   while (0)
9620 
/* Casts the variable named current (which must be in scope at the point of
   use) to a pointer to the given backtrack type. */
9621 #define CURRENT_AS(type) ((type *)current)
9622 
/* Generates the backtracking path of a single character iterator.

   The iterator is decoded again with get_iterator_parameters(), which yields
   the normalised opcode, the character type, the max/exact counts and leaves
   cc pointing at the data handed to compile_char1_matchingpath().

   The iterator state lives in two machine words addressed as base+offset0 and
   base+offset1: on the backtrack stack (STACK(0)/STACK(1)) when the opcode has
   no private data slot, otherwise in the private data area relative to
   SLJIT_SP. Stack slots are released only in the former case.

   Every case either jumps back to the matchingpath label stored in the
   char_iterator_backtrack record or falls through to the end, where
   current->topbacktracks is bound. */
compile_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)9623 static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9624 {
9625 DEFINE_COMPILER;
9626 pcre_uchar *cc = current->cc;
9627 pcre_uchar opcode;
9628 pcre_uchar type;
9629 sljit_u32 max = 0, exact;
9630 struct sljit_label *label = NULL;
9631 struct sljit_jump *jump = NULL;
9632 jump_list *jumplist = NULL;
9633 pcre_uchar *end;
9634 int private_data_ptr = PRIVATE_DATA(cc);
9635 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
9636 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
9637 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
9638 
9639 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
9640 
9641 switch(opcode)
9642   {
9643   case OP_STAR:
9644   case OP_UPTO:
9645   if (type == OP_ANYNL || type == OP_EXTUNI)
9646     {
    /* One saved STR_PTR is popped per backtrack; a zero value ends the
       retries (presumably a terminator pushed by the matching path --
       confirm). */
9647     SLJIT_ASSERT(private_data_ptr == 0);
9648     set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
9649     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9650     free_stack(common, 1);
9651     CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9652     }
9653   else
9654     {
9655     if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
9656       {
      /* Scan backwards from the position in offset0 towards the limit in
         offset1 (kept in TMP2) and resume the matching path at a position
         whose preceding code unit, after OR-ing othercasebit when that is
         non-zero, equals u.charpos.chr. */
9657       OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9658       OP1(SLJIT_MOV, TMP2, 0, base, offset1);
9659       OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9660 
9661       jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
9662       label = LABEL();
9663       OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
9664       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9665       if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
9666         OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
9667       CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9668       skip_char_back(common);
9669       CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
9670       }
9671     else
9672       {
      /* Give back one character per backtrack until the position in
         offset0 is no longer above the limit stored in offset1. */
9673       OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9674       jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
9675       skip_char_back(common);
9676       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9677       JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9678       }
    /* Limit reached: nothing is left to give back. */
9679     JUMPHERE(jump);
9680     if (private_data_ptr == 0)
9681       free_stack(common, 2);
9682     }
9683   break;
9684 
  /* Lazy star: try to consume one more character from the saved position;
     on success store the new position and retry the continuation. */
9685   case OP_MINSTAR:
9686   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9687   compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
9688   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9689   JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9690   set_jumps(jumplist, LABEL());
9691   if (private_data_ptr == 0)
9692     free_stack(common, 1);
9693   break;
9694 
  /* Lazy bounded repeat: offset1 holds a counter that is decremented on
     every backtrack; reaching zero ends the retries. */
9695   case OP_MINUPTO:
9696   OP1(SLJIT_MOV, TMP1, 0, base, offset1);
9697   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9698   OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9699   add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));
9700 
9701   OP1(SLJIT_MOV, base, offset1, TMP1, 0);
9702   compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
9703   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9704   JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9705 
9706   set_jumps(jumplist, LABEL());
9707   if (private_data_ptr == 0)
9708     free_stack(common, 2);
9709   break;
9710 
  /* Greedy optional item. The saved position is reloaded and the slot is
     cleared; a non-zero position restarts the continuation without the
     item, a zero one means that alternative was already tried. The
     u.backtracks jumps (failures of the item itself) enter at the second
     half, which does the same reload/clear and always retries. */
9711   case OP_QUERY:
9712   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9713   OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
9714   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9715   jump = JUMP(SLJIT_JUMP);
9716   set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
9717   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9718   OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
9719   JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9720   JUMPHERE(jump);
9721   if (private_data_ptr == 0)
9722     free_stack(common, 1);
9723   break;
9724 
  /* Lazy optional item: the slot is cleared after loading it, so the item
     is attempted only on the first backtrack. */
9725   case OP_MINQUERY:
9726   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9727   OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
9728   jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9729   compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
9730   JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9731   set_jumps(jumplist, LABEL());
9732   JUMPHERE(jump);
9733   if (private_data_ptr == 0)
9734     free_stack(common, 1);
9735   break;
9736 
  /* No code is emitted for these opcodes on the backtracking path. */
9737   case OP_EXACT:
9738   case OP_POSSTAR:
9739   case OP_POSQUERY:
9740   case OP_POSUPTO:
9741   break;
9742 
9743   default:
9744   SLJIT_UNREACHABLE();
9745   break;
9746   }
9747 
9748 set_jumps(current->topbacktracks, LABEL());
9749 }
9750 
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)9751 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9752 {
9753 DEFINE_COMPILER;
9754 pcre_uchar *cc = current->cc;
9755 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9756 pcre_uchar type;
9757 
9758 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
9759 
9760 if ((type & 0x1) == 0)
9761   {
9762   /* Maximize case. */
9763   set_jumps(current->topbacktracks, LABEL());
9764   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9765   free_stack(common, 1);
9766   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
9767   return;
9768   }
9769 
9770 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9771 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
9772 set_jumps(current->topbacktracks, LABEL());
9773 free_stack(common, ref ? 2 : 3);
9774 }
9775 
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)9776 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9777 {
9778 DEFINE_COMPILER;
9779 
9780 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
9781   compile_backtrackingpath(common, current->top);
9782 set_jumps(current->topbacktracks, LABEL());
9783 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
9784   return;
9785 
9786 if (common->has_set_som && common->mark_ptr != 0)
9787   {
9788   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9789   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9790   free_stack(common, 2);
9791   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP2, 0);
9792   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
9793   }
9794 else if (common->has_set_som || common->mark_ptr != 0)
9795   {
9796   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9797   free_stack(common, 1);
9798   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
9799   }
9800 }
9801 
/* Generates the backtracking path of an assertion, optionally prefixed by
   OP_BRAZERO (OP_BRAMINZERO is excluded by the assert below).

   With OP_BRAZERO the saved STR_PTR is first reloaded from STACK(0). A
   non-zero value means the variant without the assertion has not been tried
   yet: the slot is overwritten with zero and control returns to the
   matchingpath label of the assert_backtrack record. A zero value means both
   variants have failed and the slot is released.

   When framesize is negative, or for the negative assertions, no frame has
   to be reverted. For OP_ASSERT/OP_ASSERTBACK with a non-negative framesize
   the stack top is restored from the private data slot, the
   common->revertframes routine is called, and the private data slot is
   reloaded from the stack before current->topbacktracks is bound. */
compile_assert_backtrackingpath(compiler_common * common,struct backtrack_common * current)9802 static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9803 {
9804 DEFINE_COMPILER;
9805 pcre_uchar *cc = current->cc;
9806 pcre_uchar bra = OP_BRA;
9807 struct sljit_jump *brajump = NULL;
9808 
9809 SLJIT_ASSERT(*cc != OP_BRAMINZERO);
9810 if (*cc == OP_BRAZERO)
9811   {
9812   bra = *cc;
9813   cc++;
9814   }
9815 
9816 if (bra == OP_BRAZERO)
9817   {
9818   SLJIT_ASSERT(current->topbacktracks == NULL);
9819   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9820   }
9821 
/* No frame to revert. */
9822 if (CURRENT_AS(assert_backtrack)->framesize < 0)
9823   {
9824   set_jumps(current->topbacktracks, LABEL());
9825 
9826   if (bra == OP_BRAZERO)
9827     {
9828     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9829     CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
9830     free_stack(common, 1);
9831     }
9832   return;
9833   }
9834 
9835 if (bra == OP_BRAZERO)
9836   {
9837   if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
9838     {
9839     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9840     CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
9841     free_stack(common, 1);
9842     return;
9843     }
  /* Positive assertion: release the slot now and skip the frame handling
     below when the saved STR_PTR was zero. */
9844   free_stack(common, 1);
9845   brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9846   }
9847 
9848 if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
9849   {
  /* Restore the stack top saved in the private data slot, revert the frame
     through the shared revertframes routine, then reload the private data
     slot from the stack entry at STACK(-framesize - 1). */
9850   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
9851   add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9852   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(assert_backtrack)->framesize - 1));
9853 
9854   set_jumps(current->topbacktracks, LABEL());
9855   }
9856 else
9857   set_jumps(current->topbacktracks, LABEL());
9858 
9859 if (bra == OP_BRAZERO)
9860   {
9861   /* We know there is enough place on the stack. */
  /* The slot released above is taken again without going through
     allocate_stack(), marked with zero, and the matching path is retried. */
9862   OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9863   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9864   JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
9865   JUMPHERE(brajump);
9866   }
9867 }
9868 
/* Emits the backtracking path of a bracket group. compile_backtrackingpath
dispatches here for OP_BRA, OP_CBRA, OP_SBRA, OP_SCBRA, OP_ONCE, OP_ONCE_NC,
OP_COND and OP_SCOND, and for OP_BRAZERO when it does not prefix an assertion.

The generated code pops the values found on the backtrack stack (repeat
counter, iteration marker, saved capture data, alternative selector), runs the
backtracking code of the nested items, compiles the matching path of every
remaining alternative, and finally emits the handling that depends on the
closing ket and on the BRAZERO / BRAMINZERO prefix.

  common   shared JIT compiler state
  current  descriptor of the group, accessed as a bracket_backtrack */
static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
bracket_backtrack *bracket = CURRENT_AS(bracket_backtrack);
int private_data_ptr = bracket->private_data_ptr;
int opcode;
int stacksize;
int alt_count;
int alt_max;
int offset = 0;
int repeat_ptr = 0;
int repeat_type = 0;
int repeat_count = 0;
pcre_uchar *cc = current->cc;
pcre_uchar *ketpos;
pcre_uchar *ccbegin;
pcre_uchar *alt_start;
pcre_uchar bra = OP_BRA;
pcre_uchar ket;
assert_backtrack *cond_assert;
sljit_uw *next_update_addr = NULL;
BOOL is_cond;
BOOL cond_on_assert = FALSE;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
BOOL keeps_iteration_slot;
struct sljit_jump *brazero_jump = NULL;
struct sljit_jump *alt_jump1 = NULL;
struct sljit_jump *alt_jump2 = NULL;
struct sljit_jump *once_jump = NULL;
struct sljit_jump *cond_jump = NULL;
struct sljit_label *rmin_label = NULL;
struct sljit_label *exact_label = NULL;

/* Step over an optional BRAZERO / BRAMINZERO prefix, remembering it. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  bra = *cc++;

opcode = *cc;
is_cond = SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND);

/* Look at the closing ket. A plain OP_KET with private data carries repeat
information; UPTO and MINUPTO repeats are then handled like KETRMAX and
KETRMIN respectively. */
ketpos = bracketend(cc) - 1 - LINK_SIZE;
ket = *ketpos;
if (ket == OP_KET && PRIVATE_DATA(ketpos) != 0)
  {
  repeat_ptr = PRIVATE_DATA(ketpos);
  repeat_type = PRIVATE_DATA(ketpos + 2);
  repeat_count = PRIVATE_DATA(ketpos + 3);
  SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  else if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }

/* ccbegin stays at the opening opcode, cc moves to the end of the first alternative. */
ccbegin = cc;
cc += GET(cc, 1);

if (is_cond)
  {
  cond_on_assert = ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT;
  has_alternatives = cond_on_assert || bracket->u.condfailed != NULL;
  }
else
  has_alternatives = (*cc == OP_ALT);

if (opcode == OP_CBRA || opcode == OP_SCBRA)
  offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;

/* Fold opcode variants that are treated identically from here on. */
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;
else if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
  opcode = OP_ONCE;

alt_max = 0;
if (has_alternatives)
  alt_max = no_alternatives(ccbegin);

/* For OP_ONCE the lowest bit of framesize encodes needs_control_head. */
if (opcode == OP_ONCE)
  {
  needs_control_head = (bracket->u.framesize & 0x1) != 0;
  bracket->u.framesize >>= 1;
  }

/* Neither ket nor bra changes below, so this test is evaluated once. */
keeps_iteration_slot = (ket != OP_KET || bra != OP_BRA);

if (ket != OP_KET && repeat_type != 0)
  {
  /* The value loaded into TMP1 is also tested in the OP_KETRMIN case below. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  if (repeat_type == OP_UPTO)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  }

switch (ket)
  {
  case OP_KETRMAX:
  if (bra == OP_BRAZERO)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    brazero_jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
    }
  break;

  case OP_KETRMIN:
  if (bra != OP_BRAMINZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    if (repeat_type != 0)
      {
      /* TMP1 still holds the value loaded in the repeat block above. */
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, bracket->recursive_matchingpath);
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else if (opcode >= OP_SBRA || opcode == OP_ONCE)
      {
      /* Zero-length iteration check: compare STR_PTR with the saved start. */
      if (opcode == OP_ONCE && bracket->u.framesize >= 0)
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-bracket->u.framesize - 2), bracket->recursive_matchingpath);
        }
      else
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, bracket->recursive_matchingpath);
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else
      JUMPTO(SLJIT_JUMP, bracket->recursive_matchingpath);
    }
  rmin_label = LABEL();
  if (repeat_type != 0)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  break;

  default:
  if (bra == OP_BRAZERO)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    brazero_jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
    }
  else if (repeat_type == OP_EXACT)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
    exact_label = LABEL();
    }
  break;
  }

/* Capturing group: copy the values saved on the stack back to their slots. */
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    {
    SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
    free_stack(common, 3);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
    }
  else if (common->optimized_cbracket[offset >> 1] == 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  }

if (SLJIT_UNLIKELY(opcode == OP_ONCE))
  {
  if (bracket->u.framesize >= 0)
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    }
  once_jump = JUMP(SLJIT_JUMP);
  }
else if (has_alternatives)
  {
  /* Pop the alternative selector; it decides which backtracking code runs. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);

  if (is_cond)
    {
    /* Always exactly one alternative. */
    alt_max = 2;
    alt_jump1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
    }
  else if (alt_max > 4)
    {
    /* Table jump if alt_max is greater than 4. */
    next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
    if (SLJIT_UNLIKELY(next_update_addr == NULL))
      return;
    sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
    add_label_addr(common, next_update_addr++);
    }
  else
    {
    if (alt_max == 4)
      alt_jump2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
    alt_jump1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
    }
  }

COMPILE_BACKTRACKINGPATH(current->top);
if (current->topbacktracks)
  set_jumps(current->topbacktracks, LABEL());

if (is_cond)
  {
  /* Conditional block always has at most one alternative. */
  if (cond_on_assert)
    {
    SLJIT_ASSERT(has_alternatives);
    cond_assert = bracket->u.assert;
    if (cond_assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), cond_assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cond_assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-cond_assert->framesize - 1));
      }
    cond_jump = JUMP(SLJIT_JUMP);
    set_jumps(cond_assert->condfailed, LABEL());
    }
  else if (bracket->u.condfailed != NULL)
    {
    SLJIT_ASSERT(has_alternatives);
    cond_jump = JUMP(SLJIT_JUMP);
    set_jumps(bracket->u.condfailed, LABEL());
    }
  else
    SLJIT_ASSERT(!has_alternatives);
  }

if (has_alternatives)
  {
  alt_count = sizeof(sljit_uw);
  do
    {
    current->top = NULL;
    current->topbacktracks = NULL;
    current->nextbacktracks = NULL;
    /* Conditional blocks always have an additional alternative, even if it is empty. */
    if (*cc == OP_ALT)
      {
      alt_start = cc + 1 + LINK_SIZE;
      cc += GET(cc, 1);
      if (!is_cond)
        {
        /* Reload STR_PTR before the next alternative is tried. */
        if (opcode == OP_ONCE)
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
        else if (private_data_ptr != 0)
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        else
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
        }
      compile_matchingpath(common, alt_start, cc, current);
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
        return;
      }

    /* Instructions after the current alternative is successfully matched. */
    /* There is a similar code in compile_bracket_matchingpath. */
    if (opcode == OP_ONCE)
      match_once_common(common, ket, bracket->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

    /* First pass: count the stack slots this alternative has to push. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* We need to preserve the counter. TMP2 will be used below. */
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
      stacksize++;
      }
    if (keeps_iteration_slot)
      stacksize++;
    if (offset != 0)
      {
      if (common->capture_last_ptr != 0)
        stacksize++;
      if (common->optimized_cbracket[offset >> 1] == 0)
        stacksize += 2;
      }
    if (opcode != OP_ONCE)
      stacksize++;

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    /* Second pass: fill the slots, in the same order as they were counted. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* TMP2 was set above. */
      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
      stacksize++;
      }

    if (keeps_iteration_slot)
      {
      if (ket == OP_KET)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
      else
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      stacksize++;
      }

    if (offset != 0)
      stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

    /* The selector identifies this alternative when backtracking resumes. */
    if (opcode != OP_ONCE)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);

    if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
      {
      /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
      SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      }

    JUMPTO(SLJIT_JUMP, bracket->alternative_matchingpath);

    /* Bind the dispatch target for this alternative's backtracking code. */
    if (opcode != OP_ONCE)
      {
      if (alt_max > 4)
        add_label_addr(common, next_update_addr++);
      else if (alt_count == 2 * sizeof(sljit_uw))
        {
        JUMPHERE(alt_jump2);
        if (alt_max == 4)
          alt_jump1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
        }
      else
        {
        JUMPHERE(alt_jump1);
        if (alt_max == 3 && alt_count == sizeof(sljit_uw))
          alt_jump2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
        }
      alt_count += sizeof(sljit_uw);
      }

    COMPILE_BACKTRACKINGPATH(current->top);
    if (current->topbacktracks)
      set_jumps(current->topbacktracks, LABEL());
    SLJIT_ASSERT(!current->nextbacktracks);
    }
  while (*cc == OP_ALT);

  if (cond_jump != NULL)
    {
    SLJIT_ASSERT(is_cond);
    cond_assert = bracket->u.assert;
    if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && cond_assert->framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), cond_assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cond_assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-cond_assert->framesize - 1));
      }
    JUMPHERE(cond_jump);
    }

  /* Free the STR_PTR. */
  if (private_data_ptr == 0)
    free_stack(common, 1);
  }

if (offset != 0)
  {
  /* Using both tmp register is better for instruction scheduling. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }
else if (opcode == OP_SBRA || opcode == OP_SCOND)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  }
else if (opcode == OP_ONCE)
  {
  cc = ccbegin + GET(ccbegin, 1);
  stacksize = needs_control_head ? 1 : 0;

  if (bracket->u.framesize >= 0)
    {
    /* Reset head and drop saved frame. */
    stacksize += bracket->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
    }
  else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
    {
    /* The STR_PTR must be released. */
    stacksize++;
    }

  if (stacksize > 0)
    free_stack(common, stacksize);

  JUMPHERE(once_jump);
  /* Restore previous private_data_ptr */
  if (bracket->u.framesize >= 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-bracket->u.framesize - 1));
  else if (ket == OP_KETRMIN)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    /* See the OP_ONCE remark in the OP_KETRMIN branch at the end of this function. */
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }

/* Closing part: loop back for repeats, or continue on the zero-length path. */
if (repeat_type == OP_EXACT)
  {
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
  }
else if (ket == OP_KETRMAX)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (bra != OP_BRAZERO)
    free_stack(common, 1);

  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, bracket->recursive_matchingpath);
  if (bra == OP_BRAZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    JUMPTO(SLJIT_JUMP, bracket->zero_matchingpath);
    JUMPHERE(brazero_jump);
    free_stack(common, 1);
    }
  }
else if (ket == OP_KETRMIN)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  /* OP_ONCE removes everything in case of a backtrack, so we don't
  need to explicitly release the STR_PTR. The extra release would
  affect badly the free_stack(2) above. */
  if (opcode != OP_ONCE)
    free_stack(common, 1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
  if (opcode == OP_ONCE)
    free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
  else if (bra == OP_BRAMINZERO)
    free_stack(common, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  JUMPTO(SLJIT_JUMP, bracket->zero_matchingpath);
  JUMPHERE(brazero_jump);
  }
}
10330 
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)10331 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10332 {
10333 DEFINE_COMPILER;
10334 int offset;
10335 struct sljit_jump *jump;
10336 
10337 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
10338   {
10339   if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
10340     {
10341     offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
10342     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10343     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10344     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10345     if (common->capture_last_ptr != 0)
10346       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
10347     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
10348     if (common->capture_last_ptr != 0)
10349       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
10350     }
10351   set_jumps(current->topbacktracks, LABEL());
10352   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
10353   return;
10354   }
10355 
10356 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
10357 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10358 
10359 if (current->topbacktracks)
10360   {
10361   jump = JUMP(SLJIT_JUMP);
10362   set_jumps(current->topbacktracks, LABEL());
10363   /* Drop the stack frame. */
10364   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
10365   JUMPHERE(jump);
10366   }
10367 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
10368 }
10369 
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)10370 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10371 {
10372 assert_backtrack backtrack;
10373 
10374 current->top = NULL;
10375 current->topbacktracks = NULL;
10376 current->nextbacktracks = NULL;
10377 if (current->cc[1] > OP_ASSERTBACK_NOT)
10378   {
10379   /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
10380   compile_bracket_matchingpath(common, current->cc, current);
10381   compile_bracket_backtrackingpath(common, current->top);
10382   }
10383 else
10384   {
10385   memset(&backtrack, 0, sizeof(backtrack));
10386   backtrack.common.cc = current->cc;
10387   backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
10388   /* Manual call of compile_assert_matchingpath. */
10389   compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
10390   }
10391 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
10392 }
10393 
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)10394 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10395 {
10396 DEFINE_COMPILER;
10397 pcre_uchar opcode = *current->cc;
10398 struct sljit_label *loop;
10399 struct sljit_jump *jump;
10400 
10401 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
10402   {
10403   if (common->then_trap != NULL)
10404     {
10405     SLJIT_ASSERT(common->control_head_ptr != 0);
10406 
10407     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10408     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
10409     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
10410     jump = JUMP(SLJIT_JUMP);
10411 
10412     loop = LABEL();
10413     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10414     JUMPHERE(jump);
10415     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
10416     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
10417     add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
10418     return;
10419     }
10420   else if (common->positive_assert)
10421     {
10422     add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP));
10423     return;
10424     }
10425   }
10426 
10427 if (common->local_exit)
10428   {
10429   if (common->quit_label == NULL)
10430     add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
10431   else
10432     JUMPTO(SLJIT_JUMP, common->quit_label);
10433   return;
10434   }
10435 
10436 if (opcode == OP_SKIP_ARG)
10437   {
10438   SLJIT_ASSERT(common->control_head_ptr != 0);
10439   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10440   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
10441   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
10442   sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
10443   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
10444 
10445   OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
10446   add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, -1));
10447   return;
10448   }
10449 
10450 if (opcode == OP_SKIP)
10451   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10452 else
10453   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
10454 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
10455 }
10456 
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)10457 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10458 {
10459 DEFINE_COMPILER;
10460 struct sljit_jump *jump;
10461 int size;
10462 
10463 if (CURRENT_AS(then_trap_backtrack)->then_trap)
10464   {
10465   common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
10466   return;
10467   }
10468 
10469 size = CURRENT_AS(then_trap_backtrack)->framesize;
10470 size = 3 + (size < 0 ? 0 : size);
10471 
10472 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
10473 free_stack(common, size);
10474 jump = JUMP(SLJIT_JUMP);
10475 
10476 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
10477 /* STACK_TOP is set by THEN. */
10478 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
10479   add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10480 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10481 free_stack(common, 3);
10482 
10483 JUMPHERE(jump);
10484 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10485 }
10486 
/* Walks a chain of backtrack descriptors through their prev links and emits
the backtracking code of each one, selected by the opcode at its cc pointer.
Pending nextbacktracks jumps of a descriptor are bound to a fresh label just
before its code. common->then_trap is restored to its entry value on return.

  common   shared JIT compiler state
  current  first descriptor to process; NULL emits nothing */
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *entry_then_trap = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->nextbacktracks != NULL)
    set_jumps(current->nextbacktracks, LABEL());

  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Pop one stack slot back into OVECTOR(0). */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF:
    case OP_REFI:
    case OP_DNREF:
    case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode after the prefix tells an assertion from a bracket. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    /* With has_skip_arg five slots are popped and control_head_ptr is
    reloaded as well; otherwise a single slot holds the mark value. */
    if (common->has_skip_arg)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN:
    case OP_THEN_ARG:
    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT:
    /* Outside a local exit the return register is set to PCRE_ERROR_NOMATCH. */
    if (!common->local_exit)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
    if (common->quit_label != NULL)
      JUMPTO(SLJIT_JUMP, common->quit_label);
    else
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    break;

    case OP_CALLOUT:
    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    /* Nothing to undo: only bind the pending jumps. */
    set_jumps(current->topbacktracks, LABEL());
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  }

common->then_trap = entry_then_trap;
}
10673 
compile_recurse(compiler_common * common)10674 static SLJIT_INLINE void compile_recurse(compiler_common *common)
10675 {
10676 DEFINE_COMPILER;
10677 pcre_uchar *cc = common->start + common->currententry->start;
10678 pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
10679 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
10680 BOOL needs_control_head;
10681 int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
10682 int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
10683 int alternativesize;
10684 BOOL needs_frame;
10685 backtrack_common altbacktrack;
10686 struct sljit_jump *jump;
10687 
10688 /* Recurse captures then. */
10689 common->then_trap = NULL;
10690 
10691 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
10692 needs_frame = framesize >= 0;
10693 if (!needs_frame)
10694   framesize = 0;
10695 alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
10696 
10697 SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0);
10698 common->currententry->entry = LABEL();
10699 set_jumps(common->currententry->calls, common->currententry->entry);
10700 
10701 sljit_emit_fast_enter(compiler, TMP2, 0);
10702 count_match(common);
10703 allocate_stack(common, private_data_size + framesize + alternativesize);
10704 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
10705 copy_private_data(common, ccbegin, ccend, TRUE, framesize + alternativesize, private_data_size + framesize + alternativesize, needs_control_head);
10706 if (needs_control_head)
10707   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
10708 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
10709 if (needs_frame)
10710   init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE);
10711 
10712 if (alternativesize > 0)
10713   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10714 
10715 memset(&altbacktrack, 0, sizeof(backtrack_common));
10716 common->quit_label = NULL;
10717 common->accept_label = NULL;
10718 common->quit = NULL;
10719 common->accept = NULL;
10720 altbacktrack.cc = ccbegin;
10721 cc += GET(cc, 1);
10722 while (1)
10723   {
10724   altbacktrack.top = NULL;
10725   altbacktrack.topbacktracks = NULL;
10726 
10727   if (altbacktrack.cc != ccbegin)
10728     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10729 
10730   compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
10731   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10732     return;
10733 
10734   add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
10735 
10736   compile_backtrackingpath(common, altbacktrack.top);
10737   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10738     return;
10739   set_jumps(altbacktrack.topbacktracks, LABEL());
10740 
10741   if (*cc != OP_ALT)
10742     break;
10743 
10744   altbacktrack.cc = cc + 1 + LINK_SIZE;
10745   cc += GET(cc, 1);
10746   }
10747 
10748 /* None of them matched. */
10749 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
10750 jump = JUMP(SLJIT_JUMP);
10751 
10752 if (common->quit != NULL)
10753   {
10754   set_jumps(common->quit, LABEL());
10755   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
10756   if (needs_frame)
10757     {
10758     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10759     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10760     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10761     }
10762   OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
10763   common->quit = NULL;
10764   add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
10765   }
10766 
10767 set_jumps(common->accept, LABEL());
10768 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
10769 if (needs_frame)
10770   {
10771   OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10772   add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10773   OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10774   }
10775 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
10776 
10777 JUMPHERE(jump);
10778 if (common->quit != NULL)
10779   set_jumps(common->quit, LABEL());
10780 copy_private_data(common, ccbegin, ccend, FALSE, framesize + alternativesize, private_data_size + framesize + alternativesize, needs_control_head);
10781 free_stack(common, private_data_size + framesize + alternativesize);
10782 if (needs_control_head)
10783   {
10784   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-3));
10785   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
10786   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP1, 0);
10787   OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
10788   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
10789   }
10790 else
10791   {
10792   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
10793   OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
10794   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP2, 0);
10795   }
10796 sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), STACK(-1));
10797 }
10798 
10799 #undef COMPILE_BACKTRACKINGPATH
10800 #undef CURRENT_AS
10801 
10802 void
PRIV(jit_compile)10803 PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode)
10804 {
10805 struct sljit_compiler *compiler;
10806 backtrack_common rootbacktrack;
10807 compiler_common common_data;
10808 compiler_common *common = &common_data;
10809 const sljit_u8 *tables = re->tables;
10810 pcre_study_data *study;
10811 int private_data_size;
10812 pcre_uchar *ccend;
10813 executable_functions *functions;
10814 void *executable_func;
10815 sljit_uw executable_size;
10816 sljit_uw total_length;
10817 label_addr_list *label_addr;
10818 struct sljit_label *mainloop_label = NULL;
10819 struct sljit_label *continue_match_label;
10820 struct sljit_label *empty_match_found_label = NULL;
10821 struct sljit_label *empty_match_backtrack_label = NULL;
10822 struct sljit_label *reset_match_label;
10823 struct sljit_label *quit_label;
10824 struct sljit_jump *jump;
10825 struct sljit_jump *minlength_check_failed = NULL;
10826 struct sljit_jump *reqbyte_notfound = NULL;
10827 struct sljit_jump *empty_match = NULL;
10828 
10829 SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
10830 study = extra->study_data;
10831 
10832 if (!tables)
10833   tables = PRIV(default_tables);
10834 
10835 memset(&rootbacktrack, 0, sizeof(backtrack_common));
10836 memset(common, 0, sizeof(compiler_common));
10837 rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
10838 
10839 common->start = rootbacktrack.cc;
10840 common->read_only_data_head = NULL;
10841 common->fcc = tables + fcc_offset;
10842 common->lcc = (sljit_sw)(tables + lcc_offset);
10843 common->mode = mode;
10844 common->might_be_empty = study->minlength == 0;
10845 common->nltype = NLTYPE_FIXED;
10846 switch(re->options & PCRE_NEWLINE_BITS)
10847   {
10848   case 0:
10849   /* Compile-time default */
10850   switch(NEWLINE)
10851     {
10852     case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
10853     case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
10854     default: common->newline = NEWLINE; break;
10855     }
10856   break;
10857   case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
10858   case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
10859   case PCRE_NEWLINE_CR+
10860        PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
10861   case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
10862   case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
10863   default: return;
10864   }
10865 common->nlmax = READ_CHAR_MAX;
10866 common->nlmin = 0;
10867 if ((re->options & PCRE_BSR_ANYCRLF) != 0)
10868   common->bsr_nltype = NLTYPE_ANYCRLF;
10869 else if ((re->options & PCRE_BSR_UNICODE) != 0)
10870   common->bsr_nltype = NLTYPE_ANY;
10871 else
10872   {
10873 #ifdef BSR_ANYCRLF
10874   common->bsr_nltype = NLTYPE_ANYCRLF;
10875 #else
10876   common->bsr_nltype = NLTYPE_ANY;
10877 #endif
10878   }
10879 common->bsr_nlmax = READ_CHAR_MAX;
10880 common->bsr_nlmin = 0;
10881 common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
10882 common->ctypes = (sljit_sw)(tables + ctypes_offset);
10883 common->name_table = ((pcre_uchar *)re) + re->name_table_offset;
10884 common->name_count = re->name_count;
10885 common->name_entry_size = re->name_entry_size;
10886 common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
10887 #ifdef SUPPORT_UTF
10888 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
10889 common->utf = (re->options & PCRE_UTF8) != 0;
10890 #ifdef SUPPORT_UCP
10891 common->use_ucp = (re->options & PCRE_UCP) != 0;
10892 #endif
10893 if (common->utf)
10894   {
10895   if (common->nltype == NLTYPE_ANY)
10896     common->nlmax = 0x2029;
10897   else if (common->nltype == NLTYPE_ANYCRLF)
10898     common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
10899   else
10900     {
10901     /* We only care about the first newline character. */
10902     common->nlmax = common->newline & 0xff;
10903     }
10904 
10905   if (common->nltype == NLTYPE_FIXED)
10906     common->nlmin = common->newline & 0xff;
10907   else
10908     common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
10909 
10910   if (common->bsr_nltype == NLTYPE_ANY)
10911     common->bsr_nlmax = 0x2029;
10912   else
10913     common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
10914   common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
10915   }
10916 #endif /* SUPPORT_UTF */
10917 ccend = bracketend(common->start);
10918 
10919 /* Calculate the local space size on the stack. */
10920 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
10921 common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, compiler->allocator_data);
10922 if (!common->optimized_cbracket)
10923   return;
10924 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
10925 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
10926 #else
10927 memset(common->optimized_cbracket, 1, re->top_bracket + 1);
10928 #endif
10929 
10930 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
10931 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
10932 common->capture_last_ptr = common->ovector_start;
10933 common->ovector_start += sizeof(sljit_sw);
10934 #endif
10935 if (!check_opcode_types(common, common->start, ccend))
10936   {
10937   SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10938   return;
10939   }
10940 
10941 /* Checking flags and updating ovector_start. */
10942 if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
10943   {
10944   common->req_char_ptr = common->ovector_start;
10945   common->ovector_start += sizeof(sljit_sw);
10946   }
10947 if (mode != JIT_COMPILE)
10948   {
10949   common->start_used_ptr = common->ovector_start;
10950   common->ovector_start += sizeof(sljit_sw);
10951   if (mode == JIT_PARTIAL_SOFT_COMPILE)
10952     {
10953     common->hit_start = common->ovector_start;
10954     common->ovector_start += 2 * sizeof(sljit_sw);
10955     }
10956   }
10957 if ((re->options & PCRE_FIRSTLINE) != 0)
10958   {
10959   common->match_end_ptr = common->ovector_start;
10960   common->ovector_start += sizeof(sljit_sw);
10961   }
10962 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
10963 common->control_head_ptr = 1;
10964 #endif
10965 if (common->control_head_ptr != 0)
10966   {
10967   common->control_head_ptr = common->ovector_start;
10968   common->ovector_start += sizeof(sljit_sw);
10969   }
10970 if (common->has_set_som)
10971   {
10972   /* Saving the real start pointer is necessary. */
10973   common->start_ptr = common->ovector_start;
10974   common->ovector_start += sizeof(sljit_sw);
10975   }
10976 
10977 /* Aligning ovector to even number of sljit words. */
10978 if ((common->ovector_start & sizeof(sljit_sw)) != 0)
10979   common->ovector_start += sizeof(sljit_sw);
10980 
10981 if (common->start_ptr == 0)
10982   common->start_ptr = OVECTOR(0);
10983 
10984 /* Capturing brackets cannot be optimized if callouts are allowed. */
10985 if (common->capture_last_ptr != 0)
10986   memset(common->optimized_cbracket, 0, re->top_bracket + 1);
10987 
10988 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
10989 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
10990 
10991 total_length = ccend - common->start;
10992 common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), compiler->allocator_data);
10993 if (!common->private_data_ptrs)
10994   {
10995   SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10996   return;
10997   }
10998 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
10999 
11000 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
11001 set_private_data_ptrs(common, &private_data_size, ccend);
11002 if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
11003   {
11004   if (!detect_fast_forward_skip(common, &private_data_size) && !common->has_skip_in_assert_back)
11005     detect_fast_fail(common, common->start, &private_data_size, 4);
11006   }
11007 
11008 SLJIT_ASSERT(common->fast_fail_start_ptr <= common->fast_fail_end_ptr);
11009 
11010 if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
11011   {
11012   SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11013   SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11014   return;
11015   }
11016 
11017 if (common->has_then)
11018   {
11019   common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
11020   memset(common->then_offsets, 0, total_length);
11021   set_then_offsets(common, common->start, NULL);
11022   }
11023 
11024 compiler = sljit_create_compiler(NULL);
11025 if (!compiler)
11026   {
11027   SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11028   SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11029   return;
11030   }
11031 common->compiler = compiler;
11032 
11033 /* Main pcre_jit_exec entry. */
11034 sljit_emit_enter(compiler, 0, 1, 5, 5, 0, 0, private_data_size);
11035 
11036 /* Register init. */
11037 reset_ovector(common, (re->top_bracket + 1) * 2);
11038 if (common->req_char_ptr != 0)
11039   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
11040 
11041 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
11042 OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
11043 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
11044 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
11045 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
11046 OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
11047 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
11048 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
11049 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11050 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
11051 
11052 if (common->fast_fail_start_ptr < common->fast_fail_end_ptr)
11053   reset_fast_fail(common);
11054 
11055 if (mode == JIT_PARTIAL_SOFT_COMPILE)
11056   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
11057 if (common->mark_ptr != 0)
11058   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
11059 if (common->control_head_ptr != 0)
11060   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
11061 
11062 /* Main part of the matching */
11063 if ((re->options & PCRE_ANCHORED) == 0)
11064   {
11065   mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0);
11066   continue_match_label = LABEL();
11067   /* Forward search if possible. */
11068   if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
11069     {
11070     if (mode == JIT_COMPILE && fast_forward_first_n_chars(common))
11071       ;
11072     else if ((re->flags & PCRE_FIRSTSET) != 0)
11073       fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0);
11074     else if ((re->flags & PCRE_STARTLINE) != 0)
11075       fast_forward_newline(common);
11076     else if (study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
11077       fast_forward_start_bits(common, study->start_bits);
11078     }
11079   }
11080 else
11081   continue_match_label = LABEL();
11082 
11083 if (mode == JIT_COMPILE && study->minlength > 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
11084   {
11085   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
11086   OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
11087   minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
11088   }
11089 if (common->req_char_ptr != 0)
11090   reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
11091 
11092 /* Store the current STR_PTR in OVECTOR(0). */
11093 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
11094 /* Copy the limit of allowed recursions. */
11095 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
11096 if (common->capture_last_ptr != 0)
11097   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, -1);
11098 if (common->fast_forward_bc_ptr != NULL)
11099   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1), STR_PTR, 0);
11100 
11101 if (common->start_ptr != OVECTOR(0))
11102   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
11103 
11104 /* Copy the beginning of the string. */
11105 if (mode == JIT_PARTIAL_SOFT_COMPILE)
11106   {
11107   jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
11108   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
11109   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start + sizeof(sljit_sw), STR_PTR, 0);
11110   JUMPHERE(jump);
11111   }
11112 else if (mode == JIT_PARTIAL_HARD_COMPILE)
11113   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
11114 
11115 compile_matchingpath(common, common->start, ccend, &rootbacktrack);
11116 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11117   {
11118   sljit_free_compiler(compiler);
11119   SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11120   SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11121   free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11122   return;
11123   }
11124 
11125 if (common->might_be_empty)
11126   {
11127   empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
11128   empty_match_found_label = LABEL();
11129   }
11130 
11131 common->accept_label = LABEL();
11132 if (common->accept != NULL)
11133   set_jumps(common->accept, common->accept_label);
11134 
11135 /* This means we have a match. Update the ovector. */
11136 copy_ovector(common, re->top_bracket + 1);
11137 common->quit_label = common->forced_quit_label = LABEL();
11138 if (common->quit != NULL)
11139   set_jumps(common->quit, common->quit_label);
11140 if (common->forced_quit != NULL)
11141   set_jumps(common->forced_quit, common->forced_quit_label);
11142 if (minlength_check_failed != NULL)
11143   SET_LABEL(minlength_check_failed, common->forced_quit_label);
11144 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
11145 
11146 if (mode != JIT_COMPILE)
11147   {
11148   common->partialmatchlabel = LABEL();
11149   set_jumps(common->partialmatch, common->partialmatchlabel);
11150   return_with_partial_match(common, common->quit_label);
11151   }
11152 
11153 if (common->might_be_empty)
11154   empty_match_backtrack_label = LABEL();
11155 compile_backtrackingpath(common, rootbacktrack.top);
11156 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11157   {
11158   sljit_free_compiler(compiler);
11159   SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11160   SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11161   free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11162   return;
11163   }
11164 
11165 SLJIT_ASSERT(rootbacktrack.prev == NULL);
11166 reset_match_label = LABEL();
11167 
11168 if (mode == JIT_PARTIAL_SOFT_COMPILE)
11169   {
11170   /* Update hit_start only in the first time. */
11171   jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
11172   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr);
11173   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
11174   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
11175   JUMPHERE(jump);
11176   }
11177 
11178 /* Check we have remaining characters. */
11179 if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0)
11180   {
11181   SLJIT_ASSERT(common->match_end_ptr != 0);
11182   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
11183   }
11184 
11185 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
11186     (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1)) : common->start_ptr);
11187 
11188 if ((re->options & PCRE_ANCHORED) == 0)
11189   {
11190   if (common->ff_newline_shortcut != NULL)
11191     {
11192     if ((re->options & PCRE_FIRSTLINE) == 0)
11193       CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
11194     /* There cannot be more newlines here. */
11195     }
11196   else
11197     CMPTO(SLJIT_LESS, STR_PTR, 0, ((re->options & PCRE_FIRSTLINE) == 0) ? STR_END : TMP1, 0, mainloop_label);
11198   }
11199 
11200 /* No more remaining characters. */
11201 if (reqbyte_notfound != NULL)
11202   JUMPHERE(reqbyte_notfound);
11203 
11204 if (mode == JIT_PARTIAL_SOFT_COMPILE)
11205   CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
11206 
11207 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
11208 JUMPTO(SLJIT_JUMP, common->quit_label);
11209 
11210 flush_stubs(common);
11211 
11212 if (common->might_be_empty)
11213   {
11214   JUMPHERE(empty_match);
11215   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11216   OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
11217   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label);
11218   OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
11219   CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label);
11220   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
11221   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
11222   JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
11223   }
11224 
11225 common->fast_forward_bc_ptr = NULL;
11226 common->fast_fail_start_ptr = 0;
11227 common->fast_fail_end_ptr = 0;
11228 common->currententry = common->entries;
11229 common->local_exit = TRUE;
11230 quit_label = common->quit_label;
11231 while (common->currententry != NULL)
11232   {
11233   /* Might add new entries. */
11234   compile_recurse(common);
11235   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11236     {
11237     sljit_free_compiler(compiler);
11238     SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11239     SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11240     free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11241     return;
11242     }
11243   flush_stubs(common);
11244   common->currententry = common->currententry->next;
11245   }
11246 common->local_exit = FALSE;
11247 common->quit_label = quit_label;
11248 
11249 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
11250 /* This is a (really) rare case. */
11251 set_jumps(common->stackalloc, LABEL());
11252 /* RETURN_ADDR is not a saved register. */
11253 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
11254 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
11255 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11256 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
11257 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
11258 OP2(SLJIT_SUB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);
11259 
11260 sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
11261 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
11262 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11263 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
11264 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top));
11265 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit));
11266 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
11267 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
11268 
11269 /* Allocation failed. */
11270 JUMPHERE(jump);
11271 /* We break the return address cache here, but this is a really rare case. */
11272 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
11273 JUMPTO(SLJIT_JUMP, common->quit_label);
11274 
11275 /* Call limit reached. */
11276 set_jumps(common->calllimit, LABEL());
11277 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
11278 JUMPTO(SLJIT_JUMP, common->quit_label);
11279 
11280 if (common->revertframes != NULL)
11281   {
11282   set_jumps(common->revertframes, LABEL());
11283   do_revertframes(common);
11284   }
11285 if (common->wordboundary != NULL)
11286   {
11287   set_jumps(common->wordboundary, LABEL());
11288   check_wordboundary(common);
11289   }
11290 if (common->anynewline != NULL)
11291   {
11292   set_jumps(common->anynewline, LABEL());
11293   check_anynewline(common);
11294   }
11295 if (common->hspace != NULL)
11296   {
11297   set_jumps(common->hspace, LABEL());
11298   check_hspace(common);
11299   }
11300 if (common->vspace != NULL)
11301   {
11302   set_jumps(common->vspace, LABEL());
11303   check_vspace(common);
11304   }
11305 if (common->casefulcmp != NULL)
11306   {
11307   set_jumps(common->casefulcmp, LABEL());
11308   do_casefulcmp(common);
11309   }
11310 if (common->caselesscmp != NULL)
11311   {
11312   set_jumps(common->caselesscmp, LABEL());
11313   do_caselesscmp(common);
11314   }
11315 if (common->reset_match != NULL)
11316   {
11317   set_jumps(common->reset_match, LABEL());
11318   do_reset_match(common, (re->top_bracket + 1) * 2);
11319   CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
11320   OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
11321   JUMPTO(SLJIT_JUMP, reset_match_label);
11322   }
11323 #ifdef SUPPORT_UTF
11324 #ifdef COMPILE_PCRE8
11325 if (common->utfreadchar != NULL)
11326   {
11327   set_jumps(common->utfreadchar, LABEL());
11328   do_utfreadchar(common);
11329   }
11330 if (common->utfreadchar16 != NULL)
11331   {
11332   set_jumps(common->utfreadchar16, LABEL());
11333   do_utfreadchar16(common);
11334   }
11335 if (common->utfreadtype8 != NULL)
11336   {
11337   set_jumps(common->utfreadtype8, LABEL());
11338   do_utfreadtype8(common);
11339   }
11340 #endif /* COMPILE_PCRE8 */
11341 #endif /* SUPPORT_UTF */
11342 #ifdef SUPPORT_UCP
11343 if (common->getucd != NULL)
11344   {
11345   set_jumps(common->getucd, LABEL());
11346   do_getucd(common);
11347   }
11348 #endif
11349 
11350 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11351 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11352 
11353 executable_func = sljit_generate_code(compiler);
11354 executable_size = sljit_get_generated_code_size(compiler);
11355 label_addr = common->label_addrs;
11356 while (label_addr != NULL)
11357   {
11358   *label_addr->update_addr = sljit_get_label_addr(label_addr->label);
11359   label_addr = label_addr->next;
11360   }
11361 sljit_free_compiler(compiler);
11362 if (executable_func == NULL)
11363   {
11364   free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11365   return;
11366   }
11367 
11368 /* Reuse the function descriptor if possible. */
11369 if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL)
11370   functions = (executable_functions *)extra->executable_jit;
11371 else
11372   {
11373   /* Note: If your memory-checker has flagged the allocation below as a
11374    * memory leak, it is probably because you either forgot to call
11375    * pcre_free_study() (or pcre16_free_study()) on the pcre_extra (or
11376    * pcre16_extra) object, or you called said function after having
11377    * cleared the PCRE_EXTRA_EXECUTABLE_JIT bit from the "flags" field
11378    * of the object. (The function will only free the JIT data if the
11379    * bit remains set, as the bit indicates that the pointer to the data
11380    * is valid.)
11381    */
11382   functions = SLJIT_MALLOC(sizeof(executable_functions), compiler->allocator_data);
11383   if (functions == NULL)
11384     {
11385     /* This case is highly unlikely since we just recently
11386     freed a lot of memory. Not impossible though. */
11387     sljit_free_code(executable_func);
11388     free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11389     return;
11390     }
11391   memset(functions, 0, sizeof(executable_functions));
11392   functions->top_bracket = (re->top_bracket + 1) * 2;
11393   functions->limit_match = (re->flags & PCRE_MLSET) != 0 ? re->limit_match : 0;
11394   extra->executable_jit = functions;
11395   extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
11396   }
11397 
11398 functions->executable_funcs[mode] = executable_func;
11399 functions->read_only_data_heads[mode] = common->read_only_data_head;
11400 functions->executable_sizes[mode] = executable_size;
11401 }
11402 
jit_machine_stack_exec(jit_arguments * arguments,void * executable_func)11403 static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, void *executable_func)
11404 {
11405 union {
11406    void *executable_func;
11407    jit_function call_executable_func;
11408 } convert_executable_func;
11409 sljit_u8 local_space[MACHINE_STACK_SIZE];
11410 struct sljit_stack local_stack;
11411 
11412 local_stack.max_limit = local_space;
11413 local_stack.limit = local_space;
11414 local_stack.base = local_space + MACHINE_STACK_SIZE;
11415 local_stack.top = local_space + MACHINE_STACK_SIZE;
11416 arguments->stack = &local_stack;
11417 convert_executable_func.executable_func = executable_func;
11418 return convert_executable_func.call_executable_func(arguments);
11419 }
11420 
11421 int
PRIV(jit_exec)11422 PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject,
11423   int length, int start_offset, int options, int *offsets, int offset_count)
11424 {
11425 executable_functions *functions = (executable_functions *)extra_data->executable_jit;
11426 union {
11427    void *executable_func;
11428    jit_function call_executable_func;
11429 } convert_executable_func;
11430 jit_arguments arguments;
11431 int max_offset_count;
11432 int retval;
11433 int mode = JIT_COMPILE;
11434 
11435 if ((options & PCRE_PARTIAL_HARD) != 0)
11436   mode = JIT_PARTIAL_HARD_COMPILE;
11437 else if ((options & PCRE_PARTIAL_SOFT) != 0)
11438   mode = JIT_PARTIAL_SOFT_COMPILE;
11439 
11440 if (functions->executable_funcs[mode] == NULL)
11441   return PCRE_ERROR_JIT_BADOPTION;
11442 
11443 /* Sanity checks should be handled by pcre_exec. */
11444 arguments.str = subject + start_offset;
11445 arguments.begin = subject;
11446 arguments.end = subject + length;
11447 arguments.mark_ptr = NULL;
11448 /* JIT decreases this value less frequently than the interpreter. */
11449 arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (sljit_u32)(extra_data->match_limit);
11450 if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
11451   arguments.limit_match = functions->limit_match;
11452 arguments.notbol = (options & PCRE_NOTBOL) != 0;
11453 arguments.noteol = (options & PCRE_NOTEOL) != 0;
11454 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
11455 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
11456 arguments.offsets = offsets;
11457 arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
11458 arguments.real_offset_count = offset_count;
11459 
11460 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
11461 the output vector for storing captured strings, with the remainder used as
11462 workspace. We don't need the workspace here. For compatibility, we limit the
11463 number of captured strings in the same way as pcre_exec(), so that the user
11464 gets the same result with and without JIT. */
11465 
11466 if (offset_count != 2)
11467   offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
11468 max_offset_count = functions->top_bracket;
11469 if (offset_count > max_offset_count)
11470   offset_count = max_offset_count;
11471 arguments.offset_count = offset_count;
11472 
11473 if (functions->callback)
11474   arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata);
11475 else
11476   arguments.stack = (struct sljit_stack *)functions->userdata;
11477 
11478 if (arguments.stack == NULL)
11479   retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]);
11480 else
11481   {
11482   convert_executable_func.executable_func = functions->executable_funcs[mode];
11483   retval = convert_executable_func.call_executable_func(&arguments);
11484   }
11485 
11486 if (retval * 2 > offset_count)
11487   retval = 0;
11488 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
11489   *(extra_data->mark) = arguments.mark_ptr;
11490 
11491 return retval;
11492 }
11493 
11494 #if defined COMPILE_PCRE8
11495 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_jit_exec(const pcre * argument_re,const pcre_extra * extra_data,PCRE_SPTR subject,int length,int start_offset,int options,int * offsets,int offset_count,pcre_jit_stack * stack)11496 pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data,
11497   PCRE_SPTR subject, int length, int start_offset, int options,
11498   int *offsets, int offset_count, pcre_jit_stack *stack)
11499 #elif defined COMPILE_PCRE16
11500 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
11501 pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
11502   PCRE_SPTR16 subject, int length, int start_offset, int options,
11503   int *offsets, int offset_count, pcre16_jit_stack *stack)
11504 #elif defined COMPILE_PCRE32
11505 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
11506 pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
11507   PCRE_SPTR32 subject, int length, int start_offset, int options,
11508   int *offsets, int offset_count, pcre32_jit_stack *stack)
11509 #endif
11510 {
11511 pcre_uchar *subject_ptr = (pcre_uchar *)subject;
11512 executable_functions *functions = (executable_functions *)extra_data->executable_jit;
11513 union {
11514    void *executable_func;
11515    jit_function call_executable_func;
11516 } convert_executable_func;
11517 jit_arguments arguments;
11518 int max_offset_count;
11519 int retval;
11520 int mode = JIT_COMPILE;
11521 
11522 SLJIT_UNUSED_ARG(argument_re);
11523 
11524 /* Plausibility checks */
11525 if ((options & ~PUBLIC_JIT_EXEC_OPTIONS) != 0) return PCRE_ERROR_JIT_BADOPTION;
11526 
11527 if ((options & PCRE_PARTIAL_HARD) != 0)
11528   mode = JIT_PARTIAL_HARD_COMPILE;
11529 else if ((options & PCRE_PARTIAL_SOFT) != 0)
11530   mode = JIT_PARTIAL_SOFT_COMPILE;
11531 
11532 if (functions->executable_funcs[mode] == NULL)
11533   return PCRE_ERROR_JIT_BADOPTION;
11534 
11535 /* Sanity checks should be handled by pcre_exec. */
11536 arguments.stack = (struct sljit_stack *)stack;
11537 arguments.str = subject_ptr + start_offset;
11538 arguments.begin = subject_ptr;
11539 arguments.end = subject_ptr + length;
11540 arguments.mark_ptr = NULL;
11541 /* JIT decreases this value less frequently than the interpreter. */
11542 arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (sljit_u32)(extra_data->match_limit);
11543 if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
11544   arguments.limit_match = functions->limit_match;
11545 arguments.notbol = (options & PCRE_NOTBOL) != 0;
11546 arguments.noteol = (options & PCRE_NOTEOL) != 0;
11547 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
11548 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
11549 arguments.offsets = offsets;
11550 arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
11551 arguments.real_offset_count = offset_count;
11552 
11553 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
11554 the output vector for storing captured strings, with the remainder used as
11555 workspace. We don't need the workspace here. For compatibility, we limit the
11556 number of captured strings in the same way as pcre_exec(), so that the user
11557 gets the same result with and without JIT. */
11558 
11559 if (offset_count != 2)
11560   offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
11561 max_offset_count = functions->top_bracket;
11562 if (offset_count > max_offset_count)
11563   offset_count = max_offset_count;
11564 arguments.offset_count = offset_count;
11565 
11566 convert_executable_func.executable_func = functions->executable_funcs[mode];
11567 retval = convert_executable_func.call_executable_func(&arguments);
11568 
11569 if (retval * 2 > offset_count)
11570   retval = 0;
11571 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
11572   *(extra_data->mark) = arguments.mark_ptr;
11573 
11574 return retval;
11575 }
11576 
11577 void
PRIV(jit_free)11578 PRIV(jit_free)(void *executable_funcs)
11579 {
11580 int i;
11581 executable_functions *functions = (executable_functions *)executable_funcs;
11582 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
11583   {
11584   if (functions->executable_funcs[i] != NULL)
11585     sljit_free_code(functions->executable_funcs[i]);
11586   free_read_only_data(functions->read_only_data_heads[i], NULL);
11587   }
11588 SLJIT_FREE(functions, compiler->allocator_data);
11589 }
11590 
11591 int
PRIV(jit_get_size)11592 PRIV(jit_get_size)(void *executable_funcs)
11593 {
11594 int i;
11595 sljit_uw size = 0;
11596 sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes;
11597 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
11598   size += executable_sizes[i];
11599 return (int)size;
11600 }
11601 
11602 const char*
PRIV(jit_get_target)11603 PRIV(jit_get_target)(void)
11604 {
11605 return sljit_get_platform_name();
11606 }
11607 
11608 #if defined COMPILE_PCRE8
11609 PCRE_EXP_DECL pcre_jit_stack *
pcre_jit_stack_alloc(int startsize,int maxsize)11610 pcre_jit_stack_alloc(int startsize, int maxsize)
11611 #elif defined COMPILE_PCRE16
11612 PCRE_EXP_DECL pcre16_jit_stack *
11613 pcre16_jit_stack_alloc(int startsize, int maxsize)
11614 #elif defined COMPILE_PCRE32
11615 PCRE_EXP_DECL pcre32_jit_stack *
11616 pcre32_jit_stack_alloc(int startsize, int maxsize)
11617 #endif
11618 {
11619 if (startsize < 1 || maxsize < 1)
11620   return NULL;
11621 if (startsize > maxsize)
11622   startsize = maxsize;
11623 startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
11624 maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
11625 return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize, NULL);
11626 }
11627 
11628 #if defined COMPILE_PCRE8
11629 PCRE_EXP_DECL void
pcre_jit_stack_free(pcre_jit_stack * stack)11630 pcre_jit_stack_free(pcre_jit_stack *stack)
11631 #elif defined COMPILE_PCRE16
11632 PCRE_EXP_DECL void
11633 pcre16_jit_stack_free(pcre16_jit_stack *stack)
11634 #elif defined COMPILE_PCRE32
11635 PCRE_EXP_DECL void
11636 pcre32_jit_stack_free(pcre32_jit_stack *stack)
11637 #endif
11638 {
11639 sljit_free_stack((struct sljit_stack *)stack, NULL);
11640 }
11641 
11642 #if defined COMPILE_PCRE8
11643 PCRE_EXP_DECL void
pcre_assign_jit_stack(pcre_extra * extra,pcre_jit_callback callback,void * userdata)11644 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
11645 #elif defined COMPILE_PCRE16
11646 PCRE_EXP_DECL void
11647 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
11648 #elif defined COMPILE_PCRE32
11649 PCRE_EXP_DECL void
11650 pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
11651 #endif
11652 {
11653 executable_functions *functions;
11654 if (extra != NULL &&
11655     (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
11656     extra->executable_jit != NULL)
11657   {
11658   functions = (executable_functions *)extra->executable_jit;
11659   functions->callback = callback;
11660   functions->userdata = userdata;
11661   }
11662 }
11663 
11664 #if defined COMPILE_PCRE8
11665 PCRE_EXP_DECL void
pcre_jit_free_unused_memory(void)11666 pcre_jit_free_unused_memory(void)
11667 #elif defined COMPILE_PCRE16
11668 PCRE_EXP_DECL void
11669 pcre16_jit_free_unused_memory(void)
11670 #elif defined COMPILE_PCRE32
11671 PCRE_EXP_DECL void
11672 pcre32_jit_free_unused_memory(void)
11673 #endif
11674 {
11675 sljit_free_unused_memory_exec();
11676 }
11677 
11678 #else  /* SUPPORT_JIT */
11679 
11680 /* These are dummy functions to avoid linking errors when JIT support is not
11681 being compiled. */
11682 
11683 #if defined COMPILE_PCRE8
11684 PCRE_EXP_DECL pcre_jit_stack *
pcre_jit_stack_alloc(int startsize,int maxsize)11685 pcre_jit_stack_alloc(int startsize, int maxsize)
11686 #elif defined COMPILE_PCRE16
11687 PCRE_EXP_DECL pcre16_jit_stack *
11688 pcre16_jit_stack_alloc(int startsize, int maxsize)
11689 #elif defined COMPILE_PCRE32
11690 PCRE_EXP_DECL pcre32_jit_stack *
11691 pcre32_jit_stack_alloc(int startsize, int maxsize)
11692 #endif
11693 {
11694 (void)startsize;
11695 (void)maxsize;
11696 return NULL;
11697 }
11698 
11699 #if defined COMPILE_PCRE8
11700 PCRE_EXP_DECL void
pcre_jit_stack_free(pcre_jit_stack * stack)11701 pcre_jit_stack_free(pcre_jit_stack *stack)
11702 #elif defined COMPILE_PCRE16
11703 PCRE_EXP_DECL void
11704 pcre16_jit_stack_free(pcre16_jit_stack *stack)
11705 #elif defined COMPILE_PCRE32
11706 PCRE_EXP_DECL void
11707 pcre32_jit_stack_free(pcre32_jit_stack *stack)
11708 #endif
11709 {
11710 (void)stack;
11711 }
11712 
11713 #if defined COMPILE_PCRE8
11714 PCRE_EXP_DECL void
pcre_assign_jit_stack(pcre_extra * extra,pcre_jit_callback callback,void * userdata)11715 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
11716 #elif defined COMPILE_PCRE16
11717 PCRE_EXP_DECL void
11718 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
11719 #elif defined COMPILE_PCRE32
11720 PCRE_EXP_DECL void
11721 pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
11722 #endif
11723 {
11724 (void)extra;
11725 (void)callback;
11726 (void)userdata;
11727 }
11728 
11729 #if defined COMPILE_PCRE8
11730 PCRE_EXP_DECL void
pcre_jit_free_unused_memory(void)11731 pcre_jit_free_unused_memory(void)
11732 #elif defined COMPILE_PCRE16
11733 PCRE_EXP_DECL void
11734 pcre16_jit_free_unused_memory(void)
11735 #elif defined COMPILE_PCRE32
11736 PCRE_EXP_DECL void
11737 pcre32_jit_free_unused_memory(void)
11738 #endif
11739 {
11740 }
11741 
11742 #endif
11743 
11744 /* End of pcre_jit_compile.c */
11745