xref: /PHP-5.4/ext/pcre/pcrelib/pcre_jit_compile.c (revision 95fa7279)
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9            Copyright (c) 1997-2013 University of Cambridge
10 
11   The machine code generator part (this module) was written by Zoltan Herczeg
12                       Copyright (c) 2010-2013
13 
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17 
18     * Redistributions of source code must retain the above copyright notice,
19       this list of conditions and the following disclaimer.
20 
21     * Redistributions in binary form must reproduce the above copyright
22       notice, this list of conditions and the following disclaimer in the
23       documentation and/or other materials provided with the distribution.
24 
25     * Neither the name of the University of Cambridge nor the names of its
26       contributors may be used to endorse or promote products derived from
27       this software without specific prior written permission.
28 
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42 
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46 
47 #include "pcre_internal.h"
48 
49 #if defined SUPPORT_JIT
50 
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54 
55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61 
62 #include "sljit/sljitLir.c"
63 
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67 
68 /* Defines for debugging purposes. */
69 
70 /* 1 - Use unoptimized capturing brackets.
71    2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73 
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76 
/* Allocate memory for the regex stack on the real machine stack.
Fast, but limited size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for stack allocated by the OS. Should be a multiple
of the page size. */
#define STACK_GROWTH_RATE 8192

/* Enable to check that the allocation could destroy temporaries.
Only defined when SLJIT_DEBUG is defined and non-zero. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
89 
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92 
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 that execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98 
99   'ab' - 'a' and 'b' regexps are concatenated
100   'a+' - 'a' is the sub-expression of the '+' operator
101 
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107 
108  Greedy star operator (*) :
109    Matching path: match happens.
110    Backtrack path: match failed.
111  Non-greedy star operator (*?) :
112    Matching path: no need to perform a match.
113    Backtrack path: match is required.
114 
115 The following example shows the code generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118 
119    A(B|C)D
120 
121 The generated code will be the following:
122 
123  A matching path
124  '(' matching path (pushing arguments to the stack)
125  B matching path
126  ')' matching path (pushing arguments to the stack)
127  D matching path
128  return with successful match
129 
130  D backtrack path
131  ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132  B backtrack path
133  C expected path
134  jump to D matching path
135  C backtrack path
136  A backtrack path
137 
138  Notice that the order of the backtrack code paths is the opposite of the fast
139  code paths. In this way the topmost value on the stack always belongs
140  to the current backtrack code path. The backtrack path must check
141  whether there is a next alternative. If so, it needs to jump back to
142  the matching path eventually. Otherwise it needs to clear out its own stack
143  frame and continue the execution on the backtrack code paths.
144 */
145 
146 /*
147 Saved stack frames:
148 
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153 
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156 
157 Thus we can restore the private data to a particular point in the stack.
158 */
159 
160 typedef struct jit_arguments {
161   /* Pointers first. */
162   struct sljit_stack *stack;
163   const pcre_uchar *str;
164   const pcre_uchar *begin;
165   const pcre_uchar *end;
166   int *offsets;
167   pcre_uchar *uchar_ptr;
168   pcre_uchar *mark_ptr;
169   void *callout_data;
170   /* Everything else after. */
171   pcre_uint32 limit_match;
172   int real_offset_count;
173   int offset_count;
174   pcre_uint8 notbol;
175   pcre_uint8 noteol;
176   pcre_uint8 notempty;
177   pcre_uint8 notempty_atstart;
178 } jit_arguments;
179 
180 typedef struct executable_functions {
181   void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182   void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
183   sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
184   PUBL(jit_callback) callback;
185   void *userdata;
186   pcre_uint32 top_bracket;
187   pcre_uint32 limit_match;
188 } executable_functions;
189 
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
194 
/* Node of a singly linked list of stubs; each stub pairs a start jump
with a quit label. */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
200 
201 typedef struct label_addr_list {
202   struct sljit_label *label;
203   sljit_uw *update_addr;
204   struct label_addr_list *next;
205 } label_addr_list;
206 
/* Negative sentinel values used in place of a frame size. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
211 
/* Kinds of control entries. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
216 
217 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218 
219 /* The following structure is the key data type for the recursive
220 code generator. It is allocated by compile_matchingpath, and contains
221 the arguments for compile_backtrackingpath. Must be the first member
222 of its descendants. */
223 typedef struct backtrack_common {
224   /* Concatenation stack. */
225   struct backtrack_common *prev;
226   jump_list *nextbacktracks;
227   /* Internal stack (for component operators). */
228   struct backtrack_common *top;
229   jump_list *topbacktracks;
230   /* Opcode pointer. */
231   pcre_uchar *cc;
232 } backtrack_common;
233 
234 typedef struct assert_backtrack {
235   backtrack_common common;
236   jump_list *condfailed;
237   /* Less than 0 if a frame is not needed. */
238   int framesize;
239   /* Points to our private memory word on the stack. */
240   int private_data_ptr;
241   /* For iterators. */
242   struct sljit_label *matchingpath;
243 } assert_backtrack;
244 
245 typedef struct bracket_backtrack {
246   backtrack_common common;
247   /* Where to coninue if an alternative is successfully matched. */
248   struct sljit_label *alternative_matchingpath;
249   /* For rmin and rmax iterators. */
250   struct sljit_label *recursive_matchingpath;
251   /* For greedy ? operator. */
252   struct sljit_label *zero_matchingpath;
253   /* Contains the branches of a failed condition. */
254   union {
255     /* Both for OP_COND, OP_SCOND. */
256     jump_list *condfailed;
257     assert_backtrack *assert;
258     /* For OP_ONCE. Less than 0 if not needed. */
259     int framesize;
260   } u;
261   /* Points to our private memory word on the stack. */
262   int private_data_ptr;
263 } bracket_backtrack;
264 
265 typedef struct bracketpos_backtrack {
266   backtrack_common common;
267   /* Points to our private memory word on the stack. */
268   int private_data_ptr;
269   /* Reverting stack is needed. */
270   int framesize;
271   /* Allocated stack size. */
272   int stacksize;
273 } bracketpos_backtrack;
274 
275 typedef struct braminzero_backtrack {
276   backtrack_common common;
277   struct sljit_label *matchingpath;
278 } braminzero_backtrack;
279 
280 typedef struct iterator_backtrack {
281   backtrack_common common;
282   /* Next iteration. */
283   struct sljit_label *matchingpath;
284 } iterator_backtrack;
285 
286 typedef struct recurse_entry {
287   struct recurse_entry *next;
288   /* Contains the function entry. */
289   struct sljit_label *entry;
290   /* Collects the calls until the function is not created. */
291   jump_list *calls;
292   /* Points to the starting opcode. */
293   sljit_sw start;
294 } recurse_entry;
295 
296 typedef struct recurse_backtrack {
297   backtrack_common common;
298   BOOL inlined_pattern;
299 } recurse_backtrack;
300 
301 #define OP_THEN_TRAP OP_TABLE_LENGTH
302 
303 typedef struct then_trap_backtrack {
304   backtrack_common common;
305   /* If then_trap is not NULL, this structure contains the real
306   then_trap for the backtracking path. */
307   struct then_trap_backtrack *then_trap;
308   /* Points to the starting opcode. */
309   sljit_sw start;
310   /* Exit point for the then opcodes of this alternative. */
311   jump_list *quit;
312   /* Frame size of the current alternative. */
313   int framesize;
314 } then_trap_backtrack;
315 
#define MAX_RANGE_SIZE 4
317 
318 typedef struct compiler_common {
319   /* The sljit ceneric compiler. */
320   struct sljit_compiler *compiler;
321   /* First byte code. */
322   pcre_uchar *start;
323   /* Maps private data offset to each opcode. */
324   sljit_si *private_data_ptrs;
325   /* Chain list of read-only data ptrs. */
326   void *read_only_data_head;
327   /* Tells whether the capturing bracket is optimized. */
328   pcre_uint8 *optimized_cbracket;
329   /* Tells whether the starting offset is a target of then. */
330   pcre_uint8 *then_offsets;
331   /* Current position where a THEN must jump. */
332   then_trap_backtrack *then_trap;
333   /* Starting offset of private data for capturing brackets. */
334   int cbra_ptr;
335   /* Output vector starting point. Must be divisible by 2. */
336   int ovector_start;
337   /* Last known position of the requested byte. */
338   int req_char_ptr;
339   /* Head of the last recursion. */
340   int recursive_head_ptr;
341   /* First inspected character for partial matching. */
342   int start_used_ptr;
343   /* Starting pointer for partial soft matches. */
344   int hit_start;
345   /* End pointer of the first line. */
346   int first_line_end;
347   /* Points to the marked string. */
348   int mark_ptr;
349   /* Recursive control verb management chain. */
350   int control_head_ptr;
351   /* Points to the last matched capture block index. */
352   int capture_last_ptr;
353   /* Points to the starting position of the current match. */
354   int start_ptr;
355 
356   /* Flipped and lower case tables. */
357   const pcre_uint8 *fcc;
358   sljit_sw lcc;
359   /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
360   int mode;
361   /* TRUE, when minlength is greater than 0. */
362   BOOL might_be_empty;
363   /* \K is found in the pattern. */
364   BOOL has_set_som;
365   /* (*SKIP:arg) is found in the pattern. */
366   BOOL has_skip_arg;
367   /* (*THEN) is found in the pattern. */
368   BOOL has_then;
369   /* Needs to know the start position anytime. */
370   BOOL needs_start_ptr;
371   /* Currently in recurse or negative assert. */
372   BOOL local_exit;
373   /* Currently in a positive assert. */
374   BOOL positive_assert;
375   /* Newline control. */
376   int nltype;
377   pcre_uint32 nlmax;
378   pcre_uint32 nlmin;
379   int newline;
380   int bsr_nltype;
381   pcre_uint32 bsr_nlmax;
382   pcre_uint32 bsr_nlmin;
383   /* Dollar endonly. */
384   int endonly;
385   /* Tables. */
386   sljit_sw ctypes;
387   /* Named capturing brackets. */
388   pcre_uchar *name_table;
389   sljit_sw name_count;
390   sljit_sw name_entry_size;
391 
392   /* Labels and jump lists. */
393   struct sljit_label *partialmatchlabel;
394   struct sljit_label *quit_label;
395   struct sljit_label *forced_quit_label;
396   struct sljit_label *accept_label;
397   struct sljit_label *ff_newline_shortcut;
398   stub_list *stubs;
399   label_addr_list *label_addrs;
400   recurse_entry *entries;
401   recurse_entry *currententry;
402   jump_list *partialmatch;
403   jump_list *quit;
404   jump_list *positive_assert_quit;
405   jump_list *forced_quit;
406   jump_list *accept;
407   jump_list *calllimit;
408   jump_list *stackalloc;
409   jump_list *revertframes;
410   jump_list *wordboundary;
411   jump_list *anynewline;
412   jump_list *hspace;
413   jump_list *vspace;
414   jump_list *casefulcmp;
415   jump_list *caselesscmp;
416   jump_list *reset_match;
417   BOOL jscript_compat;
418 #ifdef SUPPORT_UTF
419   BOOL utf;
420 #ifdef SUPPORT_UCP
421   BOOL use_ucp;
422 #endif
423 #ifdef COMPILE_PCRE8
424   jump_list *utfreadchar;
425   jump_list *utfreadchar16;
426   jump_list *utfreadtype8;
427 #endif
428 #endif /* SUPPORT_UTF */
429 #ifdef SUPPORT_UCP
430   jump_list *getucd;
431 #endif
432 } compiler_common;
433 
/* For byte_sequence_compare. When SLJIT_UNALIGNED is enabled the context
additionally carries two unions (c and oc) that overlay an int / ushort
view with an array of character units; the array length depends on the
COMPILE_PCRE8/16/32 build mode. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
467 
/* Undefine sljit macros. CMP is redefined below as a local shortcut. */
#undef CMP

/* Used for accessing the elements of the stack. Yields a negative byte
offset: STACK(0) is -sizeof(sljit_sw), STACK(1) is -2 * sizeof(sljit_sw). */
#define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_sw))

/* Fixed register assignment used by the generated code. */
#define TMP1          SLJIT_R0
#define TMP2          SLJIT_R2
#define TMP3          SLJIT_R3
#define STR_PTR       SLJIT_S0
#define STR_END       SLJIT_S1
#define STACK_TOP     SLJIT_R1
#define STACK_LIMIT   SLJIT_S2
#define COUNT_MATCH   SLJIT_S3
#define ARGUMENTS     SLJIT_S4
#define RETURN_ADDR   SLJIT_R4
484 
/* Local space layout. All values are byte offsets in units of
sizeof(sljit_sw). */
/* These two locals can be used by the current opcode. */
#define LOCALS0          (0 * sizeof(sljit_sw))
#define LOCALS1          (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_sw))
#define POSSESSIVE1      (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH      (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet
been reached. These macros expect a variable named "common" in scope. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data slot of the opcode at cc (indexed by its distance
from common->start). */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
502 
503 #if defined COMPILE_PCRE8
504 #define MOV_UCHAR  SLJIT_MOV_UB
505 #define MOVU_UCHAR SLJIT_MOVU_UB
506 #elif defined COMPILE_PCRE16
507 #define MOV_UCHAR  SLJIT_MOV_UH
508 #define MOVU_UCHAR SLJIT_MOVU_UH
509 #elif defined COMPILE_PCRE32
510 #define MOV_UCHAR  SLJIT_MOV_UI
511 #define MOVU_UCHAR SLJIT_MOVU_UI
512 #else
513 #error Unsupported compiling mode
514 #endif
515 
/* Shortcuts. DEFINE_COMPILER declares the local variable "compiler" from
"common"; every other macro below forwards to an sljit call and expects
that "compiler" variable to be in scope. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emit a jump and bind it to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Bind an earlier jump to a label emitted at the current position. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emit a compare and bind it to an existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, src, srcw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
541 
#define READ_CHAR_MAX 0x7fffffff
543 
/* Returns the address of the first opcode after a bracket.
cc must point to the opening opcode of an assertion
(OP_ASSERT .. OP_ASSERTBACK_NOT) or of a bracket (OP_ONCE .. OP_SCOND).
The link stored after the opening opcode and after each OP_ALT is
followed until the closing opcode (OP_KET .. OP_KETRPOS) is reached,
then that closing opcode and its link are skipped. */
static pcre_uchar *bracketend(pcre_uchar *cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
do cc += GET(cc, 1); while (*cc == OP_ALT);
SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
cc += 1 + LINK_SIZE;
return cc;
}
552 
/* Returns the number of alternatives of a bracket (always at least 1).
cc must point to the opening opcode of an assertion
(OP_ASSERT .. OP_ASSERTBACK_NOT) or of a bracket (OP_ONCE .. OP_SCOND).
One alternative is counted for the opening opcode and one more for
every OP_ALT found by following the links. */
static int no_alternatives(pcre_uchar *cc)
{
int count = 0;
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
do
  {
  cc += GET(cc, 1);
  count++;
  }
while (*cc == OP_ALT);
SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return count;
}
566 
/* Number of set bits in each 4 bit value (0 .. 15). The table is never
written, hence const. */
static const int ones_in_half_byte[16] = {
  /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
  /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
};
571 
572 /* Functions that might need modification for all new supported opcodes:
573  next_opcode
574  check_opcode_types
575  set_private_data_ptrs
576  get_framesize
577  init_frame
578  get_private_data_copy_length
579  copy_private_data
580  compile_matchingpath
581  compile_backtrackingpath
582 */
583 
/* Returns the address of the opcode that follows the one at cc.

Arguments:
  common      compiler state; only its utf flag is read, and only when
              SUPPORT_UTF is defined
  cc          points to the opcode to step over

Returns:      pointer to the next opcode, or NULL when the opcode cannot
              be handled (OP_ANYBYTE in UTF mode, or an unknown opcode)
*/
static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
{
SLJIT_UNUSED_ARG(common);
switch(*cc)
  {
  /* Fixed length opcodes: the length comes from the OP_lengths table. */
  case OP_SOD:
  case OP_SOM:
  case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  case OP_ANY:
  case OP_ALLANY:
  case OP_NOTPROP:
  case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE:
  case OP_HSPACE:
  case OP_NOT_VSPACE:
  case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN:
  case OP_EOD:
  case OP_CIRC:
  case OP_CIRCM:
  case OP_DOLL:
  case OP_DOLLM:
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  case OP_CRPOSSTAR:
  case OP_CRPOSPLUS:
  case OP_CRPOSQUERY:
  case OP_CRPOSRANGE:
  case OP_CLASS:
  case OP_NCLASS:
  case OP_REF:
  case OP_REFI:
  case OP_DNREF:
  case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET:
  case OP_KETRMAX:
  case OP_KETRMIN:
  case OP_KETRPOS:
  case OP_REVERSE:
  case OP_ASSERT:
  case OP_ASSERT_NOT:
  case OP_ASSERTBACK:
  case OP_ASSERTBACK_NOT:
  case OP_ONCE:
  case OP_ONCE_NC:
  case OP_BRA:
  case OP_BRAPOS:
  case OP_CBRA:
  case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA:
  case OP_SBRAPOS:
  case OP_SCBRA:
  case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF:
  case OP_DNCREF:
  case OP_RREF:
  case OP_DNRREF:
  case OP_DEF:
  case OP_BRAZERO:
  case OP_BRAMINZERO:
  case OP_BRAPOSZERO:
  case OP_PRUNE:
  case OP_SKIP:
  case OP_THEN:
  case OP_COMMIT:
  case OP_FAIL:
  case OP_ACCEPT:
  case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  return cc + PRIV(OP_lengths)[*cc];

  /* Opcodes that end with a character: in UTF mode the character may
  occupy extra code units, which are skipped as well. */
  case OP_CHAR:
  case OP_CHARI:
  case OP_NOT:
  case OP_NOTI:
  case OP_STAR:
  case OP_MINSTAR:
  case OP_PLUS:
  case OP_MINPLUS:
  case OP_QUERY:
  case OP_MINQUERY:
  case OP_UPTO:
  case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSPLUS:
  case OP_POSQUERY:
  case OP_POSUPTO:
  case OP_STARI:
  case OP_MINSTARI:
  case OP_PLUSI:
  case OP_MINPLUSI:
  case OP_QUERYI:
  case OP_MINQUERYI:
  case OP_UPTOI:
  case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI:
  case OP_POSPLUSI:
  case OP_POSQUERYI:
  case OP_POSUPTOI:
  case OP_NOTSTAR:
  case OP_NOTMINSTAR:
  case OP_NOTPLUS:
  case OP_NOTMINPLUS:
  case OP_NOTQUERY:
  case OP_NOTMINQUERY:
  case OP_NOTUPTO:
  case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR:
  case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY:
  case OP_NOTPOSUPTO:
  case OP_NOTSTARI:
  case OP_NOTMINSTARI:
  case OP_NOTPLUSI:
  case OP_NOTMINPLUSI:
  case OP_NOTQUERYI:
  case OP_NOTMINQUERYI:
  case OP_NOTUPTOI:
  case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI:
  case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI:
  case OP_NOTPOSUPTOI:
  cc += PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
  return cc;

  /* Special cases. The result is one unit short of the table length
  (presumably so that the embedded type opcode is visited next - confirm). */
  case OP_TYPESTAR:
  case OP_TYPEMINSTAR:
  case OP_TYPEPLUS:
  case OP_TYPEMINPLUS:
  case OP_TYPEQUERY:
  case OP_TYPEMINQUERY:
  case OP_TYPEUPTO:
  case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR:
  case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY:
  case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[*cc] - 1;

  /* Rejected (NULL) in UTF mode. */
  case OP_ANYBYTE:
#ifdef SUPPORT_UTF
  if (common->utf) return NULL;
#endif
  return cc + 1;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  /* The total length is stored after the opcode. */
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  /* Opcode, a length unit (cc[1]), the argument of cc[1] units and
  one more unit. */
  case OP_MARK:
  case OP_PRUNE_ARG:
  case OP_SKIP_ARG:
  case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  default:
  /* All opcodes are supported now! */
  SLJIT_ASSERT_STOP();
  return NULL;
  }
}
778 
check_opcode_types(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend)779 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
780 {
781 int count;
782 pcre_uchar *slot;
783 
784 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
785 while (cc < ccend)
786   {
787   switch(*cc)
788     {
789     case OP_SET_SOM:
790     common->has_set_som = TRUE;
791     common->might_be_empty = TRUE;
792     cc += 1;
793     break;
794 
795     case OP_REF:
796     case OP_REFI:
797     common->optimized_cbracket[GET2(cc, 1)] = 0;
798     cc += 1 + IMM2_SIZE;
799     break;
800 
801     case OP_CBRAPOS:
802     case OP_SCBRAPOS:
803     common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
804     cc += 1 + LINK_SIZE + IMM2_SIZE;
805     break;
806 
807     case OP_COND:
808     case OP_SCOND:
809     /* Only AUTO_CALLOUT can insert this opcode. We do
810        not intend to support this case. */
811     if (cc[1 + LINK_SIZE] == OP_CALLOUT)
812       return FALSE;
813     cc += 1 + LINK_SIZE;
814     break;
815 
816     case OP_CREF:
817     common->optimized_cbracket[GET2(cc, 1)] = 0;
818     cc += 1 + IMM2_SIZE;
819     break;
820 
821     case OP_DNREF:
822     case OP_DNREFI:
823     case OP_DNCREF:
824     count = GET2(cc, 1 + IMM2_SIZE);
825     slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
826     while (count-- > 0)
827       {
828       common->optimized_cbracket[GET2(slot, 0)] = 0;
829       slot += common->name_entry_size;
830       }
831     cc += 1 + 2 * IMM2_SIZE;
832     break;
833 
834     case OP_RECURSE:
835     /* Set its value only once. */
836     if (common->recursive_head_ptr == 0)
837       {
838       common->recursive_head_ptr = common->ovector_start;
839       common->ovector_start += sizeof(sljit_sw);
840       }
841     cc += 1 + LINK_SIZE;
842     break;
843 
844     case OP_CALLOUT:
845     if (common->capture_last_ptr == 0)
846       {
847       common->capture_last_ptr = common->ovector_start;
848       common->ovector_start += sizeof(sljit_sw);
849       }
850     cc += 2 + 2 * LINK_SIZE;
851     break;
852 
853     case OP_THEN_ARG:
854     common->has_then = TRUE;
855     common->control_head_ptr = 1;
856     /* Fall through. */
857 
858     case OP_PRUNE_ARG:
859     common->needs_start_ptr = TRUE;
860     /* Fall through. */
861 
862     case OP_MARK:
863     if (common->mark_ptr == 0)
864       {
865       common->mark_ptr = common->ovector_start;
866       common->ovector_start += sizeof(sljit_sw);
867       }
868     cc += 1 + 2 + cc[1];
869     break;
870 
871     case OP_THEN:
872     common->has_then = TRUE;
873     common->control_head_ptr = 1;
874     /* Fall through. */
875 
876     case OP_PRUNE:
877     case OP_SKIP:
878     common->needs_start_ptr = TRUE;
879     cc += 1;
880     break;
881 
882     case OP_SKIP_ARG:
883     common->control_head_ptr = 1;
884     common->has_skip_arg = TRUE;
885     cc += 1 + 2 + cc[1];
886     break;
887 
888     default:
889     cc = next_opcode(common, cc);
890     if (cc == NULL)
891       return FALSE;
892     break;
893     }
894   }
895 return TRUE;
896 }
897 
/* Returns a size (0, 1 or 2) for the class repeat opcode at cc.
Presumably the unit is private data words - confirm against the caller.

  OP_CRSTAR, OP_CRPLUS                          2
  OP_CRMINSTAR, OP_CRMINPLUS,
  OP_CRQUERY, OP_CRMINQUERY                     1
  OP_CRRANGE, OP_CRMINRANGE                     0 when the two stored
                                                limits are equal, else 2
  anything else                                 0
*/
static int get_class_iterator_size(pcre_uchar *cc)
{
switch(*cc)
  {
  case OP_CRSTAR:
  case OP_CRPLUS:
  return 2;

  case OP_CRMINSTAR:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  return 1;

  case OP_CRRANGE:
  case OP_CRMINRANGE:
  if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
    return 0;
  return 2;

  default:
  return 0;
  }
}
922 
detect_repeat(compiler_common * common,pcre_uchar * begin)923 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
924 {
925 pcre_uchar *end = bracketend(begin);
926 pcre_uchar *next;
927 pcre_uchar *next_end;
928 pcre_uchar *max_end;
929 pcre_uchar type;
930 sljit_sw length = end - begin;
931 int min, max, i;
932 
933 /* Detect fixed iterations first. */
934 if (end[-(1 + LINK_SIZE)] != OP_KET)
935   return FALSE;
936 
937 /* Already detected repeat. */
938 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
939   return TRUE;
940 
941 next = end;
942 min = 1;
943 while (1)
944   {
945   if (*next != *begin)
946     break;
947   next_end = bracketend(next);
948   if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
949     break;
950   next = next_end;
951   min++;
952   }
953 
954 if (min == 2)
955   return FALSE;
956 
957 max = 0;
958 max_end = next;
959 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
960   {
961   type = *next;
962   while (1)
963     {
964     if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
965       break;
966     next_end = bracketend(next + 2 + LINK_SIZE);
967     if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
968       break;
969     next = next_end;
970     max++;
971     }
972 
973   if (next[0] == type && next[1] == *begin && max >= 1)
974     {
975     next_end = bracketend(next + 1);
976     if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
977       {
978       for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
979         if (*next_end != OP_KET)
980           break;
981 
982       if (i == max)
983         {
984         common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
985         common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
986         /* +2 the original and the last. */
987         common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
988         if (min == 1)
989           return TRUE;
990         min--;
991         max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
992         }
993       }
994     }
995   }
996 
997 if (min >= 3)
998   {
999   common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1000   common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1001   common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1002   return TRUE;
1003   }
1004 
1005 return FALSE;
1006 }
1007 
/* Groups of case labels for switch statements over single character
iterator opcodes. Each macro expands to a run of "case OP_...:" labels,
so it must be followed by the statements for that group. Judging by the
names, the suffix is the number of private data words the group needs
(confirm against set_private_data_ptrs). */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* The same grouping for the OP_TYPE... iterator opcodes. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1059 
/* Walks the compiled pattern from common->start up to ccend and records, in
common->private_data_ptrs[] (indexed by the opcode's offset from
common->start), the private data offset assigned to every opcode that gets
one.

Arguments:
  common              compiler state; private_data_ptrs[] is written
  private_data_start  in:  first free private data offset
                      out: first free offset after this walk
  ccend               end of the byte code to scan

The scan stops as soon as the running offset exceeds SLJIT_MAX_LOCAL_SIZE.
The oversized value is still stored through private_data_start: the function
returns nothing, so this output is the only way the caller can notice that
the limit was exceeded. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
{
pcre_uchar *cc = common->start;
pcre_uchar *alternative;
/* End of the bracket inside which character iterators must not get a
private slot; NULL when there is no such bracket. */
pcre_uchar *end = NULL;
int private_data_ptr = *private_data_start;
int space, size, bracketlen;

while (cc < ccend)
  {
  space = 0;
  size = 0;
  bracketlen = 0;
  /* Leave the loop (instead of returning) so that the write-back after the
  loop reports the overflowing size. */
  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
    if (detect_repeat(common, cc))
      {
      /* These brackets are converted to repeats, so no global
      based single character repeat is allowed. */
      if (cc >= end)
        end = bracketend(cc);
      }

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry after the KET was left there by detect_repeat(): it
    is the distance to skip over the copies folded into the repeat. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    /* For the iterator groups, space is the number of private words and
    size is the opcode length; a negative size means that a character
    follows, which may have extra UTF code units. */
    CASE_ITERATOR_PRIVATE_DATA_1
    space = 1;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    space = 2;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    space = 2;
    size = -(2 + IMM2_SIZE);
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    space = 1;
    size = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      space = 2;
    size = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    size = 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    size += 1 + 32 / sizeof(pcre_uchar);
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
     gets a private slot instead of allocating it on the stack. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw) * space;
    }

  if (size != 0)
    {
    if (size < 0)
      {
      cc += -size;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  if (bracketlen > 0)
    {
    if (cc >= end)
      {
      end = bracketend(cc);
      /* A bracket closed by a plain OP_KET places no restriction on the
      iterators inside it. */
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
*private_data_start = private_data_ptr;
}
1224 
1225 /* Returns with a frame_types (always < 0) if no need for frame. */
get_framesize(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,BOOL recursive,BOOL * needs_control_head)1226 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1227 {
1228 int length = 0;
1229 int possessive = 0;
1230 BOOL stack_restore = FALSE;
1231 BOOL setsom_found = recursive;
1232 BOOL setmark_found = recursive;
1233 /* The last capture is a local variable even for recursions. */
1234 BOOL capture_last_found = FALSE;
1235 
1236 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1237 SLJIT_ASSERT(common->control_head_ptr != 0);
1238 *needs_control_head = TRUE;
1239 #else
1240 *needs_control_head = FALSE;
1241 #endif
1242 
1243 if (ccend == NULL)
1244   {
1245   ccend = bracketend(cc) - (1 + LINK_SIZE);
1246   if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1247     {
1248     possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1249     /* This is correct regardless of common->capture_last_ptr. */
1250     capture_last_found = TRUE;
1251     }
1252   cc = next_opcode(common, cc);
1253   }
1254 
1255 SLJIT_ASSERT(cc != NULL);
1256 while (cc < ccend)
1257   switch(*cc)
1258     {
1259     case OP_SET_SOM:
1260     SLJIT_ASSERT(common->has_set_som);
1261     stack_restore = TRUE;
1262     if (!setsom_found)
1263       {
1264       length += 2;
1265       setsom_found = TRUE;
1266       }
1267     cc += 1;
1268     break;
1269 
1270     case OP_MARK:
1271     case OP_PRUNE_ARG:
1272     case OP_THEN_ARG:
1273     SLJIT_ASSERT(common->mark_ptr != 0);
1274     stack_restore = TRUE;
1275     if (!setmark_found)
1276       {
1277       length += 2;
1278       setmark_found = TRUE;
1279       }
1280     if (common->control_head_ptr != 0)
1281       *needs_control_head = TRUE;
1282     cc += 1 + 2 + cc[1];
1283     break;
1284 
1285     case OP_RECURSE:
1286     stack_restore = TRUE;
1287     if (common->has_set_som && !setsom_found)
1288       {
1289       length += 2;
1290       setsom_found = TRUE;
1291       }
1292     if (common->mark_ptr != 0 && !setmark_found)
1293       {
1294       length += 2;
1295       setmark_found = TRUE;
1296       }
1297     if (common->capture_last_ptr != 0 && !capture_last_found)
1298       {
1299       length += 2;
1300       capture_last_found = TRUE;
1301       }
1302     cc += 1 + LINK_SIZE;
1303     break;
1304 
1305     case OP_CBRA:
1306     case OP_CBRAPOS:
1307     case OP_SCBRA:
1308     case OP_SCBRAPOS:
1309     stack_restore = TRUE;
1310     if (common->capture_last_ptr != 0 && !capture_last_found)
1311       {
1312       length += 2;
1313       capture_last_found = TRUE;
1314       }
1315     length += 3;
1316     cc += 1 + LINK_SIZE + IMM2_SIZE;
1317     break;
1318 
1319     default:
1320     stack_restore = TRUE;
1321     /* Fall through. */
1322 
1323     case OP_NOT_WORD_BOUNDARY:
1324     case OP_WORD_BOUNDARY:
1325     case OP_NOT_DIGIT:
1326     case OP_DIGIT:
1327     case OP_NOT_WHITESPACE:
1328     case OP_WHITESPACE:
1329     case OP_NOT_WORDCHAR:
1330     case OP_WORDCHAR:
1331     case OP_ANY:
1332     case OP_ALLANY:
1333     case OP_ANYBYTE:
1334     case OP_NOTPROP:
1335     case OP_PROP:
1336     case OP_ANYNL:
1337     case OP_NOT_HSPACE:
1338     case OP_HSPACE:
1339     case OP_NOT_VSPACE:
1340     case OP_VSPACE:
1341     case OP_EXTUNI:
1342     case OP_EODN:
1343     case OP_EOD:
1344     case OP_CIRC:
1345     case OP_CIRCM:
1346     case OP_DOLL:
1347     case OP_DOLLM:
1348     case OP_CHAR:
1349     case OP_CHARI:
1350     case OP_NOT:
1351     case OP_NOTI:
1352 
1353     case OP_EXACT:
1354     case OP_POSSTAR:
1355     case OP_POSPLUS:
1356     case OP_POSQUERY:
1357     case OP_POSUPTO:
1358 
1359     case OP_EXACTI:
1360     case OP_POSSTARI:
1361     case OP_POSPLUSI:
1362     case OP_POSQUERYI:
1363     case OP_POSUPTOI:
1364 
1365     case OP_NOTEXACT:
1366     case OP_NOTPOSSTAR:
1367     case OP_NOTPOSPLUS:
1368     case OP_NOTPOSQUERY:
1369     case OP_NOTPOSUPTO:
1370 
1371     case OP_NOTEXACTI:
1372     case OP_NOTPOSSTARI:
1373     case OP_NOTPOSPLUSI:
1374     case OP_NOTPOSQUERYI:
1375     case OP_NOTPOSUPTOI:
1376 
1377     case OP_TYPEEXACT:
1378     case OP_TYPEPOSSTAR:
1379     case OP_TYPEPOSPLUS:
1380     case OP_TYPEPOSQUERY:
1381     case OP_TYPEPOSUPTO:
1382 
1383     case OP_CLASS:
1384     case OP_NCLASS:
1385     case OP_XCLASS:
1386 
1387     cc = next_opcode(common, cc);
1388     SLJIT_ASSERT(cc != NULL);
1389     break;
1390     }
1391 
1392 /* Possessive quantifiers can use a special case. */
1393 if (SLJIT_UNLIKELY(possessive == length))
1394   return stack_restore ? no_frame : no_stack;
1395 
1396 if (length > 0)
1397   return length + 1;
1398 return stack_restore ? no_frame : no_stack;
1399 }
1400 
init_frame(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,int stackpos,int stacktop,BOOL recursive)1401 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1402 {
1403 DEFINE_COMPILER;
1404 BOOL setsom_found = recursive;
1405 BOOL setmark_found = recursive;
1406 /* The last capture is a local variable even for recursions. */
1407 BOOL capture_last_found = FALSE;
1408 int offset;
1409 
1410 /* >= 1 + shortest item size (2) */
1411 SLJIT_UNUSED_ARG(stacktop);
1412 SLJIT_ASSERT(stackpos >= stacktop + 2);
1413 
1414 stackpos = STACK(stackpos);
1415 if (ccend == NULL)
1416   {
1417   ccend = bracketend(cc) - (1 + LINK_SIZE);
1418   if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1419     cc = next_opcode(common, cc);
1420   }
1421 
1422 SLJIT_ASSERT(cc != NULL);
1423 while (cc < ccend)
1424   switch(*cc)
1425     {
1426     case OP_SET_SOM:
1427     SLJIT_ASSERT(common->has_set_som);
1428     if (!setsom_found)
1429       {
1430       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1431       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1432       stackpos += (int)sizeof(sljit_sw);
1433       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1434       stackpos += (int)sizeof(sljit_sw);
1435       setsom_found = TRUE;
1436       }
1437     cc += 1;
1438     break;
1439 
1440     case OP_MARK:
1441     case OP_PRUNE_ARG:
1442     case OP_THEN_ARG:
1443     SLJIT_ASSERT(common->mark_ptr != 0);
1444     if (!setmark_found)
1445       {
1446       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1447       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1448       stackpos += (int)sizeof(sljit_sw);
1449       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1450       stackpos += (int)sizeof(sljit_sw);
1451       setmark_found = TRUE;
1452       }
1453     cc += 1 + 2 + cc[1];
1454     break;
1455 
1456     case OP_RECURSE:
1457     if (common->has_set_som && !setsom_found)
1458       {
1459       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1460       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1461       stackpos += (int)sizeof(sljit_sw);
1462       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1463       stackpos += (int)sizeof(sljit_sw);
1464       setsom_found = TRUE;
1465       }
1466     if (common->mark_ptr != 0 && !setmark_found)
1467       {
1468       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1469       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1470       stackpos += (int)sizeof(sljit_sw);
1471       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1472       stackpos += (int)sizeof(sljit_sw);
1473       setmark_found = TRUE;
1474       }
1475     if (common->capture_last_ptr != 0 && !capture_last_found)
1476       {
1477       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1478       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1479       stackpos += (int)sizeof(sljit_sw);
1480       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1481       stackpos += (int)sizeof(sljit_sw);
1482       capture_last_found = TRUE;
1483       }
1484     cc += 1 + LINK_SIZE;
1485     break;
1486 
1487     case OP_CBRA:
1488     case OP_CBRAPOS:
1489     case OP_SCBRA:
1490     case OP_SCBRAPOS:
1491     if (common->capture_last_ptr != 0 && !capture_last_found)
1492       {
1493       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1494       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1495       stackpos += (int)sizeof(sljit_sw);
1496       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1497       stackpos += (int)sizeof(sljit_sw);
1498       capture_last_found = TRUE;
1499       }
1500     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1501     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1502     stackpos += (int)sizeof(sljit_sw);
1503     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1504     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1505     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1506     stackpos += (int)sizeof(sljit_sw);
1507     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1508     stackpos += (int)sizeof(sljit_sw);
1509 
1510     cc += 1 + LINK_SIZE + IMM2_SIZE;
1511     break;
1512 
1513     default:
1514     cc = next_opcode(common, cc);
1515     SLJIT_ASSERT(cc != NULL);
1516     break;
1517     }
1518 
1519 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1520 SLJIT_ASSERT(stackpos == STACK(stacktop));
1521 }
1522 
get_private_data_copy_length(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,BOOL needs_control_head)1523 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1524 {
1525 int private_data_length = needs_control_head ? 3 : 2;
1526 int size;
1527 pcre_uchar *alternative;
1528 /* Calculate the sum of the private machine words. */
1529 while (cc < ccend)
1530   {
1531   size = 0;
1532   switch(*cc)
1533     {
1534     case OP_KET:
1535     if (PRIVATE_DATA(cc) != 0)
1536       {
1537       private_data_length++;
1538       SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1539       cc += PRIVATE_DATA(cc + 1);
1540       }
1541     cc += 1 + LINK_SIZE;
1542     break;
1543 
1544     case OP_ASSERT:
1545     case OP_ASSERT_NOT:
1546     case OP_ASSERTBACK:
1547     case OP_ASSERTBACK_NOT:
1548     case OP_ONCE:
1549     case OP_ONCE_NC:
1550     case OP_BRAPOS:
1551     case OP_SBRA:
1552     case OP_SBRAPOS:
1553     case OP_SCOND:
1554     private_data_length++;
1555     SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1556     cc += 1 + LINK_SIZE;
1557     break;
1558 
1559     case OP_CBRA:
1560     case OP_SCBRA:
1561     if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1562       private_data_length++;
1563     cc += 1 + LINK_SIZE + IMM2_SIZE;
1564     break;
1565 
1566     case OP_CBRAPOS:
1567     case OP_SCBRAPOS:
1568     private_data_length += 2;
1569     cc += 1 + LINK_SIZE + IMM2_SIZE;
1570     break;
1571 
1572     case OP_COND:
1573     /* Might be a hidden SCOND. */
1574     alternative = cc + GET(cc, 1);
1575     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1576       private_data_length++;
1577     cc += 1 + LINK_SIZE;
1578     break;
1579 
1580     CASE_ITERATOR_PRIVATE_DATA_1
1581     if (PRIVATE_DATA(cc))
1582       private_data_length++;
1583     cc += 2;
1584 #ifdef SUPPORT_UTF
1585     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1586 #endif
1587     break;
1588 
1589     CASE_ITERATOR_PRIVATE_DATA_2A
1590     if (PRIVATE_DATA(cc))
1591       private_data_length += 2;
1592     cc += 2;
1593 #ifdef SUPPORT_UTF
1594     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1595 #endif
1596     break;
1597 
1598     CASE_ITERATOR_PRIVATE_DATA_2B
1599     if (PRIVATE_DATA(cc))
1600       private_data_length += 2;
1601     cc += 2 + IMM2_SIZE;
1602 #ifdef SUPPORT_UTF
1603     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1604 #endif
1605     break;
1606 
1607     CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1608     if (PRIVATE_DATA(cc))
1609       private_data_length++;
1610     cc += 1;
1611     break;
1612 
1613     CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1614     if (PRIVATE_DATA(cc))
1615       private_data_length += 2;
1616     cc += 1;
1617     break;
1618 
1619     CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1620     if (PRIVATE_DATA(cc))
1621       private_data_length += 2;
1622     cc += 1 + IMM2_SIZE;
1623     break;
1624 
1625     case OP_CLASS:
1626     case OP_NCLASS:
1627 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1628     case OP_XCLASS:
1629     size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1630 #else
1631     size = 1 + 32 / (int)sizeof(pcre_uchar);
1632 #endif
1633     if (PRIVATE_DATA(cc))
1634       private_data_length += get_class_iterator_size(cc + size);
1635     cc += size;
1636     break;
1637 
1638     default:
1639     cc = next_opcode(common, cc);
1640     SLJIT_ASSERT(cc != NULL);
1641     break;
1642     }
1643   }
1644 SLJIT_ASSERT(cc == ccend);
1645 return private_data_length;
1646 }
1647 
/* Emits the code which copies the private data words of the byte code range
[cc, ccend) between their SLJIT_SP-relative slots and the stack addressed
through STACK_TOP.

  save == TRUE   the words are read from the slots and written to the stack;
                 common->recursive_head_ptr (and common->control_head_ptr
                 when needs_control_head is set) are processed before the
                 words of the opcodes
  save == FALSE  the words are read from the stack and written back to the
                 slots; the leading one or two stack words are stepped over
                 and are not restored by this function

The opcode scan mirrors get_private_data_copy_length(), and the final
assertion checks that exactly the words between stackptr and stacktop were
consumed. TMP1 and TMP2 are used in turn as staging registers; tmp1next
tells which of them is used next, and tmp1empty / tmp2empty tell whether the
register currently holds a value which is still waiting to be written. */
copy_private_data(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,BOOL save,int stackptr,int stacktop,BOOL needs_control_head)1648 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1649   BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1650 {
1651 DEFINE_COMPILER;
/* Slot offsets collected for the current step (at most two). */
1652 int srcw[2];
1653 int count, size;
1654 BOOL tmp1next = TRUE;
1655 BOOL tmp1empty = TRUE;
1656 BOOL tmp2empty = TRUE;
1657 pcre_uchar *alternative;
/* start: process the head pointers (save mode only); loop: scan the
opcodes; end: the scan reached ccend. */
1658 enum {
1659   start,
1660   loop,
1661   end
1662 } status;
1663
1664 status = save ? start : loop;
/* Both positions are converted with STACK(); from here on they are advanced
in sizeof(sljit_sw) steps and compared directly. */
1665 stackptr = STACK(stackptr - 2);
1666 stacktop = STACK(stacktop - 1);
1667
1668 if (!save)
1669   {
/* Step over the leading word(s), then preload up to two stack words so that
each register already holds the value for the next slot. */
1670   stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1671   if (stackptr < stacktop)
1672     {
1673     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1674     stackptr += sizeof(sljit_sw);
1675     tmp1empty = FALSE;
1676     }
1677   if (stackptr < stacktop)
1678     {
1679     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1680     stackptr += sizeof(sljit_sw);
1681     tmp2empty = FALSE;
1682     }
1683   /* The tmp1next must be TRUE in either way. */
1684   }
1685
/* Each iteration selects up to two slot offsets (count entries in srcw[]),
which are then transferred by the inner while loop. */
1686 do
1687   {
1688   count = 0;
1689   switch(status)
1690     {
1691     case start:
1692     SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1693     count = 1;
1694     srcw[0] = common->recursive_head_ptr;
1695     if (needs_control_head)
1696       {
1697       SLJIT_ASSERT(common->control_head_ptr != 0);
1698       count = 2;
1699       srcw[1] = common->control_head_ptr;
1700       }
1701     status = loop;
1702     break;
1703
1704     case loop:
1705     if (cc >= ccend)
1706       {
1707       status = end;
1708       break;
1709       }
1710
1711     switch(*cc)
1712       {
1713       case OP_KET:
1714       if (PRIVATE_DATA(cc) != 0)
1715         {
1716         count = 1;
1717         srcw[0] = PRIVATE_DATA(cc);
1718         SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
/* The entry after the KET holds the distance to step over. */
1719         cc += PRIVATE_DATA(cc + 1);
1720         }
1721       cc += 1 + LINK_SIZE;
1722       break;
1723
1724       case OP_ASSERT:
1725       case OP_ASSERT_NOT:
1726       case OP_ASSERTBACK:
1727       case OP_ASSERTBACK_NOT:
1728       case OP_ONCE:
1729       case OP_ONCE_NC:
1730       case OP_BRAPOS:
1731       case OP_SBRA:
1732       case OP_SBRAPOS:
1733       case OP_SCOND:
1734       count = 1;
1735       srcw[0] = PRIVATE_DATA(cc);
1736       SLJIT_ASSERT(srcw[0] != 0);
1737       cc += 1 + LINK_SIZE;
1738       break;
1739
1740       case OP_CBRA:
1741       case OP_SCBRA:
/* Only brackets whose optimized_cbracket entry is zero have an OVECTOR_PRIV
word to copy. */
1742       if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1743         {
1744         count = 1;
1745         srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1746         }
1747       cc += 1 + LINK_SIZE + IMM2_SIZE;
1748       break;
1749
1750       case OP_CBRAPOS:
1751       case OP_SCBRAPOS:
1752       count = 2;
1753       srcw[0] = PRIVATE_DATA(cc);
1754       srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1755       SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1756       cc += 1 + LINK_SIZE + IMM2_SIZE;
1757       break;
1758
1759       case OP_COND:
1760       /* Might be a hidden SCOND. */
1761       alternative = cc + GET(cc, 1);
1762       if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1763         {
1764         count = 1;
1765         srcw[0] = PRIVATE_DATA(cc);
1766         SLJIT_ASSERT(srcw[0] != 0);
1767         }
1768       cc += 1 + LINK_SIZE;
1769       break;
1770
1771       CASE_ITERATOR_PRIVATE_DATA_1
1772       if (PRIVATE_DATA(cc))
1773         {
1774         count = 1;
1775         srcw[0] = PRIVATE_DATA(cc);
1776         }
1777       cc += 2;
1778 #ifdef SUPPORT_UTF
1779       if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1780 #endif
1781       break;
1782
1783       CASE_ITERATOR_PRIVATE_DATA_2A
1784       if (PRIVATE_DATA(cc))
1785         {
/* Two consecutive words starting at the recorded offset. */
1786         count = 2;
1787         srcw[0] = PRIVATE_DATA(cc);
1788         srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1789         }
1790       cc += 2;
1791 #ifdef SUPPORT_UTF
1792       if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1793 #endif
1794       break;
1795
1796       CASE_ITERATOR_PRIVATE_DATA_2B
1797       if (PRIVATE_DATA(cc))
1798         {
1799         count = 2;
1800         srcw[0] = PRIVATE_DATA(cc);
1801         srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1802         }
1803       cc += 2 + IMM2_SIZE;
1804 #ifdef SUPPORT_UTF
1805       if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1806 #endif
1807       break;
1808
1809       CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1810       if (PRIVATE_DATA(cc))
1811         {
1812         count = 1;
1813         srcw[0] = PRIVATE_DATA(cc);
1814         }
1815       cc += 1;
1816       break;
1817
1818       CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1819       if (PRIVATE_DATA(cc))
1820         {
1821         count = 2;
1822         srcw[0] = PRIVATE_DATA(cc);
1823         srcw[1] = srcw[0] + sizeof(sljit_sw);
1824         }
1825       cc += 1;
1826       break;
1827
1828       CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1829       if (PRIVATE_DATA(cc))
1830         {
1831         count = 2;
1832         srcw[0] = PRIVATE_DATA(cc);
1833         srcw[1] = srcw[0] + sizeof(sljit_sw);
1834         }
1835       cc += 1 + IMM2_SIZE;
1836       break;
1837
1838       case OP_CLASS:
1839       case OP_NCLASS:
1840 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1841       case OP_XCLASS:
1842       size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1843 #else
1844       size = 1 + 32 / (int)sizeof(pcre_uchar);
1845 #endif
/* The number of words depends on the iterator which follows the class. */
1846       if (PRIVATE_DATA(cc))
1847         switch(get_class_iterator_size(cc + size))
1848           {
1849           case 1:
1850           count = 1;
1851           srcw[0] = PRIVATE_DATA(cc);
1852           break;
1853
1854           case 2:
1855           count = 2;
1856           srcw[0] = PRIVATE_DATA(cc);
1857           srcw[1] = srcw[0] + sizeof(sljit_sw);
1858           break;
1859
1860           default:
1861           SLJIT_ASSERT_STOP();
1862           break;
1863           }
1864       cc += size;
1865       break;
1866
1867       default:
1868       cc = next_opcode(common, cc);
1869       SLJIT_ASSERT(cc != NULL);
1870       break;
1871       }
1872     break;
1873
1874     case end:
1875     SLJIT_ASSERT_STOP();
1876     break;
1877     }
1878
/* Transfer the collected words. srcw[] is consumed from the highest index
down, and the two registers are used alternately. */
1879   while (count > 0)
1880     {
1881     count--;
1882     if (save)
1883       {
/* Save: write out the value the register still holds (if any), then load
the next slot into it. */
1884       if (tmp1next)
1885         {
1886         if (!tmp1empty)
1887           {
1888           OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1889           stackptr += sizeof(sljit_sw);
1890           }
1891         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1892         tmp1empty = FALSE;
1893         tmp1next = FALSE;
1894         }
1895       else
1896         {
1897         if (!tmp2empty)
1898           {
1899           OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1900           stackptr += sizeof(sljit_sw);
1901           }
1902         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1903         tmp2empty = FALSE;
1904         tmp1next = TRUE;
1905         }
1906       }
1907     else
1908       {
/* Restore: write the preloaded register into the slot, then refill the
register from the stack unless the end of the saved area was reached. */
1909       if (tmp1next)
1910         {
1911         SLJIT_ASSERT(!tmp1empty);
1912         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
1913         tmp1empty = stackptr >= stacktop;
1914         if (!tmp1empty)
1915           {
1916           OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1917           stackptr += sizeof(sljit_sw);
1918           }
1919         tmp1next = FALSE;
1920         }
1921       else
1922         {
1923         SLJIT_ASSERT(!tmp2empty);
1924         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
1925         tmp2empty = stackptr >= stacktop;
1926         if (!tmp2empty)
1927           {
1928           OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1929           stackptr += sizeof(sljit_sw);
1930           }
1931         tmp1next = TRUE;
1932         }
1933       }
1934     }
1935   }
1936 while (status != end);
1937
1938 if (save)
1939   {
/* Write out the values still held in the registers. The register which
would be used next was loaded earlier than the other one, so it is written
first. */
1940   if (tmp1next)
1941     {
1942     if (!tmp1empty)
1943       {
1944       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1945       stackptr += sizeof(sljit_sw);
1946       }
1947     if (!tmp2empty)
1948       {
1949       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1950       stackptr += sizeof(sljit_sw);
1951       }
1952     }
1953   else
1954     {
1955     if (!tmp2empty)
1956       {
1957       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1958       stackptr += sizeof(sljit_sw);
1959       }
1960     if (!tmp1empty)
1961       {
1962       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1963       stackptr += sizeof(sljit_sw);
1964       }
1965     }
1966   }
/* The whole range and the whole stack area must have been consumed; in
restore mode no loaded value may be left unused. */
1967 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1968 }
1969 
set_then_offsets(compiler_common * common,pcre_uchar * cc,pcre_uint8 * current_offset)1970 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1971 {
1972 pcre_uchar *end = bracketend(cc);
1973 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1974 
1975 /* Assert captures then. */
1976 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1977   current_offset = NULL;
1978 /* Conditional block does not. */
1979 if (*cc == OP_COND || *cc == OP_SCOND)
1980   has_alternatives = FALSE;
1981 
1982 cc = next_opcode(common, cc);
1983 if (has_alternatives)
1984   current_offset = common->then_offsets + (cc - common->start);
1985 
1986 while (cc < end)
1987   {
1988   if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1989     cc = set_then_offsets(common, cc, current_offset);
1990   else
1991     {
1992     if (*cc == OP_ALT && has_alternatives)
1993       current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1994     if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1995       *current_offset = 1;
1996     cc = next_opcode(common, cc);
1997     }
1998   }
1999 
2000 return end;
2001 }
2002 
/* The case label groups are used only by the private data functions above,
so they are removed here. */
2003 #undef CASE_ITERATOR_PRIVATE_DATA_1
2004 #undef CASE_ITERATOR_PRIVATE_DATA_2A
2005 #undef CASE_ITERATOR_PRIVATE_DATA_2B
2006 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2007 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2008 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2009 
is_powerof2(unsigned int value)2010 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2011 {
2012 return (value & (value - 1)) == 0;
2013 }
2014 
set_jumps(jump_list * list,struct sljit_label * label)2015 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2016 {
2017 while (list)
2018   {
2019   /* sljit_set_label is clever enough to do nothing
2020   if either the jump or the label is NULL. */
2021   SET_LABEL(list->jump, label);
2022   list = list->next;
2023   }
2024 }
2025 
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)2026 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2027 {
2028 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2029 if (list_item)
2030   {
2031   list_item->next = *list;
2032   list_item->jump = jump;
2033   *list = list_item;
2034   }
2035 }
2036 
/* Registers a stub at the head of common->stubs. The stub consists of the
jump which enters it (start) and a label emitted at the current position
(quit), to which flush_stubs() jumps back. If the list item cannot be
allocated, no label is emitted and the list is left as it was. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *new_stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (new_stub == NULL)
  return;

new_stub->start = start;
new_stub->quit = LABEL();
new_stub->next = common->stubs;
common->stubs = new_stub;
}
2050 
/* Emits the body of every stub collected by add_stub() and empties the
list. Each body is entered through the start jump of the stub, performs a
fast call which is added to common->stackalloc, and then jumps back to the
quit label of the stub. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
2065 
/* Emits a label at the current position and records it, together with
update_addr, at the head of common->label_addrs. Nothing is emitted or
recorded when the list item cannot be allocated. */
static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
{
DEFINE_COMPILER;
label_addr_list *entry = sljit_alloc_memory(compiler, sizeof(label_addr_list));

if (entry != NULL)
  {
  entry->label = LABEL();
  entry->update_addr = update_addr;
  entry->next = common->label_addrs;
  common->label_addrs = entry;
  }
}
2079 
count_match(compiler_common * common)2080 static SLJIT_INLINE void count_match(compiler_common *common)
2081 {
2082 DEFINE_COMPILER;
2083 
2084 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2085 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2086 }
2087 
allocate_stack(compiler_common * common,int size)2088 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2089 {
2090 /* May destroy all locals and registers except TMP2. */
2091 DEFINE_COMPILER;
2092 
2093 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2094 #ifdef DESTROY_REGISTERS
2095 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2096 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2097 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2098 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2099 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2100 #endif
2101 add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2102 }
2103 
free_stack(compiler_common * common,int size)2104 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2105 {
2106 DEFINE_COMPILER;
2107 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2108 }
2109 
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
/* Allocates 'size' bytes of data plus one leading word. The leading word
links the chunk to the previously allocated one (the list head is kept in
common->read_only_data_head) so free_read_only_data() can release them all.
Returns a pointer just past the link word, or NULL if the compiler is
already in an error state or the allocation fails (in the latter case a
memory error is recorded on the compiler). */
DEFINE_COMPILER;
sljit_uw *chunk;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

chunk = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
if (SLJIT_UNLIKELY(chunk == NULL))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

/* Push the new chunk onto the front of the chain. */
*(void**)chunk = common->read_only_data_head;
common->read_only_data_head = (void *)chunk;

return &chunk[1];
}
2129 
free_read_only_data(void * current,void * allocator_data)2130 static void free_read_only_data(void *current, void *allocator_data)
2131 {
2132 void *next;
2133 
2134 SLJIT_UNUSED_ARG(allocator_data);
2135 
2136 while (current != NULL)
2137   {
2138   next = *(void**)current;
2139   SLJIT_FREE(current, allocator_data);
2140   current = next;
2141   }
2142 }
2143 
reset_ovector(compiler_common * common,int length)2144 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2145 {
2146 DEFINE_COMPILER;
2147 struct sljit_label *loop;
2148 int i;
2149 
2150 /* At this point we can freely use all temporary registers. */
2151 SLJIT_ASSERT(length > 1);
2152 /* TMP1 returns with begin - 1. */
2153 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2154 if (length < 8)
2155   {
2156   for (i = 1; i < length; i++)
2157     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2158   }
2159 else
2160   {
2161   GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2162   OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2163   loop = LABEL();
2164   OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2165   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2166   JUMPTO(SLJIT_NOT_ZERO, loop);
2167   }
2168 }
2169 
do_reset_match(compiler_common * common,int length)2170 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2171 {
2172 DEFINE_COMPILER;
2173 struct sljit_label *loop;
2174 int i;
2175 
2176 SLJIT_ASSERT(length > 1);
2177 /* OVECTOR(1) contains the "string begin - 1" constant. */
2178 if (length > 2)
2179   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2180 if (length < 8)
2181   {
2182   for (i = 2; i < length; i++)
2183     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2184   }
2185 else
2186   {
2187   GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2188   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2189   loop = LABEL();
2190   OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2191   OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2192   JUMPTO(SLJIT_NOT_ZERO, loop);
2193   }
2194 
2195 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2196 if (common->mark_ptr != 0)
2197   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2198 if (common->control_head_ptr != 0)
2199   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2200 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2201 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2202 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2203 }
2204 
do_search_mark(sljit_sw * current,const pcre_uchar * skip_arg)2205 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2206 {
2207 while (current != NULL)
2208   {
2209   switch (current[-2])
2210     {
2211     case type_then_trap:
2212     break;
2213 
2214     case type_mark:
2215     if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2216       return current[-4];
2217     break;
2218 
2219     default:
2220     SLJIT_ASSERT_STOP();
2221     break;
2222     }
2223   current = (sljit_sw*)current[-1];
2224   }
2225 return -1;
2226 }
2227 
copy_ovector(compiler_common * common,int topbracket)2228 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2229 {
2230 DEFINE_COMPILER;
2231 struct sljit_label *loop;
2232 struct sljit_jump *early_quit;
2233 
2234 /* At this point we can freely use all registers. */
2235 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2236 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2237 
2238 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2239 if (common->mark_ptr != 0)
2240   OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2241 OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2242 if (common->mark_ptr != 0)
2243   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
2244 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2245 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2246 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2247 /* Unlikely, but possible */
2248 early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
2249 loop = LABEL();
2250 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2251 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2252 /* Copy the integer value to the output buffer */
2253 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2254 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2255 #endif
2256 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
2257 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2258 JUMPTO(SLJIT_NOT_ZERO, loop);
2259 JUMPHERE(early_quit);
2260 
2261 /* Calculate the return value, which is the maximum ovector value. */
2262 if (topbracket > 1)
2263   {
2264   GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2265   OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2266 
2267   /* OVECTOR(0) is never equal to SLJIT_S2. */
2268   loop = LABEL();
2269   OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2270   OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2271   CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2272   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2273   }
2274 else
2275   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2276 }
2277 
return_with_partial_match(compiler_common * common,struct sljit_label * quit)2278 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2279 {
2280 DEFINE_COMPILER;
2281 struct sljit_jump *jump;
2282 
2283 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2284 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2285   && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2286 
2287 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2288 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2289 OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2290 CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2291 
2292 /* Store match begin and end. */
2293 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2294 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2295 
2296 jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2297 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2298 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2299 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2300 #endif
2301 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2302 JUMPHERE(jump);
2303 
2304 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2305 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2306 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2307 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2308 #endif
2309 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2310 
2311 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2312 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2313 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2314 #endif
2315 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2316 
2317 JUMPTO(SLJIT_JUMP, quit);
2318 }
2319 
check_start_used_ptr(compiler_common * common)2320 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2321 {
2322 /* May destroy TMP1. */
2323 DEFINE_COMPILER;
2324 struct sljit_jump *jump;
2325 
2326 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2327   {
2328   /* The value of -1 must be kept for start_used_ptr! */
2329   OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2330   /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2331   is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2332   jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2333   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2334   JUMPHERE(jump);
2335   }
2336 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2337   {
2338   jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2339   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2340   JUMPHERE(jump);
2341   }
2342 }
2343 
char_has_othercase(compiler_common * common,pcre_uchar * cc)2344 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2345 {
2346 /* Detects if the character has an othercase. */
2347 unsigned int c;
2348 
2349 #ifdef SUPPORT_UTF
2350 if (common->utf)
2351   {
2352   GETCHAR(c, cc);
2353   if (c > 127)
2354     {
2355 #ifdef SUPPORT_UCP
2356     return c != UCD_OTHERCASE(c);
2357 #else
2358     return FALSE;
2359 #endif
2360     }
2361 #ifndef COMPILE_PCRE8
2362   return common->fcc[c] != c;
2363 #endif
2364   }
2365 else
2366 #endif
2367   c = *cc;
2368 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2369 }
2370 
char_othercase(compiler_common * common,unsigned int c)2371 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2372 {
2373 /* Returns with the othercase. */
2374 #ifdef SUPPORT_UTF
2375 if (common->utf && c > 127)
2376   {
2377 #ifdef SUPPORT_UCP
2378   return UCD_OTHERCASE(c);
2379 #else
2380   return c;
2381 #endif
2382   }
2383 #endif
2384 return TABLE_GET(c, common->fcc, c);
2385 }
2386 
static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
{
/* Detects if the character and its othercase has only 1 bit difference.
Returns 0 if they differ in more than one bit. Otherwise the low 8 bits of
the result hold the differing bit and the bits above hold an index computed
below (it appears to select the code unit / byte that contains the bit).
The caller must pass a character that does have an othercase. */
unsigned int chr, other, diff;
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
int unit_index;
#endif

#ifdef SUPPORT_UTF
if (common->utf)
  {
  GETCHAR(chr, cc);
  if (chr > 127)
    {
#ifdef SUPPORT_UCP
    other = UCD_OTHERCASE(chr);
#else
    other = chr;
#endif
    }
  else
    other = common->fcc[chr];
  }
else
#endif
  {
  chr = *cc;
  other = TABLE_GET(chr, common->fcc, chr);
  }

SLJIT_ASSERT(chr != other);

diff = chr ^ other;
/* Optimized for English alphabet. */
if (chr <= 127 && diff == 0x20)
  return (0 << 8) | 0x20;

/* Since chr != other, they must have at least 1 bit difference. */
if (!is_powerof2(diff))
  return 0;

#if defined COMPILE_PCRE8

#ifdef SUPPORT_UTF
if (common->utf && chr > 127)
  {
  /* Start from the number of extra bytes and step back one byte for every
  six low bits of the difference that are zero. */
  unit_index = GET_EXTRALEN(*cc);
  while ((diff & 0x3f) == 0)
    {
    unit_index--;
    diff >>= 6;
    }
  return (unit_index << 8) | diff;
  }
#endif /* SUPPORT_UTF */
return (0 << 8) | diff;

#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32

#ifdef SUPPORT_UTF
if (common->utf && chr > 65535)
  {
  /* A difference in bit 10 or above is shifted down and then handled by
  the common return below; otherwise indexes 2 and 3 are used. */
  if (diff >= (1 << 10))
    diff >>= 10;
  else
    return (diff < 256) ? ((2 << 8) | diff) : ((3 << 8) | (diff >> 8));
  }
#endif /* SUPPORT_UTF */
return (diff < 256) ? ((0 << 8) | diff) : ((1 << 8) | (diff >> 8));

#endif /* COMPILE_PCRE[8|16|32] */
}
2462 
static void check_partial(compiler_common *common, BOOL force)
{
/* Emits the check for a partial match; nothing is emitted in the normal
(non-partial) mode. Unless 'force' is set, the action is skipped when the
start_used_ptr local is >= STR_PTR; with 'force' in soft partial mode it is
skipped only when start_used_ptr is -1. The action itself clears hit_start
(soft mode) or jumps to the partial match handler (hard mode).
Does not modify registers. */
DEFINE_COMPILER;
struct sljit_jump *skip = NULL;

SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);

if (common->mode == JIT_COMPILE)
  return;

if (!force)
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);

if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

if (skip != NULL)
  JUMPHERE(skip);
}
2492 
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
/* Emits an end-of-subject test. In normal mode a single jump is added to
end_reached when STR_PTR >= STR_END. In the partial modes the end is also
reported through end_reached, except that in hard partial mode control goes
to the partial match handler when start_used_ptr is below STR_PTR; in soft
partial mode hit_start is cleared in that situation before jumping.
Does not affect registers. Usually used in a tight spot. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == JIT_COMPILE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
/* Both partial modes start with the same start_used_ptr test. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
JUMPHERE(not_at_end);
}
2522 
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
/* Emits an end-of-subject test whose failure path is the backtracks list.
In normal mode this is a single STR_PTR >= STR_END jump. In the partial
modes, reaching the end backtracks when start_used_ptr >= STR_PTR;
otherwise soft mode clears hit_start and backtracks, while hard mode
transfers control to the partial match handler. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == JIT_COMPILE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
JUMPHERE(not_at_end);
}
2551 
static void peek_char(compiler_common *common, pcre_uint32 max)
{
/* Reads the character into TMP1, keeps STR_PTR.
Does not check STR_END. TMP2 Destroyed.

'max' is the largest character value the caller cares about: when it is
small enough that a single code unit decides the result, the multi-unit
decoding is not emitted at all. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *jump;
#endif

SLJIT_UNUSED_ARG(max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  if (max < 128) return;

  /* Bytes below 0xc0 are complete characters. Otherwise step over the
  first byte, let the utfreadchar helper decode the rest, and undo the
  whole advance using the length the helper leaves in TMP2. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  if (max < 0xd800) return;

  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  /* TMP2 contains the high surrogate. STR_PTR still points at it (this
  function never advances STR_PTR in UTF-16 mode), so the low surrogate is
  the NEXT code unit. Reading offset 0 here would fetch the high surrogate
  a second time and produce a wrong character value. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
}
2594 
2595 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2596 
static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
{
/* Tells whether the character codes below 128 are enough to determine a
match: this is the case when bytes 16..31 of the 32 byte bitset (the bits
for codes 128..255) are uniformly 0xff for a negated class, or uniformly 0
otherwise. */
const pcre_uint8 expected = nclass ? 0xff : 0;
int i;

for (i = 16; i < 32; i++)
  {
  if (bitset[i] != expected)
    return FALSE;
  }
return TRUE;
}
2613 
static void read_char7_type(compiler_common *common, BOOL full_read)
{
/* Reads the precise character type of a character into TMP1, if the
character is less than 128. Otherwise it returns with zero. Does not check
STR_END. STR_PTR is always moved past the first byte; when full_read is
set, the remaining bytes of a multi-byte character (lead byte >= 0xc0) are
skipped as well, using the utf8_table4 length table. TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_jump *single_byte;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (!full_read)
  return;

single_byte = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
JUMPHERE(single_byte);
}
2637 
2638 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2639 
static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
{
/* Reads the precise value of a character into TMP1, if the character is
between min and max (c >= min && c <= max). Otherwise it returns with a
value outside the range. Does not check STR_END.

The first code unit is always consumed. How much decoding code is emitted
depends on min and max; update_str_ptr requests that STR_PTR is moved past
the whole character even in the cases where its value is not decoded. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *single_unit;
#endif
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
struct sljit_jump *other_length;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);
SLJIT_UNUSED_ARG(min);
SLJIT_UNUSED_ARG(max);
SLJIT_ASSERT(min <= max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  /* Only single byte characters matter and nothing has to be skipped. */
  if (max < 128 && !update_str_ptr) return;

  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  if (min >= 0x10000)
    {
    /* Only four byte sequences can be in range: decode them inline and
    leave everything else undecoded. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    other_length = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(other_length);
    /* The table value loaded into RETURN_ADDR above skips the rest of
    the character whatever its length is. */
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (min >= 0x800 && max <= 0xffff)
    {
    /* Only three byte sequences can be in range. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    other_length = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(other_length);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (max >= 0x800)
    /* General case: delegate to one of the shared decoding helpers. */
    add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  else if (max < 128)
    {
    /* The value is irrelevant, only skip the rest of the character. */
    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  else
    {
    /* max is in 128..0x7ff here: decode as a two byte sequence. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (update_str_ptr)
      OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    else
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  JUMPHERE(single_unit);
  }
#endif

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  if (max >= 0x10000)
    {
    /* Characters from surrogate pairs may be in range: combine them. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    single_unit = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
    /* TMP2 contains the high surrogate. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(single_unit);
    return;
    }

  if (max < 0xd800 && !update_str_ptr) return;

  /* Skip low surrogate if necessary. When surrogates could fall inside
  the range, TMP1 is replaced by 0x10000, a value above max. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  single_unit = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  if (update_str_ptr)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  if (max >= 0xd800)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
  JUMPHERE(single_unit);
  }
#endif
}
2767 
read_char(compiler_common * common)2768 static SLJIT_INLINE void read_char(compiler_common *common)
2769 {
2770 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2771 }
2772 
static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check
STR_END. The type comes from the 256 entry ctypes table; code units above
255 yield zero. In UTF-8 mode, with update_str_ptr unset, a lead byte of
0xc0 or above is decoded inline as a two byte sequence only; with it set,
the utfreadtype8 helper is called instead. TMP2 is destroyed. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
struct sljit_jump *skip;
#endif
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
struct sljit_jump *above_255;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  /* This can be an extra read in some situations, but hopefully
  it is needed in most cases. */
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  skip = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
  if (update_str_ptr)
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
  else
    {
    /* Combine the two bytes in TMP2 and look the type up only if the
    resulting value still fits the table. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    above_255 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(above_255);
    }
  JUMPHERE(skip);
  return;
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

#if !defined COMPILE_PCRE8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
skip = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if !defined COMPILE_PCRE8
JUMPHERE(skip);
#endif

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf && update_str_ptr)
  {
  /* Skip low surrogate if necessary. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  skip = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(skip);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
}
2837 
static void skip_char_back(compiler_common *common)
{
/* Goes one character back. Affects STR_PTR and TMP1. Does not check begin.
Outside UTF mode (and always in the 32 bit library) this is a single code
unit step. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
#if defined COMPILE_PCRE8
struct sljit_label *previous_byte;

if (common->utf)
  {
  /* Keep stepping back while the byte just passed is a continuation
  byte, i.e. its top two bits are 10. */
  previous_byte = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, previous_byte);
  return;
  }
#elif defined COMPILE_PCRE16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  /* Skip low surrogate if necessary. The comparison result is turned into
  a 0/1 flag and doubled, giving the extra distance to step back without
  a branch. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* COMPILE_PCRE[8|16] */
#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
2872 
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Character comes in TMP1. Checks if it is a newline according to nltype
and adds jumps to 'backtracks' that are taken when the character is a
newline (jumpifmatch set) or when it is not (jumpifmatch clear).
TMP2 may be destroyed. */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The shared anynewline helper reports its result in the zero flag. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  /* A single fixed newline character. */
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF: either CR or NL counts as a newline. */
if (jumpifmatch)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  return;
  }

is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
JUMPHERE(is_cr);
}
2904 
2905 #ifdef SUPPORT_UTF
2906 
2907 #if defined COMPILE_PCRE8
/* Emits the fast-call helper which decodes a multi-byte UTF-8 character.

On entry TMP1 contains the first byte of the character (>= 0xc0) and STR_PTR
addresses the byte which follows it. On return TMP1 holds the character
value, TMP2 holds the length of the sequence (2, 3 or 4, through IN_UCHARS)
and STR_PTR has been advanced over the continuation bytes. */
static void do_utfreadchar(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Merge the low six bits of the first byte with the six payload bits of the
second byte. The length marker bits of the first byte which are below 0x40
are deliberately kept: they are inspected (and cleared) step by step below. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. Bit 0x20 of the first byte is now at 0x800;
when it is set the sequence is longer than two bytes. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Clear the marker bit and append the payload of the third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Bit 0x10 of the first byte is now at 0x10000; when it is set the sequence
is four bytes long. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Four byte sequence. */
JUMPHERE(jump);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
2955 
/* Emits the fast-call helper which decodes a UTF-8 character of at most
three bytes (the "16" presumably refers to values which fit in 16 bits; this
function is compiled only for the 8 bit library).

On entry TMP1 contains the first byte of the character (>= 0xc0) and STR_PTR
addresses the byte which follows it. On return TMP1 holds the value and
STR_PTR has been advanced over the continuation bytes; TMP2 is destroyed.

NOTE(review): for a four byte sequence only STR_PTR is adjusted (by one extra
byte) and the value left in TMP1 is not the real code point. Presumably the
callers only need to know that such a character is above their range of
interest; confirm against the callers. */
static void do_utfreadchar16(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Merge the low six bits of the first byte with the payload of the second
byte; the length marker bits below 0x40 are kept for the tests below. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. Bit 0x20 of the first byte is now at 0x800. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Bit 0x10 of the first byte is now at 0x400. TMP2 becomes 1 when it is set
(four byte sequence) and 0 otherwise, and is added to STR_PTR. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
/* This code runs only in 8 bit mode. No need to shift the value. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
2991 
/* Emits the fast-call helper which reads the character type (ctypes entry)
of a multi-byte UTF-8 character.

On entry TMP2 contains the first byte of the character (>= 0xc0) and STR_PTR
addresses the byte which follows it. On return TMP1 holds the type, which is
0 for every character >= 256 because the ctypes table covers only the first
256 characters, and STR_PTR has been advanced over the continuation bytes.
TMP2 is destroyed. */
static void do_utfreadtype8(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *compare;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Bit 0x20 of the first byte is set for sequences longer than two bytes. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point. A value above 3 means that the
decoded character cannot be below 256. */
compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Two byte character which is >= 256: no type information. */
JUMPHERE(compare);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* We only have types for characters less than 256. For longer sequences the
number of bytes to skip is loaded from utf8_table4, indexed by the first
byte (the table offset compensates for the 0xc0 base). */
JUMPHERE(jump);
OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3027 
3028 #endif /* COMPILE_PCRE8 */
3029 
3030 #endif /* SUPPORT_UTF */
3031 
#ifdef SUPPORT_UCP

/* UCD_BLOCK_SIZE must be 128 (see the assert below). */
#define UCD_BLOCK_MASK 127
#define UCD_BLOCK_SHIFT 7

/* Emits the fast-call helper which looks up the UCD record of a character.

On entry TMP1 contains the character. On return TMP1 holds the chartype field
of its record and TMP2 holds the value read from ucd_stage2 (the record
index, called "UCD offset" by the callers). */
static void do_getucd(compiler_common *common)
{
DEFINE_COMPILER;

/* The shift amounts and masks used below depend on these sizes. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Stage 1: the block number of the character selects a stage 2 block. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Stage 2 index: selected block * block size + offset inside the block. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
/* The trailing 1 is presumably the index shift for the 16 bit stage 2
entries (sljit SLJIT_MEM2 addressing) - confirm against the sljit docs. */
OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* Read the chartype field of the record; the shift of 3 matches the record
size of 8 bytes asserted above. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
#endif
3059 
mainloop_entry(compiler_common * common,BOOL hascrorlf,BOOL firstline)3060 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3061 {
3062 DEFINE_COMPILER;
3063 struct sljit_label *mainloop;
3064 struct sljit_label *newlinelabel = NULL;
3065 struct sljit_jump *start;
3066 struct sljit_jump *end = NULL;
3067 struct sljit_jump *nl = NULL;
3068 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3069 struct sljit_jump *singlechar;
3070 #endif
3071 jump_list *newline = NULL;
3072 BOOL newlinecheck = FALSE;
3073 BOOL readuchar = FALSE;
3074 
3075 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3076     common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3077   newlinecheck = TRUE;
3078 
3079 if (firstline)
3080   {
3081   /* Search for the end of the first line. */
3082   SLJIT_ASSERT(common->first_line_end != 0);
3083   OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3084 
3085   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3086     {
3087     mainloop = LABEL();
3088     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3089     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3090     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3091     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3092     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3093     CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3094     JUMPHERE(end);
3095     OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3096     }
3097   else
3098     {
3099     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3100     mainloop = LABEL();
3101     /* Continual stores does not cause data dependency. */
3102     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3103     read_char_range(common, common->nlmin, common->nlmax, TRUE);
3104     check_newlinechar(common, common->nltype, &newline, TRUE);
3105     CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3106     JUMPHERE(end);
3107     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3108     set_jumps(newline, LABEL());
3109     }
3110 
3111   OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3112   }
3113 
3114 start = JUMP(SLJIT_JUMP);
3115 
3116 if (newlinecheck)
3117   {
3118   newlinelabel = LABEL();
3119   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3120   end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3121   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3122   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3123   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3124 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3125   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3126 #endif
3127   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3128   nl = JUMP(SLJIT_JUMP);
3129   }
3130 
3131 mainloop = LABEL();
3132 
3133 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3134 #ifdef SUPPORT_UTF
3135 if (common->utf) readuchar = TRUE;
3136 #endif
3137 if (newlinecheck) readuchar = TRUE;
3138 
3139 if (readuchar)
3140   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3141 
3142 if (newlinecheck)
3143   CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3144 
3145 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3146 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3147 #if defined COMPILE_PCRE8
3148 if (common->utf)
3149   {
3150   singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3151   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3152   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3153   JUMPHERE(singlechar);
3154   }
3155 #elif defined COMPILE_PCRE16
3156 if (common->utf)
3157   {
3158   singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3159   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3160   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3161   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3162   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3163   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3164   JUMPHERE(singlechar);
3165   }
3166 #endif /* COMPILE_PCRE[8|16] */
3167 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3168 JUMPHERE(start);
3169 
3170 if (newlinecheck)
3171   {
3172   JUMPHERE(end);
3173   JUMPHERE(nl);
3174   }
3175 
3176 return mainloop;
3177 }
3178 
3179 #define MAX_N_CHARS 16
3180 #define MAX_N_BYTES 8
3181 
add_prefix_byte(pcre_uint8 byte,pcre_uint8 * bytes)3182 static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3183 {
3184 pcre_uint8 len = bytes[0];
3185 int i;
3186 
3187 if (len == 255)
3188   return;
3189 
3190 if (len == 0)
3191   {
3192   bytes[0] = 1;
3193   bytes[1] = byte;
3194   return;
3195   }
3196 
3197 for (i = len; i > 0; i--)
3198   if (bytes[i] == byte)
3199     return;
3200 
3201 if (len >= MAX_N_BYTES - 1)
3202   {
3203   bytes[0] = 255;
3204   return;
3205   }
3206 
3207 len++;
3208 bytes[len] = byte;
3209 bytes[0] = len;
3210 }
3211 
/* Recursive function, which scans prefix literals.

Walks the compiled pattern starting at cc and records, for each of the first
max_chars character positions of a match, which characters may occur there.

  common     compiler state
  cc         byte code position where the scan starts
  chars      two entries per position: chars[0] is the character value and
             chars[1] is a mask of the bits in which the possible characters
             differ (those bits are also set in chars[0]). A position which
             was not visited yet holds NOTACHAR / 0. A position which may
             hold anything gets both entries set to the all-ones mask of the
             character width.
  bytes      one byte set (MAX_N_BYTES slots, see add_prefix_byte) per
             position; it collects the low 8 bits of the possible characters
  max_chars  the maximum number of positions which may be filled

Returns the number of positions which were filled by this invocation.
Results of alternative paths (optional characters, alternatives of a
bracket) are merged into the same positions by the recursive calls, and the
return value of such a call becomes the new limit for the current path. */
static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars)
{
BOOL last, any, caseless;
int len, repeat, len_save, consumed = 0;
pcre_uint32 chr, mask;
pcre_uchar *alternative, *cc_save, *oc;
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
pcre_uchar othercase[8];
#elif defined SUPPORT_UTF && defined COMPILE_PCRE16
pcre_uchar othercase[2];
#else
pcre_uchar othercase[1];
#endif

repeat = 1;
while (TRUE)
  {
  /* last:     the scan stops after the current item
     any:      the current item matches a set of characters which is not
               tracked, so the position is recorded as "anything"
     caseless: the literal is compared case insensitively */
  last = TRUE;
  any = FALSE;
  caseless = FALSE;
  switch (*cc)
    {
    case OP_CHARI:
    caseless = TRUE;
    /* Fall through. */
    case OP_CHAR:
    last = FALSE;
    cc++;
    break;

    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    /* Zero width assertions. */
    cc++;
    continue;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    /* Assertions do not consume characters, they are skipped entirely. */
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    caseless = TRUE;
    /* Fall through. */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    /* One occurrence is recorded, then the scan stops (last is TRUE). */
    cc++;
    break;

    case OP_EXACTI:
    caseless = TRUE;
    /* Fall through. */
    case OP_EXACT:
    repeat = GET2(cc, 1);
    last = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    caseless = TRUE;
    /* Fall through. */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    /* Optional character: first scan the path where it is absent (the code
    which follows the character), then continue with it being present. */
    len = 1;
    cc++;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif
    max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars);
    if (max_chars == 0)
      return consumed;
    last = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    /* End of the alternative being scanned: follow the link. */
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    /* Scan every further alternative recursively, then continue with the
    first alternative in this invocation. */
    alternative = cc + GET(cc, 1);
    while (*alternative == OP_ALT)
      {
      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars);
      if (max_chars == 0)
        return consumed;
      alternative += GET(alternative, 1);
      }

    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
#endif
    any = TRUE;
    cc += 1 + 32 / sizeof(pcre_uchar);
    break;

    case OP_NCLASS:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += 1 + 32 / sizeof(pcre_uchar);
    break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    /* Skip the opcode here; the character is skipped by the common code. */
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UCP
    case OP_NOTPROP:
    case OP_PROP:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* The repeat count applies to the type opcode which follows. */
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    return consumed;
    }

  if (any)
    {
#if defined COMPILE_PCRE8
    mask = 0xff;
#elif defined COMPILE_PCRE16
    mask = 0xffff;
#elif defined COMPILE_PCRE32
    mask = 0xffffffff;
#else
    SLJIT_ASSERT_STOP();
#endif

    /* Record "anything" for repeat positions. */
    do
      {
      chars[0] = mask;
      chars[1] = mask;
      bytes[0] = 255;

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars += 2;
      bytes += MAX_N_BYTES;
      }
    while (--repeat > 0);

    repeat = 1;
    continue;
    }

  /* A literal character starts at cc; len is its length in code units. */
  len = 1;
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif

  if (caseless && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UTF
    if (common->utf)
      {
      GETCHAR(chr, cc);
      /* The two cases can only be merged position by position when their
      encoded lengths are the same. */
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
        return consumed;
      }
    else
#endif
      {
      chr = *cc;
      othercase[0] = TABLE_GET(chr, common->fcc, chr);
      }
    }
  else
    caseless = FALSE;

  len_save = len;
  cc_save = cc;
  while (TRUE)
    {
    oc = othercase;
    do
      {
      chr = *cc;
#ifdef COMPILE_PCRE32
      if (SLJIT_UNLIKELY(chr == NOTACHAR))
        return consumed;
#endif
      add_prefix_byte((pcre_uint8)chr, bytes);

      mask = 0;
      if (caseless)
        {
        add_prefix_byte((pcre_uint8)*oc, bytes);
        /* The bits in which the two cases differ are "don't care" bits. */
        mask = *cc ^ *oc;
        chr |= mask;
        }

#ifdef COMPILE_PCRE32
      if (chars[0] == NOTACHAR && chars[1] == 0)
#else
      if (chars[0] == NOTACHAR)
#endif
        {
        /* First visit of this position. */
        chars[0] = chr;
        chars[1] = mask;
        }
      else
        {
        /* Merge with the value recorded by another path. */
        mask |= chars[0] ^ chr;
        chr |= mask;
        chars[0] = chr;
        chars[1] |= mask;
        }

      len--;
      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars += 2;
      bytes += MAX_N_BYTES;
      cc++;
      oc++;
      }
    while (len > 0);

    if (--repeat == 0)
      break;

    /* Record the same literal again for the next repetition. */
    len = len_save;
    cc = cc_save;
    }

  repeat = 1;
  if (last)
    return consumed;
  }
}
3543 
fast_forward_first_n_chars(compiler_common * common,BOOL firstline)3544 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3545 {
3546 DEFINE_COMPILER;
3547 struct sljit_label *start;
3548 struct sljit_jump *quit;
3549 pcre_uint32 chars[MAX_N_CHARS * 2];
3550 pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
3551 pcre_uint8 ones[MAX_N_CHARS];
3552 int offsets[3];
3553 pcre_uint32 mask;
3554 pcre_uint8 *byte_set, *byte_set_end;
3555 int i, max, from;
3556 int range_right = -1, range_len = 3 - 1;
3557 sljit_ub *update_table = NULL;
3558 BOOL in_range;
3559 
3560 for (i = 0; i < MAX_N_CHARS; i++)
3561   {
3562   chars[i << 1] = NOTACHAR;
3563   chars[(i << 1) + 1] = 0;
3564   bytes[i * MAX_N_BYTES] = 0;
3565   }
3566 
3567 max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS);
3568 
3569 if (max <= 1)
3570   return FALSE;
3571 
3572 for (i = 0; i < max; i++)
3573   {
3574   mask = chars[(i << 1) + 1];
3575   ones[i] = ones_in_half_byte[mask & 0xf];
3576   mask >>= 4;
3577   while (mask != 0)
3578     {
3579     ones[i] += ones_in_half_byte[mask & 0xf];
3580     mask >>= 4;
3581     }
3582   }
3583 
3584 in_range = FALSE;
3585 from = 0;   /* Prevent compiler "uninitialized" warning */
3586 for (i = 0; i <= max; i++)
3587   {
3588   if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
3589     {
3590     range_len = i - from;
3591     range_right = i - 1;
3592     }
3593 
3594   if (i < max && bytes[i * MAX_N_BYTES] < 255)
3595     {
3596     if (!in_range)
3597       {
3598       in_range = TRUE;
3599       from = i;
3600       }
3601     }
3602   else if (in_range)
3603     in_range = FALSE;
3604   }
3605 
3606 if (range_right >= 0)
3607   {
3608   update_table = (sljit_ub *)allocate_read_only_data(common, 256);
3609   if (update_table == NULL)
3610     return TRUE;
3611   memset(update_table, IN_UCHARS(range_len), 256);
3612 
3613   for (i = 0; i < range_len; i++)
3614     {
3615     byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
3616     SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
3617     byte_set_end = byte_set + byte_set[0];
3618     byte_set++;
3619     while (byte_set <= byte_set_end)
3620       {
3621       if (update_table[*byte_set] > IN_UCHARS(i))
3622         update_table[*byte_set] = IN_UCHARS(i);
3623       byte_set++;
3624       }
3625     }
3626   }
3627 
3628 offsets[0] = -1;
3629 /* Scan forward. */
3630 for (i = 0; i < max; i++)
3631   if (ones[i] <= 2) {
3632     offsets[0] = i;
3633     break;
3634   }
3635 
3636 if (offsets[0] < 0 && range_right < 0)
3637   return FALSE;
3638 
3639 if (offsets[0] >= 0)
3640   {
3641   /* Scan backward. */
3642   offsets[1] = -1;
3643   for (i = max - 1; i > offsets[0]; i--)
3644     if (ones[i] <= 2 && i != range_right)
3645       {
3646       offsets[1] = i;
3647       break;
3648       }
3649 
3650   /* This case is handled better by fast_forward_first_char. */
3651   if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
3652     return FALSE;
3653 
3654   offsets[2] = -1;
3655   /* We only search for a middle character if there is no range check. */
3656   if (offsets[1] >= 0 && range_right == -1)
3657     {
3658     /* Scan from middle. */
3659     for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3660       if (ones[i] <= 2)
3661         {
3662         offsets[2] = i;
3663         break;
3664         }
3665 
3666     if (offsets[2] == -1)
3667       {
3668       for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3669         if (ones[i] <= 2)
3670           {
3671           offsets[2] = i;
3672           break;
3673           }
3674       }
3675     }
3676 
3677   SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3678   SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3679 
3680   chars[0] = chars[offsets[0] << 1];
3681   chars[1] = chars[(offsets[0] << 1) + 1];
3682   if (offsets[2] >= 0)
3683     {
3684     chars[2] = chars[offsets[2] << 1];
3685     chars[3] = chars[(offsets[2] << 1) + 1];
3686     }
3687   if (offsets[1] >= 0)
3688     {
3689     chars[4] = chars[offsets[1] << 1];
3690     chars[5] = chars[(offsets[1] << 1) + 1];
3691     }
3692   }
3693 
3694 max -= 1;
3695 if (firstline)
3696   {
3697   SLJIT_ASSERT(common->first_line_end != 0);
3698   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3699   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3700   OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3701   quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
3702   OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
3703   JUMPHERE(quit);
3704   }
3705 else
3706   OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3707 
3708 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3709 if (range_right >= 0)
3710   OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
3711 #endif
3712 
3713 start = LABEL();
3714 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3715 
3716 SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
3717 
3718 if (range_right >= 0)
3719   {
3720 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
3721   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
3722 #else
3723   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
3724 #endif
3725 
3726 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3727   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
3728 #else
3729   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
3730 #endif
3731   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3732   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
3733   }
3734 
3735 if (offsets[0] >= 0)
3736   {
3737   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3738   if (offsets[1] >= 0)
3739     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3740   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3741 
3742   if (chars[1] != 0)
3743     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3744   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3745   if (offsets[2] >= 0)
3746     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3747 
3748   if (offsets[1] >= 0)
3749     {
3750     if (chars[5] != 0)
3751       OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3752     CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3753     }
3754 
3755   if (offsets[2] >= 0)
3756     {
3757     if (chars[3] != 0)
3758       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3759     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3760     }
3761   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3762   }
3763 
3764 JUMPHERE(quit);
3765 
3766 if (firstline)
3767   {
3768   if (range_right >= 0)
3769     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3770   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3771   if (range_right >= 0)
3772     {
3773     quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3774     OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
3775     JUMPHERE(quit);
3776     }
3777   }
3778 else
3779   OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3780 return TRUE;
3781 }
3782 
3783 #undef MAX_N_CHARS
3784 #undef MAX_N_BYTES
3785 
fast_forward_first_char(compiler_common * common,pcre_uchar first_char,BOOL caseless,BOOL firstline)3786 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3787 {
3788 DEFINE_COMPILER;
3789 struct sljit_label *start;
3790 struct sljit_jump *quit;
3791 struct sljit_jump *found;
3792 pcre_uchar oc, bit;
3793 
3794 if (firstline)
3795   {
3796   SLJIT_ASSERT(common->first_line_end != 0);
3797   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3798   OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3799   }
3800 
3801 start = LABEL();
3802 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3803 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3804 
3805 oc = first_char;
3806 if (caseless)
3807   {
3808   oc = TABLE_GET(first_char, common->fcc, first_char);
3809 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3810   if (first_char > 127 && common->utf)
3811     oc = UCD_OTHERCASE(first_char);
3812 #endif
3813   }
3814 if (first_char == oc)
3815   found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3816 else
3817   {
3818   bit = first_char ^ oc;
3819   if (is_powerof2(bit))
3820     {
3821     OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3822     found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3823     }
3824   else
3825     {
3826     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3827     OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3828     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3829     OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
3830     found = JUMP(SLJIT_NOT_ZERO);
3831     }
3832   }
3833 
3834 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3835 JUMPTO(SLJIT_JUMP, start);
3836 JUMPHERE(found);
3837 JUMPHERE(quit);
3838 
3839 if (firstline)
3840   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3841 }
3842 
fast_forward_newline(compiler_common * common,BOOL firstline)3843 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3844 {
3845 DEFINE_COMPILER;
3846 struct sljit_label *loop;
3847 struct sljit_jump *lastchar;
3848 struct sljit_jump *firstchar;
3849 struct sljit_jump *quit;
3850 struct sljit_jump *foundcr = NULL;
3851 struct sljit_jump *notfoundnl;
3852 jump_list *newline = NULL;
3853 
3854 if (firstline)
3855   {
3856   SLJIT_ASSERT(common->first_line_end != 0);
3857   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3858   OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3859   }
3860 
3861 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3862   {
3863   lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3864   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3865   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3866   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3867   firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3868 
3869   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3870   OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3871   OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
3872 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3873   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3874 #endif
3875   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3876 
3877   loop = LABEL();
3878   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3879   quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3880   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3881   OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3882   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3883   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3884 
3885   JUMPHERE(quit);
3886   JUMPHERE(firstchar);
3887   JUMPHERE(lastchar);
3888 
3889   if (firstline)
3890     OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3891   return;
3892   }
3893 
3894 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3895 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3896 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3897 skip_char_back(common);
3898 
3899 loop = LABEL();
3900 common->ff_newline_shortcut = loop;
3901 
3902 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3903 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3904 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3905   foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3906 check_newlinechar(common, common->nltype, &newline, FALSE);
3907 set_jumps(newline, loop);
3908 
3909 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3910   {
3911   quit = JUMP(SLJIT_JUMP);
3912   JUMPHERE(foundcr);
3913   notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3914   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3915   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3916   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3917 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3918   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3919 #endif
3920   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3921   JUMPHERE(notfoundnl);
3922   JUMPHERE(quit);
3923   }
3924 JUMPHERE(lastchar);
3925 JUMPHERE(firstchar);
3926 
3927 if (firstline)
3928   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3929 }
3930 
3931 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3932 
fast_forward_start_bits(compiler_common * common,pcre_uint8 * start_bits,BOOL firstline)3933 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3934 {
3935 DEFINE_COMPILER;
3936 struct sljit_label *start;
3937 struct sljit_jump *quit;
3938 struct sljit_jump *found = NULL;
3939 jump_list *matches = NULL;
3940 #ifndef COMPILE_PCRE8
3941 struct sljit_jump *jump;
3942 #endif
3943 
3944 if (firstline)
3945   {
3946   SLJIT_ASSERT(common->first_line_end != 0);
3947   OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3948   OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3949   }
3950 
3951 start = LABEL();
3952 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3953 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3954 #ifdef SUPPORT_UTF
3955 if (common->utf)
3956   OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3957 #endif
3958 
3959 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3960   {
3961 #ifndef COMPILE_PCRE8
3962   jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
3963   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3964   JUMPHERE(jump);
3965 #endif
3966   OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3967   OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3968   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3969   OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3970   OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3971   found = JUMP(SLJIT_NOT_ZERO);
3972   }
3973 
3974 #ifdef SUPPORT_UTF
3975 if (common->utf)
3976   OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3977 #endif
3978 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3979 #ifdef SUPPORT_UTF
3980 #if defined COMPILE_PCRE8
3981 if (common->utf)
3982   {
3983   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3984   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3985   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3986   }
3987 #elif defined COMPILE_PCRE16
3988 if (common->utf)
3989   {
3990   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3991   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3992   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3993   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3994   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3995   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3996   }
3997 #endif /* COMPILE_PCRE[8|16] */
3998 #endif /* SUPPORT_UTF */
3999 JUMPTO(SLJIT_JUMP, start);
4000 if (found != NULL)
4001   JUMPHERE(found);
4002 if (matches != NULL)
4003   set_jumps(matches, LABEL());
4004 JUMPHERE(quit);
4005 
4006 if (firstline)
4007   OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
4008 }
4009 
search_requested_char(compiler_common * common,pcre_uchar req_char,BOOL caseless,BOOL has_firstchar)4010 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4011 {
4012 DEFINE_COMPILER;
4013 struct sljit_label *loop;
4014 struct sljit_jump *toolong;
4015 struct sljit_jump *alreadyfound;
4016 struct sljit_jump *found;
4017 struct sljit_jump *foundoc = NULL;
4018 struct sljit_jump *notfound;
4019 pcre_uint32 oc, bit;
4020 
4021 SLJIT_ASSERT(common->req_char_ptr != 0);
4022 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4023 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4024 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4025 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4026 
4027 if (has_firstchar)
4028   OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4029 else
4030   OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4031 
4032 loop = LABEL();
4033 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4034 
4035 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4036 oc = req_char;
4037 if (caseless)
4038   {
4039   oc = TABLE_GET(req_char, common->fcc, req_char);
4040 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4041   if (req_char > 127 && common->utf)
4042     oc = UCD_OTHERCASE(req_char);
4043 #endif
4044   }
4045 if (req_char == oc)
4046   found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4047 else
4048   {
4049   bit = req_char ^ oc;
4050   if (is_powerof2(bit))
4051     {
4052     OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4053     found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4054     }
4055   else
4056     {
4057     found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4058     foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4059     }
4060   }
4061 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4062 JUMPTO(SLJIT_JUMP, loop);
4063 
4064 JUMPHERE(found);
4065 if (foundoc)
4066   JUMPHERE(foundoc);
4067 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4068 JUMPHERE(alreadyfound);
4069 JUMPHERE(toolong);
4070 return notfound;
4071 }
4072 
/* Emits the fast-call routine that walks the records starting at STACK_TOP
and copies the saved words back to their locations relative to the local
base. Each record begins with a signed word:

  > 0  offset from the local base; the next two words are written to that
       location and the one after it, and the cursor advances three words
  < 0  the negated value is the offset; the next single word is written
       there and the cursor advances two words
  == 0 end of the records: the routine returns

TMP1 is the cursor, TMP2 the current header word and TMP3 the local base. */
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *mainloop;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
GET_LOCAL_BASE(TMP3, 0, 0);

/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
jump = JUMP(SLJIT_SIG_LESS_EQUAL);

/* Positive header: restore two words. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, mainloop);

/* Zero or negative header. The second conditional jump is emitted with no
intervening operation, so it still tests the flags of the SUB above. */
JUMPHERE(jump);
jump = JUMP(SLJIT_SIG_LESS);
/* End of dropping frames. */
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Negative header: restore a single word. */
JUMPHERE(jump);
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, mainloop);
}
4107 
/* Emits the fast-call routine that tests for a word boundary at STR_PTR.

The "is a word character" value (0 or 1) of the character before STR_PTR is
stored in LOCALS1 (0 when STR_PTR is at or before "begin"), and that of the
character at STR_PTR is computed in TMP2 (0 when the end-of-subject check
jumps out). The routine ends with an XOR of the two that sets the flags, so
the result is non-zero exactly when the two values differ. The return
address is kept in LOCALS0. */
static void check_wordboundary(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
#if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
struct sljit_jump *jump;
#endif

/* The table lookups below shift the ctypes entry right by 4 and mask with
1, which is only correct for this value of ctype_word. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Get type of the previous char, and put it to LOCALS1. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
skip_char_back(common);
check_start_used_ptr(common);
read_char(common);

/* Testing char type. */
#ifdef SUPPORT_UCP
if (common->use_ucp)
  {
  /* Underscore counts as a word character; otherwise the value obtained
  through the getucd call must lie in ucp_Ll..ucp_Lu or ucp_Nd..ucp_No. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
  }
else
#endif
  {
  /* Table based test; values above 255 skip the lookup and keep the 0
  already stored in LOCALS1. */
#ifndef COMPILE_PCRE8
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  /* Here LOCALS1 has already been zeroed. */
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* COMPILE_PCRE8 */
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#ifndef COMPILE_PCRE8
  JUMPHERE(jump);
#elif defined SUPPORT_UTF
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* COMPILE_PCRE8 */
  }
JUMPHERE(skipread);

/* Now the character at STR_PTR; TMP2 stays 0 when the end-of-subject check
jumps to skipread_list. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
peek_char(common, READ_CHAR_MAX);

/* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UCP
if (common->use_ucp)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  }
else
#endif
  {
#ifndef COMPILE_PCRE8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#ifndef COMPILE_PCRE8
  JUMPHERE(jump);
#elif defined SUPPORT_UTF
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* COMPILE_PCRE8 */
  }
set_jumps(skipread_list, LABEL());

/* The two word-ness values differ exactly when the XOR is non-zero. */
OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
}
4216 
/* Tries to replace a 256-entry class bitmap test on TMP1 by a short sequence
of compares.

The bitmap is scanned for the positions at which membership flips; these are
collected in ranges[]. When at most four such positions exist, compare code
is emitted and its jumps are appended to *backtracks.

  common      compiler state
  bits        bitmap, one bit per value 0..255 (bit i & 7 of byte i >> 3)
  nclass      membership assumed for values of 256 and above; a flip at 256
              is recorded when it differs from the membership of value 255
  invert      FALSE: the emitted jumps are taken for values outside the set
              TRUE:  the emitted jumps are taken for values inside the set
  backtracks  list receiving the emitted jumps

Returns TRUE when code was emitted (or none was needed); FALSE when the
bitmap needs more than four flips, in which case nothing has been emitted.
The emitted code may overwrite TMP1. */
static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
DEFINE_COMPILER;
int ranges[MAX_RANGE_SIZE];
pcre_uint8 bit, cbit, all;
int i, byte, length = 0;

bit = bits[0] & 0x1;
/* All bits will be zero or one (since bit is zero or one). */
all = -bit;

for (i = 0; i < 256; )
  {
  byte = i >> 3;
  /* A whole byte that matches the current state is skipped in one step. */
  if ((i & 0x7) == 0 && bits[byte] == all)
    i += 8;
  else
    {
    cbit = (bits[byte] >> (i & 0x7)) & 0x1;
    if (cbit != bit)
      {
      if (length >= MAX_RANGE_SIZE)
        return FALSE;
      ranges[length] = i;
      length++;
      bit = cbit;
      all = -cbit;
      }
    i++;
    }
  }

if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
  {
  if (length >= MAX_RANGE_SIZE)
    return FALSE;
  ranges[length] = 256;
  length++;
  }

if (length < 0 || length > 4)
  return FALSE;

/* From here on "bit" is the membership of value 0, flipped when the caller
asked for inverted jumps. */
bit = bits[0] & 0x1;
if (invert) bit ^= 0x1;

/* No character is accepted. */
if (length == 0 && bit == 0)
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

switch(length)
  {
  case 0:
  /* When bit != 0, all characters are accepted. */
  return TRUE;

  case 1:
  /* One flip: a single threshold compare. */
  add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 2:
  /* Two flips: one interval, or one single value when it has width 1. */
  if (ranges[0] + 1 != ranges[1])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
    }
  else
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 3:
  if (bit != 0)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
    return TRUE;
    }

  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
  return TRUE;

  case 4:
  /* Two intervals of equal width whose starts differ in exactly one bit:
  force that bit on and test the upper interval only. */
  if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
      && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
      && is_powerof2(ranges[2] - ranges[0]))
    {
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    return TRUE;
    }

  if (bit != 0)
    {
    /* i records how much has already been subtracted from TMP1. */
    i = 0;
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      i = ranges[0];
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));

    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
    return TRUE;
    }

  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
  return TRUE;

  default:
  /* Not reachable: length was limited to 0..4 above. */
  SLJIT_ASSERT_STOP();
  return FALSE;
  }
}
4365 
/* Emits the fast-call routine that tests whether TMP1 holds a newline
character: 0x0a..0x0d, 0x85 and, in the wide / UTF configurations, 0x2028 and
0x2029. The answer is accumulated in TMP2 and the final OR also sets the
flags. TMP1 is modified as well. */
static void check_anynewline(compiler_common *common)
{
/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* From here TMP1 holds the character minus 0x0a. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  /* Setting bit 0 maps 0x2028 onto 0x2029, so one compare covers both. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last equality test that was emitted. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4392 
/* Emits the fast-call routine that tests whether TMP1 holds a horizontal
whitespace character: 0x09, 0x20, 0xa0 and, in the wide / UTF
configurations, 0x1680, 0x180e, 0x2000..0x200a, 0x202f, 0x205f and 0x3000.
The answer is accumulated in TMP2 and the final OR also sets the flags. In
the wide / UTF path TMP1 is modified as well. */
static void check_hspace(compiler_common *common)
{
/* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  /* From here TMP1 holds the character minus 0x2000. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last equality test that was emitted. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4431 
/* Emits the fast-call routine that tests whether TMP1 holds a vertical
whitespace character: 0x0a..0x0d, 0x85 and, in the wide / UTF
configurations, 0x2028 and 0x2029. The answer is accumulated in TMP2 and the
final OR also sets the flags. TMP1 is modified as well. */
static void check_vspace(compiler_common *common)
{
/* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* From here TMP1 holds the character minus 0x0a. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  /* Setting bit 0 maps 0x2028 onto 0x2029, so one compare covers both. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last equality test that was emitted. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4459 
4460 #define CHAR1 STR_END
4461 #define CHAR2 STACK_TOP
4462 
do_casefulcmp(compiler_common * common)4463 static void do_casefulcmp(compiler_common *common)
4464 {
4465 DEFINE_COMPILER;
4466 struct sljit_jump *jump;
4467 struct sljit_label *label;
4468 
4469 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4470 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4471 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4472 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
4473 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4474 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4475 
4476 label = LABEL();
4477 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4478 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4479 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4480 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4481 JUMPTO(SLJIT_NOT_ZERO, label);
4482 
4483 JUMPHERE(jump);
4484 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4485 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4486 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4487 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4488 }
4489 
4490 #define LCC_TABLE STACK_LIMIT
4491 
do_caselesscmp(compiler_common * common)4492 static void do_caselesscmp(compiler_common *common)
4493 {
4494 DEFINE_COMPILER;
4495 struct sljit_jump *jump;
4496 struct sljit_label *label;
4497 
4498 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4499 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4500 
4501 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4502 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
4503 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
4504 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
4505 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4506 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4507 
4508 label = LABEL();
4509 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4510 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4511 #ifndef COMPILE_PCRE8
4512 jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4513 #endif
4514 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4515 #ifndef COMPILE_PCRE8
4516 JUMPHERE(jump);
4517 jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4518 #endif
4519 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4520 #ifndef COMPILE_PCRE8
4521 JUMPHERE(jump);
4522 #endif
4523 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4524 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4525 JUMPTO(SLJIT_NOT_ZERO, label);
4526 
4527 JUMPHERE(jump);
4528 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4529 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4530 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4531 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4532 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4533 }
4534 
4535 #undef LCC_TABLE
4536 #undef CHAR1
4537 #undef CHAR2
4538 
#if defined SUPPORT_UTF && defined SUPPORT_UCP

/* Caseless comparison of two character sequences, as an ordinary C function.

  src1  start of the first sequence
  args  supplies the second sequence: it starts at args->uchar_ptr and may
        not be read at or beyond args->end
  end1  end of the first sequence

Returns the position in the second sequence just after the compared text when
every character matched, NULL at the first mismatch, and (pcre_uchar *)1 when
the second sequence reaches args->end before the first one is exhausted. */
static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
/* This function would be ineffective to do in JIT level. */
pcre_uint32 c1, c2;
const pcre_uchar *src2 = args->uchar_ptr;
const pcre_uchar *end2 = args->end;
const ucd_record *ur;
const pcre_uint32 *pp;

while (src1 < end1)
  {
  if (src2 >= end2)
    return (pcre_uchar*)1;
  GETCHARINC(c1, src1);
  GETCHARINC(c2, src2);
  ur = GET_UCD(c2);
  /* Accept identical characters and the direct other case of c2. */
  if (c1 != c2 && c1 != c2 + ur->other_case)
    {
    /* Otherwise c1 must be listed in the caseless set of c2. The scan gives
    up at the first entry greater than c1, so the list is presumably sorted
    and ends with a value larger than any character - confirm against the
    table definition. */
    pp = PRIV(ucd_caseless_sets) + ur->caseset;
    for (;;)
      {
      if (c1 < *pp) return NULL;
      if (c1 == *pp++) break;
      }
    }
  }
return src2;
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
4571 
/* Emits the comparison of one pattern character (all of its code units when
it is a multi-unit UTF character) against the subject.

The subject units are addressed relative to STR_PTR with the negative offset
context->length, which is reduced by one unit for every unit processed. TMP1
and TMP2 are used alternately, so the next piece of the subject is loaded
before the current piece is compared. Where unaligned access is available
(8 and 16 bit builds) several units are gathered in the context and compared
with a single 16 or 32 bit compare.

  common      compiler state
  caseless    when TRUE and the character has another case, the differing
              bit is OR-ed into the subject data before comparing
  cc          first code unit of the pattern character
  context     running state shared by the calls that cover one literal
              sequence; a sourcereg of -1 marks the first call
  backtracks  list receiving the "not equal" jumps

Returns cc advanced past the code units that were consumed. */
static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
    compare_context *context, jump_list **backtracks)
{
DEFINE_COMPILER;
unsigned int othercasebit = 0;
pcre_uchar *othercasechar = NULL;
#ifdef SUPPORT_UTF
int utflength;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  othercasebit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(othercasebit);
  /* Extracting bit difference info. */
#if defined COMPILE_PCRE8
  /* The bits above the low byte select the code unit carrying the
  difference; the low byte is the mask. */
  othercasechar = cc + (othercasebit >> 8);
  othercasebit &= 0xff;
#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  /* Note that this code only handles characters in the BMP. If there
  ever are characters outside the BMP whose othercase differs in only one
  bit from itself (there currently are none), this code will need to be
  revised for COMPILE_PCRE32. */
  othercasechar = cc + (othercasebit >> 9);
  /* Bit 0x100 says that the mask belongs to the upper byte of the unit. */
  if ((othercasebit & 0x100) != 0)
    othercasebit = (othercasebit & 0xff) << 8;
  else
    othercasebit &= 0xff;
#endif /* COMPILE_PCRE[8|16|32] */
  }

if (context->sourcereg == -1)
  {
  /* First call for this sequence: load the first piece of the subject into
  TMP1; the following load will go to TMP2. */
#if defined COMPILE_PCRE8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif defined COMPILE_PCRE16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif defined COMPILE_PCRE32
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* COMPILE_PCRE[8|16|32] */
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UTF
/* Number of code units that make up this character. */
utflength = 1;
if (common->utf && HAS_EXTRALEN(*cc))
  utflength += GET_EXTRALEN(*cc);

do
  {
#endif

  context->length -= IN_UCHARS(1);
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)

  /* Unaligned read is supported. */
  /* Collect the expected unit (c) and its case mask (oc) in the context. */
  if (othercasebit != 0 && othercasechar == cc)
    {
    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
    context->oc.asuchars[context->ucharptr] = othercasebit;
    }
  else
    {
    context->c.asuchars[context->ucharptr] = *cc;
    context->oc.asuchars[context->ucharptr] = 0;
    }
  context->ucharptr++;

  /* Emit a compare once a 32 bit piece is complete, the sequence has ended,
  or (8 bit build) a 16 bit piece is complete with one unit remaining. */
#if defined COMPILE_PCRE8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Load the next piece before the current one is compared. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if defined COMPILE_PCRE8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* COMPILE_PCRE8 */
    /* Switch to the register that holds the piece loaded earlier. */
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

    switch(context->ucharptr)
      {
      case 4 / sizeof(pcre_uchar):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(pcre_uchar):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#ifdef COMPILE_PCRE8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_ASSERT_STOP();
      break;
      }
    context->ucharptr = 0;
    }

#else

  /* Unaligned read is unsupported or in 32 bit mode. */
  /* One unit per compare: load the next unit, then test the previous one. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

  if (othercasebit != 0 && othercasechar == cc)
    {
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));

#endif

  cc++;
#ifdef SUPPORT_UTF
  utflength--;
  }
while (utflength > 0);
#endif

return cc;
}
4724 
4725 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4726 
/* Emits the add or subtract that rebases typereg from the current typeoffset
to (value), then records (value) as the new typeoffset. Nothing is emitted
when the offset is unchanged. Relies on "typereg" and "typeoffset" being in
scope at the point of use. The expansion consists of two statements and
evaluates (value) several times, so it must not be used as the unbraced body
of an if / else / loop, nor with an argument that has side effects. */
#define SET_TYPE_OFFSET(value) \
  if ((value) != typeoffset) \
    { \
    if ((value) < typeoffset) \
      OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
    else \
      OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
    } \
  typeoffset = (value);
4736 
4737 #define SET_CHAR_OFFSET(value) \
4738   if ((value) != charoffset) \
4739     { \
4740     if ((value) < charoffset) \
4741       OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
4742     else \
4743       OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
4744     } \
4745   charoffset = (value);
4746 
/* Emits the machine code which matches one character against an extended
character class.

Arguments:
  common      the JIT compiler state
  cc          points to the XCL flag byte of the class (XCL_NOT, XCL_MAP and
              XCL_HASPROP are tested on it). If XCL_MAP is set, a 32 byte
              bitmap follows. After that comes a list of XCL_SINGLE, XCL_RANGE
              and (with SUPPORT_UCP) XCL_PROP / XCL_NOTPROP items, terminated
              by XCL_END.
  backtracks  jump list which receives every "character does not match" jump

The function works in three steps:
  1. The item list is scanned to count the items, to find the smallest and
     largest character which can occur (passed to read_char_range), and to
     decide whether the character value, its Unicode type and / or its script
     are needed later.
  2. The character is read, the optional bitmap is tested, and the required
     Unicode data is loaded into registers.
  3. One comparison is generated for each item.

For a non-negated class a matching item jumps to the local "found" list, whose
target is the end of the generated code. For a negated class (XCL_NOT) a
matching item jumps directly to backtracks. The comparison of the last item of
a non-negated class is inverted, so its failure jumps to backtracks and its
success simply falls through. */

static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
{
DEFINE_COMPILER;
jump_list *found = NULL;
jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
struct sljit_jump *jump = NULL;
pcre_uchar *ccbegin;
int compares, invertcmp, numberofcmps;
#if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
BOOL utf = common->utf;
#endif

#ifdef SUPPORT_UCP
BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
BOOL charsaved = FALSE;
int typereg = TMP1, scriptreg = TMP1;
const pcre_uint32 *other_cases;
sljit_uw typeoffset;
#endif

/* Scanning the necessary info. */
cc++;
ccbegin = cc;
compares = 0;
if (cc[-1] & XCL_MAP)
  {
  /* The bitmap may contain any character below 256. */
  min = 0;
  cc += 32 / sizeof(pcre_uchar);
  }

while (*cc != XCL_END)
  {
  compares++;
  if (*cc == XCL_SINGLE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
    if (c < min) min = c;
#ifdef SUPPORT_UCP
    needschar = TRUE;
#endif
    }
  else if (*cc == XCL_RANGE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);
    if (c < min) min = c;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
#ifdef SUPPORT_UCP
    needschar = TRUE;
#endif
    }
#ifdef SUPPORT_UCP
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    cc++;
    if (*cc == PT_CLIST)
      {
      /* A caseless set is an explicit list of characters, so the range
         can still be narrowed. */
      other_cases = PRIV(ucd_caseless_sets) + cc[1];
      while (*other_cases != NOTACHAR)
        {
        if (*other_cases > max) max = *other_cases;
        if (*other_cases < min) min = *other_cases;
        other_cases++;
        }
      }
    else
      {
      /* Any other property may match any character. */
      max = READ_CHAR_MAX;
      min = 0;
      }

    switch(*cc)
      {
      case PT_ANY:
      break;

      case PT_LAMP:
      case PT_GC:
      case PT_PC:
      case PT_ALNUM:
      needstype = TRUE;
      break;

      case PT_SC:
      needsscript = TRUE;
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      case PT_WORD:
      case PT_PXGRAPH:
      case PT_PXPRINT:
      case PT_PXPUNCT:
      needstype = TRUE;
      needschar = TRUE;
      break;

      case PT_CLIST:
      case PT_UCNC:
      needschar = TRUE;
      break;

      default:
      SLJIT_ASSERT_STOP();
      break;
      }
    cc += 2;
    }
#endif
  }

/* We are not necessarily in UTF mode, even in 8 bit mode. */
cc = ccbegin;
detect_partial_match(common, backtracks);
read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);

if ((cc[-1] & XCL_HASPROP) == 0)
  {
  if ((cc[-1] & XCL_MAP) != 0)
    {
    /* Characters above 255 skip the bitmap test. */
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
    if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
      {
      /* Generic bitmap test: byte index is (c >> 3), bit index is (c & 7). */
      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
      OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
      add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
      }

    /* A character below 256 which is not in the bitmap cannot match. */
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(jump);

    cc += 32 / sizeof(pcre_uchar);
    }
  else
    {
    /* No bitmap and no property: a character outside of [min, max]
       decides the result immediately. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
    add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
    }
  }
else if ((cc[-1] & XCL_MAP) != 0)
  {
  /* The bitmap test destroys TMP1, so the character is saved in TMP3. */
  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
#ifdef SUPPORT_UCP
  charsaved = TRUE;
#endif
  if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
    {
#ifdef COMPILE_PCRE8
    SLJIT_ASSERT(common->utf);
#endif
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);

    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
    add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));

    JUMPHERE(jump);
    }

  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
  cc += 32 / sizeof(pcre_uchar);
  }

#ifdef SUPPORT_UCP
/* Simple register allocation. TMP1 is preferred if possible.
NOTE(review): the code below assumes that the getucd helper leaves the
character type in TMP1 and the index of the ucd record in TMP2 - confirm
against the helper's definition. */
if (needstype || needsscript)
  {
  if (needschar && !charsaved)
    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  if (needschar)
    {
    /* TMP1 must hold the character again, so the type is moved away. */
    if (needstype)
      {
      OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
      typereg = RETURN_ADDR;
      }

    if (needsscript)
      scriptreg = TMP3;
    OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
    }
  else if (needstype && needsscript)
    scriptreg = TMP3;
  /* In all other cases only one of them was specified, and that can go to TMP1. */

  if (needsscript)
    {
    if (scriptreg == TMP1)
      {
      OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
      OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
      }
    else
      {
      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
      OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
      OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
      }
    }
  }
#endif

/* Generating code. */
charoffset = 0;
numberofcmps = 0;
#ifdef SUPPORT_UCP
typeoffset = 0;
#endif

while (*cc != XCL_END)
  {
  compares--;
  /* The last comparison of a non-negated class is inverted: its failure
     goes to backtracks, its success falls through. */
  invertcmp = (compares == 0 && list != backtracks);
  jump = NULL;

  if (*cc == XCL_SINGLE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);

    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      /* Collect the result in TMP2; the jump is emitted by a later item. */
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      /* Finish the group of collected comparisons with a single jump. */
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
  else if (*cc == XCL_RANGE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);
    /* Bias TMP1 by the start of the range, so a single unsigned compare
       against (end - start) tests the whole range. */
    SET_CHAR_OFFSET(c);
    GETCHARINCTEST(c, cc);

    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
#ifdef SUPPORT_UCP
  else
    {
    if (*cc == XCL_NOTPROP)
      invertcmp ^= 0x1;
    cc++;
    switch(*cc)
      {
      case PT_ANY:
      /* No code is needed when the item can never match and is not the
         last one, or when it always matches and is the last one. The two
         data bytes of the item must still be skipped before continuing:
         otherwise cc stays on the property type byte, which the loop
         condition would then test as if it were the next item code. */
      if (list != backtracks)
        {
        if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
          {
          cc += 2;
          continue;
          }
        }
      else if (cc[-1] == XCL_NOTPROP)
        {
        cc += 2;
        continue;
        }
      jump = JUMP(SLJIT_JUMP);
      break;

      case PT_LAMP:
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_GC:
      c = PRIV(ucp_typerange)[(int)cc[1] * 2];
      SET_TYPE_OFFSET(c);
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
      break;

      case PT_PC:
      jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
      break;

      case PT_SC:
      jump = CMP(SLJIT_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      SET_CHAR_OFFSET(9);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

      SET_TYPE_OFFSET(ucp_Zl);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_WORD:
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
      /* Fall through. */

      case PT_ALNUM:
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
      /* For PT_WORD the underscore test above has already initialized TMP2. */
      OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
      SET_TYPE_OFFSET(ucp_Nd);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_CLIST:
      other_cases = PRIV(ucd_caseless_sets) + cc[1];

      /* At least three characters are required.
         Otherwise this case would be handled by the normal code path. */
      SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
      SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);

      /* Optimizing character pairs, if their difference is power of 2.
         Setting the differing bit maps both characters of the pair to the
         larger one, so one compare tests both. The OR needs the unbiased
         character value, hence the ADD when charoffset is not zero. */
      if (is_powerof2(other_cases[1] ^ other_cases[0]))
        {
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
        else
          {
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
          }
        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
        other_cases += 2;
        }
      else if (is_powerof2(other_cases[2] ^ other_cases[1]))
        {
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
        else
          {
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          /* The mask must be the bit which was tested above, that is the
             difference of the second and third characters. */
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
          }
        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);

        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
        OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);

        other_cases += 3;
        }
      else
        {
        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
        }

      /* The remaining characters are compared one by one. Only the last
         OR sets the flags which are used by the jump. */
      while (*other_cases != NOTACHAR)
        {
        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
        }
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_UCNC:
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

      SET_CHAR_OFFSET(0xa0);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
      SET_CHAR_OFFSET(0);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_PXGRAPH:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);

      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      SET_CHAR_OFFSET(0x2066);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

      JUMPHERE(jump);
      /* TMP2 is non-zero for the excluded characters here. */
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPRINT:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);

      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
      OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);

      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      SET_CHAR_OFFSET(0x2066);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

      JUMPHERE(jump);
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPUNCT:
      SET_TYPE_OFFSET(ucp_Sc);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);

      /* Symbols are accepted only if the character is not greater than 0xff. */
      SET_CHAR_OFFSET(0);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
      OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);

      SET_TYPE_OFFSET(ucp_Pc);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      default:
      /* Unknown property types are already rejected by the scanning loop. */
      SLJIT_ASSERT_STOP();
      break;
      }
    cc += 2;
    }
#endif

  /* The jump of the last item always goes to backtracks (see invertcmp). */
  if (jump != NULL)
    add_jump(compiler, compares > 0 ? list : backtracks, jump);
  }

if (found != NULL)
  set_jumps(found, LABEL());
}
5242 
5243 #undef SET_TYPE_OFFSET
5244 #undef SET_CHAR_OFFSET
5245 
5246 #endif
5247 
compile_char1_matchingpath(compiler_common * common,pcre_uchar type,pcre_uchar * cc,jump_list ** backtracks)5248 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
5249 {
5250 DEFINE_COMPILER;
5251 int length;
5252 unsigned int c, oc, bit;
5253 compare_context context;
5254 struct sljit_jump *jump[4];
5255 jump_list *end_list;
5256 #ifdef SUPPORT_UTF
5257 struct sljit_label *label;
5258 #ifdef SUPPORT_UCP
5259 pcre_uchar propdata[5];
5260 #endif
5261 #endif /* SUPPORT_UTF */
5262 
5263 switch(type)
5264   {
5265   case OP_SOD:
5266   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5267   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5268   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5269   return cc;
5270 
5271   case OP_SOM:
5272   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5273   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5274   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5275   return cc;
5276 
5277   case OP_NOT_WORD_BOUNDARY:
5278   case OP_WORD_BOUNDARY:
5279   add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
5280   add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5281   return cc;
5282 
5283   case OP_NOT_DIGIT:
5284   case OP_DIGIT:
5285   /* Digits are usually 0-9, so it is worth to optimize them. */
5286   detect_partial_match(common, backtracks);
5287 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5288   if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5289     read_char7_type(common, type == OP_NOT_DIGIT);
5290   else
5291 #endif
5292     read_char8_type(common, type == OP_NOT_DIGIT);
5293     /* Flip the starting bit in the negative case. */
5294   OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
5295   add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5296   return cc;
5297 
5298   case OP_NOT_WHITESPACE:
5299   case OP_WHITESPACE:
5300   detect_partial_match(common, backtracks);
5301 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5302   if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
5303     read_char7_type(common, type == OP_NOT_WHITESPACE);
5304   else
5305 #endif
5306     read_char8_type(common, type == OP_NOT_WHITESPACE);
5307   OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
5308   add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5309   return cc;
5310 
5311   case OP_NOT_WORDCHAR:
5312   case OP_WORDCHAR:
5313   detect_partial_match(common, backtracks);
5314 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5315   if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
5316     read_char7_type(common, type == OP_NOT_WORDCHAR);
5317   else
5318 #endif
5319     read_char8_type(common, type == OP_NOT_WORDCHAR);
5320   OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
5321   add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5322   return cc;
5323 
5324   case OP_ANY:
5325   detect_partial_match(common, backtracks);
5326   read_char_range(common, common->nlmin, common->nlmax, TRUE);
5327   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5328     {
5329     jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5330     end_list = NULL;
5331     if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5332       add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5333     else
5334       check_str_end(common, &end_list);
5335 
5336     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5337     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
5338     set_jumps(end_list, LABEL());
5339     JUMPHERE(jump[0]);
5340     }
5341   else
5342     check_newlinechar(common, common->nltype, backtracks, TRUE);
5343   return cc;
5344 
5345   case OP_ALLANY:
5346   detect_partial_match(common, backtracks);
5347 #ifdef SUPPORT_UTF
5348   if (common->utf)
5349     {
5350     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5351     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5352 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
5353 #if defined COMPILE_PCRE8
5354     jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5355     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5356     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5357 #elif defined COMPILE_PCRE16
5358     jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
5359     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
5360     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5361     OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5362     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5363     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5364 #endif
5365     JUMPHERE(jump[0]);
5366 #endif /* COMPILE_PCRE[8|16] */
5367     return cc;
5368     }
5369 #endif
5370   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5371   return cc;
5372 
5373   case OP_ANYBYTE:
5374   detect_partial_match(common, backtracks);
5375   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5376   return cc;
5377 
5378 #ifdef SUPPORT_UTF
5379 #ifdef SUPPORT_UCP
5380   case OP_NOTPROP:
5381   case OP_PROP:
5382   propdata[0] = XCL_HASPROP;
5383   propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5384   propdata[2] = cc[0];
5385   propdata[3] = cc[1];
5386   propdata[4] = XCL_END;
5387   compile_xclass_matchingpath(common, propdata, backtracks);
5388   return cc + 2;
5389 #endif
5390 #endif
5391 
5392   case OP_ANYNL:
5393   detect_partial_match(common, backtracks);
5394   read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
5395   jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5396   /* We don't need to handle soft partial matching case. */
5397   end_list = NULL;
5398   if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5399     add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5400   else
5401     check_str_end(common, &end_list);
5402   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5403   jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5404   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5405   jump[2] = JUMP(SLJIT_JUMP);
5406   JUMPHERE(jump[0]);
5407   check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5408   set_jumps(end_list, LABEL());
5409   JUMPHERE(jump[1]);
5410   JUMPHERE(jump[2]);
5411   return cc;
5412 
5413   case OP_NOT_HSPACE:
5414   case OP_HSPACE:
5415   detect_partial_match(common, backtracks);
5416   read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5417   add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5418   add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5419   return cc;
5420 
5421   case OP_NOT_VSPACE:
5422   case OP_VSPACE:
5423   detect_partial_match(common, backtracks);
5424   read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5425   add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5426   add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5427   return cc;
5428 
5429 #ifdef SUPPORT_UCP
5430   case OP_EXTUNI:
5431   detect_partial_match(common, backtracks);
5432   read_char(common);
5433   add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5434   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5435   /* Optimize register allocation: use a real register. */
5436   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
5437   OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5438 
5439   label = LABEL();
5440   jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5441   OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5442   read_char(common);
5443   add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5444   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5445   OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5446 
5447   OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5448   OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5449   OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5450   OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5451   OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5452   JUMPTO(SLJIT_NOT_ZERO, label);
5453 
5454   OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5455   JUMPHERE(jump[0]);
5456   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5457 
5458   if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5459     {
5460     jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5461     /* Since we successfully read a char above, partial matching must occure. */
5462     check_partial(common, TRUE);
5463     JUMPHERE(jump[0]);
5464     }
5465   return cc;
5466 #endif
5467 
5468   case OP_EODN:
5469   /* Requires rather complex checks. */
5470   jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5471   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5472     {
5473     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5474     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5475     if (common->mode == JIT_COMPILE)
5476       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5477     else
5478       {
5479       jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
5480       OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5481       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
5482       OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5483       OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5484       add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
5485       check_partial(common, TRUE);
5486       add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5487       JUMPHERE(jump[1]);
5488       }
5489     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5490     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5491     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5492     }
5493   else if (common->nltype == NLTYPE_FIXED)
5494     {
5495     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5496     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5497     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5498     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5499     }
5500   else
5501     {
5502     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5503     jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5504     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5505     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5506     jump[2] = JUMP(SLJIT_GREATER);
5507     add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
5508     /* Equal. */
5509     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5510     jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5511     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5512 
5513     JUMPHERE(jump[1]);
5514     if (common->nltype == NLTYPE_ANYCRLF)
5515       {
5516       OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5517       add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
5518       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5519       }
5520     else
5521       {
5522       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
5523       read_char_range(common, common->nlmin, common->nlmax, TRUE);
5524       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5525       add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5526       add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5527       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5528       }
5529     JUMPHERE(jump[2]);
5530     JUMPHERE(jump[3]);
5531     }
5532   JUMPHERE(jump[0]);
5533   check_partial(common, FALSE);
5534   return cc;
5535 
5536   case OP_EOD:
5537   add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5538   check_partial(common, FALSE);
5539   return cc;
5540 
5541   case OP_CIRC:
5542   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5543   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5544   add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
5545   OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5546   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5547   return cc;
5548 
5549   case OP_CIRCM:
5550   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5551   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5552   jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
5553   OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5554   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5555   jump[0] = JUMP(SLJIT_JUMP);
5556   JUMPHERE(jump[1]);
5557 
5558   add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5559   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5560     {
5561     OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5562     add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
5563     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5564     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5565     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5566     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5567     }
5568   else
5569     {
5570     skip_char_back(common);
5571     read_char_range(common, common->nlmin, common->nlmax, TRUE);
5572     check_newlinechar(common, common->nltype, backtracks, FALSE);
5573     }
5574   JUMPHERE(jump[0]);
5575   return cc;
5576 
5577   case OP_DOLL:
5578   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5579   OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5580   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5581 
5582   if (!common->endonly)
5583     compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
5584   else
5585     {
5586     add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5587     check_partial(common, FALSE);
5588     }
5589   return cc;
5590 
5591   case OP_DOLLM:
5592   jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5593   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5594   OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5595   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5596   check_partial(common, FALSE);
5597   jump[0] = JUMP(SLJIT_JUMP);
5598   JUMPHERE(jump[1]);
5599 
5600   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5601     {
5602     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5603     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5604     if (common->mode == JIT_COMPILE)
5605       add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
5606     else
5607       {
5608       jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5609       /* STR_PTR = STR_END - IN_UCHARS(1) */
5610       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5611       check_partial(common, TRUE);
5612       add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5613       JUMPHERE(jump[1]);
5614       }
5615 
5616     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5617     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5618     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5619     }
5620   else
5621     {
5622     peek_char(common, common->nlmax);
5623     check_newlinechar(common, common->nltype, backtracks, FALSE);
5624     }
5625   JUMPHERE(jump[0]);
5626   return cc;
5627 
5628   case OP_CHAR:
5629   case OP_CHARI:
5630   length = 1;
5631 #ifdef SUPPORT_UTF
5632   if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5633 #endif
5634   if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5635     {
5636     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5637     add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5638 
5639     context.length = IN_UCHARS(length);
5640     context.sourcereg = -1;
5641 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5642     context.ucharptr = 0;
5643 #endif
5644     return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5645     }
5646 
5647   detect_partial_match(common, backtracks);
5648 #ifdef SUPPORT_UTF
5649   if (common->utf)
5650     {
5651     GETCHAR(c, cc);
5652     }
5653   else
5654 #endif
5655     c = *cc;
5656 
5657   if (type == OP_CHAR || !char_has_othercase(common, cc))
5658     {
5659     read_char_range(common, c, c, FALSE);
5660     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5661     return cc + length;
5662     }
5663   oc = char_othercase(common, c);
5664   read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5665   bit = c ^ oc;
5666   if (is_powerof2(bit))
5667     {
5668     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5669     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5670     return cc + length;
5671     }
5672   jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
5673   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5674   JUMPHERE(jump[0]);
5675   return cc + length;
5676 
5677   case OP_NOT:
5678   case OP_NOTI:
5679   detect_partial_match(common, backtracks);
5680   length = 1;
5681 #ifdef SUPPORT_UTF
5682   if (common->utf)
5683     {
5684 #ifdef COMPILE_PCRE8
5685     c = *cc;
5686     if (c < 128)
5687       {
5688       OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5689       if (type == OP_NOT || !char_has_othercase(common, cc))
5690         add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5691       else
5692         {
5693         /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5694         OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5695         add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5696         }
5697       /* Skip the variable-length character. */
5698       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5699       jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5700       OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5701       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5702       JUMPHERE(jump[0]);
5703       return cc + 1;
5704       }
5705     else
5706 #endif /* COMPILE_PCRE8 */
5707       {
5708       GETCHARLEN(c, cc, length);
5709       }
5710     }
5711   else
5712 #endif /* SUPPORT_UTF */
5713     c = *cc;
5714 
5715   if (type == OP_NOT || !char_has_othercase(common, cc))
5716     {
5717     read_char_range(common, c, c, TRUE);
5718     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5719     }
5720   else
5721     {
5722     oc = char_othercase(common, c);
5723     read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5724     bit = c ^ oc;
5725     if (is_powerof2(bit))
5726       {
5727       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5728       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5729       }
5730     else
5731       {
5732       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5733       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5734       }
5735     }
5736   return cc + length;
5737 
5738   case OP_CLASS:
5739   case OP_NCLASS:
5740   detect_partial_match(common, backtracks);
5741 
5742 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5743   bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5744   read_char_range(common, 0, bit, type == OP_NCLASS);
5745 #else
5746   read_char_range(common, 0, 255, type == OP_NCLASS);
5747 #endif
5748 
5749   if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5750     return cc + 32 / sizeof(pcre_uchar);
5751 
5752 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5753   jump[0] = NULL;
5754   if (common->utf)
5755     {
5756     jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
5757     if (type == OP_CLASS)
5758       {
5759       add_jump(compiler, backtracks, jump[0]);
5760       jump[0] = NULL;
5761       }
5762     }
5763 #elif !defined COMPILE_PCRE8
5764   jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5765   if (type == OP_CLASS)
5766     {
5767     add_jump(compiler, backtracks, jump[0]);
5768     jump[0] = NULL;
5769     }
5770 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5771 
5772   OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5773   OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5774   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5775   OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5776   OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5777   add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5778 
5779 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5780   if (jump[0] != NULL)
5781     JUMPHERE(jump[0]);
5782 #endif
5783 
5784   return cc + 32 / sizeof(pcre_uchar);
5785 
5786 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5787   case OP_XCLASS:
5788   compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5789   return cc + GET(cc, 0) - 1;
5790 #endif
5791 
5792   case OP_REVERSE:
5793   length = GET(cc, 0);
5794   if (length == 0)
5795     return cc + LINK_SIZE;
5796   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5797 #ifdef SUPPORT_UTF
5798   if (common->utf)
5799     {
5800     OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5801     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5802     label = LABEL();
5803     add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5804     skip_char_back(common);
5805     OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5806     JUMPTO(SLJIT_NOT_ZERO, label);
5807     }
5808   else
5809 #endif
5810     {
5811     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5812     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5813     add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
5814     }
5815   check_start_used_ptr(common);
5816   return cc + LINK_SIZE;
5817   }
5818 SLJIT_ASSERT_STOP();
5819 return cc;
5820 }
5821 
compile_charn_matchingpath(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,jump_list ** backtracks)5822 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5823 {
5824 /* This function consumes at least one input character. */
5825 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5826 DEFINE_COMPILER;
5827 pcre_uchar *ccbegin = cc;
5828 compare_context context;
5829 int size;
5830 
5831 context.length = 0;
5832 do
5833   {
5834   if (cc >= ccend)
5835     break;
5836 
5837   if (*cc == OP_CHAR)
5838     {
5839     size = 1;
5840 #ifdef SUPPORT_UTF
5841     if (common->utf && HAS_EXTRALEN(cc[1]))
5842       size += GET_EXTRALEN(cc[1]);
5843 #endif
5844     }
5845   else if (*cc == OP_CHARI)
5846     {
5847     size = 1;
5848 #ifdef SUPPORT_UTF
5849     if (common->utf)
5850       {
5851       if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5852         size = 0;
5853       else if (HAS_EXTRALEN(cc[1]))
5854         size += GET_EXTRALEN(cc[1]);
5855       }
5856     else
5857 #endif
5858     if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5859       size = 0;
5860     }
5861   else
5862     size = 0;
5863 
5864   cc += 1 + size;
5865   context.length += IN_UCHARS(size);
5866   }
5867 while (size > 0 && context.length <= 128);
5868 
5869 cc = ccbegin;
5870 if (context.length > 0)
5871   {
5872   /* We have a fixed-length byte sequence. */
5873   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5874   add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5875 
5876   context.sourcereg = -1;
5877 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5878   context.ucharptr = 0;
5879 #endif
5880   do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5881   return cc;
5882   }
5883 
5884 /* A non-fixed length character will be checked if length == 0. */
5885 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5886 }
5887 
5888 /* Forward definitions. */
5889 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5890 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5891 
/* Allocates a backtrack descriptor of "size" bytes from the compiler's
memory pool, zero-fills it, records "ccstart" as its pattern position and
pushes it on top of parent's list.

The macro expects "compiler", "backtrack" and "parent" to be in scope where it
is used. If the compiler is in an error state after the allocation, the
enclosing function returns "error". Every use of a macro parameter is
parenthesized so that an expression argument is evaluated as a unit. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
5904 
/* Variant of the backtrack push for functions returning void: allocates a
zero-filled backtrack descriptor of "size" bytes, records "ccstart" in it and
pushes it on top of parent's list.

The macro expects "compiler", "backtrack" and "parent" to be in scope where it
is used. If the compiler is in an error state after the allocation, the
enclosing function returns. Every use of a macro parameter is parenthesized
so that an expression argument is evaluated as a unit. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Views the local "backtrack" pointer as a pointer to the given, more
specific descriptor type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
5919 
/* Emits code that selects which of the capture groups sharing a duplicated
name is used by the OP_DNREF / OP_DNREFI item at cc.

The generated code leaves the address of the selected OVECTOR entry in TMP2
(TMP1 is overwritten with the value of OVECTOR(1)). The candidates are tried
in name table order, and the first one whose start slot differs from
OVECTOR(1) is selected. The last candidate is selected unconditionally when
no earlier one qualified. In that case, if backtracks is not NULL and
jscript_compat is off, a jump to backtracks is emitted when the last
candidate's start slot equals OVECTOR(1) as well. */
static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int remaining = GET2(cc, 1 + IMM2_SIZE);
pcre_uchar *entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
unsigned int offset;
jump_list *selected = NULL;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* All candidates except the last one: leave as soon as one differs from
OVECTOR(1). The last candidate is handled after the loop. */
for (remaining--; remaining > 0; remaining--)
  {
  offset = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
  add_jump(compiler, &selected, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
  entry += common->name_entry_size;
  }

/* Last candidate: TMP2 is set without a test, the optional check only
decides whether the generated code backtracks. */
offset = GET2(entry, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
if (backtracks != NULL && !common->jscript_compat)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));

set_jumps(selected, LABEL());
}
5949 
/* Emits code that matches the text captured by a back reference.

cc points to OP_REF / OP_REFI (the group number is an operand) or to
OP_DNREF / OP_DNREFI. For the latter two the caller must already have put the
address of the selected OVECTOR entry into TMP2 (see compile_dnref_search()).

withchecks: when TRUE, code is emitted that detects an unset group (numbered
  references only, and not in jscript_compat mode) and an empty capture.
emptyfail: only meaningful with withchecks; an empty capture jumps to
  backtracks when TRUE, and simply skips the comparison when FALSE.

Every failure of the generated code is added to the backtracks list. */
static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
/* Case sensitivity has to be derived from all four opcodes. Testing only
OP_REF / OP_REFI would make the caseful OP_DNREF use the caseless comparison
and would keep OP_DNREFI away from the UTF-aware comparison. */
BOOL caseless = (*cc == OP_REFI || *cc == OP_DNREFI);
int offset = 0;
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;

/* TMP1 = start of the captured text. */
if (ref)
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->jscript_compat)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }
else
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);

#if defined SUPPORT_UTF && defined SUPPORT_UCP
if (common->utf && caseless)
  {
  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
  /* TMP2 = end of the captured text. */
  if (ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  /* Empty capture: resolved at the end of this function. */
  if (withchecks)
    jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);

  /* Needed to save important temporary registers. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
  OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
  sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

  /* The results 0 and 1 both mean failure here; 1 additionally triggers
  check_partial() in the partial matching modes. Any other result becomes
  the new STR_PTR. */
  if (common->mode == JIT_COMPILE)
    add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
    nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
  }
else
#endif /* SUPPORT_UTF && SUPPORT_UCP */
  {
  /* TMP2 = length of the captured text; the flags tell whether it is zero. */
  if (ref)
    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  /* Empty capture: resolved at the end of this function. */
  if (withchecks)
    jump = JUMP(SLJIT_ZERO);

  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == JIT_COMPILE)
    add_jump(compiler, backtracks, partial);

  /* After the comparison helper returns, a non-zero TMP2 means mismatch. */
  add_jump(compiler, caseless ? &common->caselesscmp : &common->casefulcmp, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != JIT_COMPILE)
    {
    nopartial = JUMP(SLJIT_JUMP);
    JUMPHERE(partial);
    /* The capture is longer than the rest of the subject.
    TMP2 -= STR_PTR - STR_END, which clamps the length to what is left. */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, caseless ? &common->caselesscmp : &common->casefulcmp, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(partial);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }

/* Resolve the empty capture jump recorded above. */
if (jump != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, jump);
  else
    JUMPHERE(jump);
  }
}
6045 
compile_ref_iterator_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)6046 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6047 {
6048 DEFINE_COMPILER;
6049 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6050 backtrack_common *backtrack;
6051 pcre_uchar type;
6052 int offset = 0;
6053 struct sljit_label *label;
6054 struct sljit_jump *zerolength;
6055 struct sljit_jump *jump = NULL;
6056 pcre_uchar *ccbegin = cc;
6057 int min = 0, max = 0;
6058 BOOL minimize;
6059 
6060 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6061 
6062 if (ref)
6063   offset = GET2(cc, 1) << 1;
6064 else
6065   cc += IMM2_SIZE;
6066 type = cc[1 + IMM2_SIZE];
6067 
6068 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
6069 minimize = (type & 0x1) != 0;
6070 switch(type)
6071   {
6072   case OP_CRSTAR:
6073   case OP_CRMINSTAR:
6074   min = 0;
6075   max = 0;
6076   cc += 1 + IMM2_SIZE + 1;
6077   break;
6078   case OP_CRPLUS:
6079   case OP_CRMINPLUS:
6080   min = 1;
6081   max = 0;
6082   cc += 1 + IMM2_SIZE + 1;
6083   break;
6084   case OP_CRQUERY:
6085   case OP_CRMINQUERY:
6086   min = 0;
6087   max = 1;
6088   cc += 1 + IMM2_SIZE + 1;
6089   break;
6090   case OP_CRRANGE:
6091   case OP_CRMINRANGE:
6092   min = GET2(cc, 1 + IMM2_SIZE + 1);
6093   max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
6094   cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
6095   break;
6096   default:
6097   SLJIT_ASSERT_STOP();
6098   break;
6099   }
6100 
6101 if (!minimize)
6102   {
6103   if (min == 0)
6104     {
6105     allocate_stack(common, 2);
6106     if (ref)
6107       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6108     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6109     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6110     /* Temporary release of STR_PTR. */
6111     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6112     /* Handles both invalid and empty cases. Since the minimum repeat,
6113     is zero the invalid case is basically the same as an empty case. */
6114     if (ref)
6115       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6116     else
6117       {
6118       compile_dnref_search(common, ccbegin, NULL);
6119       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6120       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6121       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6122       }
6123     /* Restore if not zero length. */
6124     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6125     }
6126   else
6127     {
6128     allocate_stack(common, 1);
6129     if (ref)
6130       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6131     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6132     if (ref)
6133       {
6134       add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6135       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6136       }
6137     else
6138       {
6139       compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6140       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6141       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6142       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6143       }
6144     }
6145 
6146   if (min > 1 || max > 1)
6147     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
6148 
6149   label = LABEL();
6150   if (!ref)
6151     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
6152   compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
6153 
6154   if (min > 1 || max > 1)
6155     {
6156     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
6157     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6158     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
6159     if (min > 1)
6160       CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
6161     if (max > 1)
6162       {
6163       jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
6164       allocate_stack(common, 1);
6165       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6166       JUMPTO(SLJIT_JUMP, label);
6167       JUMPHERE(jump);
6168       }
6169     }
6170 
6171   if (max == 0)
6172     {
6173     /* Includes min > 1 case as well. */
6174     allocate_stack(common, 1);
6175     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6176     JUMPTO(SLJIT_JUMP, label);
6177     }
6178 
6179   JUMPHERE(zerolength);
6180   BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6181 
6182   count_match(common);
6183   return cc;
6184   }
6185 
6186 allocate_stack(common, ref ? 2 : 3);
6187 if (ref)
6188   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6189 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6190 if (type != OP_CRMINSTAR)
6191   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6192 
6193 if (min == 0)
6194   {
6195   /* Handles both invalid and empty cases. Since the minimum repeat,
6196   is zero the invalid case is basically the same as an empty case. */
6197   if (ref)
6198     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6199   else
6200     {
6201     compile_dnref_search(common, ccbegin, NULL);
6202     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6203     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6204     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6205     }
6206   /* Length is non-zero, we can match real repeats. */
6207   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6208   jump = JUMP(SLJIT_JUMP);
6209   }
6210 else
6211   {
6212   if (ref)
6213     {
6214     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6215     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6216     }
6217   else
6218     {
6219     compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6220     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6221     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6222     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6223     }
6224   }
6225 
6226 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6227 if (max > 0)
6228   add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
6229 
6230 if (!ref)
6231   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
6232 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
6233 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6234 
6235 if (min > 1)
6236   {
6237   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6238   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6239   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6240   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
6241   }
6242 else if (max > 0)
6243   OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
6244 
6245 if (jump != NULL)
6246   JUMPHERE(jump);
6247 JUMPHERE(zerolength);
6248 
6249 count_match(common);
6250 return cc;
6251 }
6252 
compile_recurse_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)6253 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6254 {
6255 DEFINE_COMPILER;
6256 backtrack_common *backtrack;
6257 recurse_entry *entry = common->entries;
6258 recurse_entry *prev = NULL;
6259 sljit_sw start = GET(cc, 1);
6260 pcre_uchar *start_cc;
6261 BOOL needs_control_head;
6262 
6263 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6264 
6265 /* Inlining simple patterns. */
6266 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6267   {
6268   start_cc = common->start + start;
6269   compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
6270   BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
6271   return cc + 1 + LINK_SIZE;
6272   }
6273 
6274 while (entry != NULL)
6275   {
6276   if (entry->start == start)
6277     break;
6278   prev = entry;
6279   entry = entry->next;
6280   }
6281 
6282 if (entry == NULL)
6283   {
6284   entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
6285   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6286     return NULL;
6287   entry->next = NULL;
6288   entry->entry = NULL;
6289   entry->calls = NULL;
6290   entry->start = start;
6291 
6292   if (prev != NULL)
6293     prev->next = entry;
6294   else
6295     common->entries = entry;
6296   }
6297 
6298 if (common->has_set_som && common->mark_ptr != 0)
6299   {
6300   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6301   allocate_stack(common, 2);
6302   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
6303   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6304   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6305   }
6306 else if (common->has_set_som || common->mark_ptr != 0)
6307   {
6308   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
6309   allocate_stack(common, 1);
6310   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6311   }
6312 
6313 if (entry->entry == NULL)
6314   add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
6315 else
6316   JUMPTO(SLJIT_FAST_CALL, entry->entry);
6317 /* Leave if the match is failed. */
6318 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
6319 return cc + 1 + LINK_SIZE;
6320 }
6321 
do_callout(struct jit_arguments * arguments,PUBL (callout_block)* callout_block,pcre_uchar ** jit_ovector)6322 static int SLJIT_CALL do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
6323 {
6324 const pcre_uchar *begin = arguments->begin;
6325 int *offset_vector = arguments->offsets;
6326 int offset_count = arguments->offset_count;
6327 int i;
6328 
6329 if (PUBL(callout) == NULL)
6330   return 0;
6331 
6332 callout_block->version = 2;
6333 callout_block->callout_data = arguments->callout_data;
6334 
6335 /* Offsets in subject. */
6336 callout_block->subject_length = arguments->end - arguments->begin;
6337 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
6338 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
6339 #if defined COMPILE_PCRE8
6340 callout_block->subject = (PCRE_SPTR)begin;
6341 #elif defined COMPILE_PCRE16
6342 callout_block->subject = (PCRE_SPTR16)begin;
6343 #elif defined COMPILE_PCRE32
6344 callout_block->subject = (PCRE_SPTR32)begin;
6345 #endif
6346 
6347 /* Convert and copy the JIT offset vector to the offset_vector array. */
6348 callout_block->capture_top = 0;
6349 callout_block->offset_vector = offset_vector;
6350 for (i = 2; i < offset_count; i += 2)
6351   {
6352   offset_vector[i] = jit_ovector[i] - begin;
6353   offset_vector[i + 1] = jit_ovector[i + 1] - begin;
6354   if (jit_ovector[i] >= begin)
6355     callout_block->capture_top = i;
6356   }
6357 
6358 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
6359 if (offset_count > 0)
6360   offset_vector[0] = -1;
6361 if (offset_count > 1)
6362   offset_vector[1] = -1;
6363 return (*PUBL(callout))(callout_block);
6364 }
6365 
6366 /* Size of the callout block, rounded up to a multiple of 8 bytes. */
6367 #define CALLOUT_ARG_SIZE \
6368     (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
6369 
/* Offset of the callout block field "arg", measured from the end of the
CALLOUT_ARG_SIZE byte area (it is used below with STACK_TOP as the base). */
6370 #define CALLOUT_ARG_OFFSET(arg) \
6371     (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
6372 
/* Emits the code for a callout item. Space for a callout block is allocated
on the backtrack stack, several of its fields are filled in, and do_callout()
is called with it. The generated code then checks the returned value: a
positive value jumps to the backtrack list of this item, a negative value
jumps to the forced quit path, and zero falls through.

  common  compiler state
  cc      points to the callout opcode; cc[1] is stored as callout_number,
          GET(cc, 2) as pattern_position and GET(cc, 2 + LINK_SIZE) as
          next_item_length
  parent  parent backtrack (presumably used by PUSH_BACKTRACK - TODO confirm)

Returns cc + 2 + 2 * LINK_SIZE, the address following the callout item. */
compile_callout_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)6373 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6374 {
6375 DEFINE_COMPILER;
6376 backtrack_common *backtrack;
6377 
6378 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6379 
6380 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6381 
6382 SLJIT_ASSERT(common->capture_last_ptr != 0);
6383 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
6384 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6385 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6386 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6387 
6388 /* These pointer sized fields temporarily store internal variables: OVECTOR(0) goes into subject and STR_PTR into offset_vector. do_callout() converts them to start_match and current_position. */
6389 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6390 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
6391 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
6392 
/* The mark field is loaded through TMP1 (the ARGUMENTS value) when marks are
in use; otherwise an immediate 0 is stored. */
6393 if (common->mark_ptr != 0)
6394   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6395 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6396 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6397 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6398 
6399 /* Needed to save important temporary registers. */
6400 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
/* SLJIT_R1 receives the address of the callout block and SLJIT_R2 the address
of the local offset vector. The first argument is presumably the ARGUMENTS
value copied into TMP1 above - TODO confirm that TMP1 is the first argument
register. */
6401 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
6402 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
6403 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
/* NOTE(review): the SLJIT_MOV_SI onto itself presumably sign extends the int
result of do_callout() to the full register width - confirm. */
6404 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
6405 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6406 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6407 
6408 /* Check return value: greater than zero backtracks, less than zero quits. */
6409 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6410 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
6411 if (common->forced_quit_label == NULL)
6412   add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS));
6413 else
6414   JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label);
6415 return cc + 2 + 2 * LINK_SIZE;
6416 }
6417 
6418 #undef CALLOUT_ARG_SIZE
6419 #undef CALLOUT_ARG_OFFSET
6420 
/* Emits the matching path of an assertion (opcode between OP_ASSERT and
OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO / OP_BRAMINZERO.

  common       compiler state; its local_exit, positive_assert, then_trap,
               quit, accept and positive_assert_quit related fields are saved
               on entry and restored before every return
  cc           points to the assertion opcode, or to the OP_BRAZERO /
               OP_BRAMINZERO in front of it
  backtrack    receives framesize and private_data_ptr, and for the
               OP_BRAZERO form also matchingpath
  conditional  when TRUE, failure jumps are collected in
               backtrack->condfailed instead of
               backtrack->common.topbacktracks

Returns the address after the closing item of the assertion
(cc + 1 + LINK_SIZE), or NULL when the sljit compiler reports an error. */
compile_assert_matchingpath(compiler_common * common,pcre_uchar * cc,assert_backtrack * backtrack,BOOL conditional)6421 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
6422 {
6423 DEFINE_COMPILER;
6424 int framesize;
6425 int extrasize;
6426 BOOL needs_control_head;
6427 int private_data_ptr;
6428 backtrack_common altbacktrack;
6429 pcre_uchar *ccbegin;
6430 pcre_uchar opcode;
6431 pcre_uchar bra = OP_BRA;
6432 jump_list *tmp = NULL;
6433 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6434 jump_list **found;
6435 /* Saving previous accept variables. */
6436 BOOL save_local_exit = common->local_exit;
6437 BOOL save_positive_assert = common->positive_assert;
6438 then_trap_backtrack *save_then_trap = common->then_trap;
6439 struct sljit_label *save_quit_label = common->quit_label;
6440 struct sljit_label *save_accept_label = common->accept_label;
6441 jump_list *save_quit = common->quit;
6442 jump_list *save_positive_assert_quit = common->positive_assert_quit;
6443 jump_list *save_accept = common->accept;
6444 struct sljit_jump *jump;
6445 struct sljit_jump *brajump = NULL;
6446 
6447 /* Assert captures then. */
6448 common->then_trap = NULL;
6449 
6450 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6451   {
6452   SLJIT_ASSERT(!conditional);
6453   bra = *cc;
6454   cc++;
6455   }
6456 private_data_ptr = PRIVATE_DATA(cc);
6457 SLJIT_ASSERT(private_data_ptr != 0);
6458 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6459 backtrack->framesize = framesize;
6460 backtrack->private_data_ptr = private_data_ptr;
6461 opcode = *cc;
6462 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* Positive assertions collect the "an alternative matched" jumps in the local
tmp list, which is bound below at "Assert is successful". Negative assertions
send those jumps directly to the failure target. */
6463 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
6464 ccbegin = cc;
6465 cc += GET(cc, 1);
6466 
6467 if (bra == OP_BRAMINZERO)
6468   {
6469   /* This is a braminzero backtrack path. */
6470   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6471   free_stack(common, 1);
6472   brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6473   }
6474 
/* Save STR_PTR on the stack, together with the control head (when it is
needed) and, when a frame is used, the previous private data value. */
6475 if (framesize < 0)
6476   {
6477   extrasize = needs_control_head ? 2 : 1;
6478   if (framesize == no_frame)
6479     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
6480   allocate_stack(common, extrasize);
6481   if (needs_control_head)
6482     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6483   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6484   if (needs_control_head)
6485     {
6486     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6487     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6488     }
6489   }
6490 else
6491   {
6492   extrasize = needs_control_head ? 3 : 2;
6493   allocate_stack(common, framesize + extrasize);
6494   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6495   OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6496   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
6497   if (needs_control_head)
6498     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6499   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6500   if (needs_control_head)
6501     {
6502     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6503     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6504     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6505     }
6506   else
6507     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6508   init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6509   }
6510 
6511 memset(&altbacktrack, 0, sizeof(backtrack_common));
6512 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6513   {
6514   /* Negative assert is stronger than positive assert. */
6515   common->local_exit = TRUE;
6516   common->quit_label = NULL;
6517   common->quit = NULL;
6518   common->positive_assert = FALSE;
6519   }
6520 else
6521   common->positive_assert = TRUE;
6522 common->positive_assert_quit = NULL;
6523 
/* Compile the alternatives one by one: matching path, stack reset, jump to
"found", and finally the backtracking path of the alternative. */
6524 while (1)
6525   {
6526   common->accept_label = NULL;
6527   common->accept = NULL;
6528   altbacktrack.top = NULL;
6529   altbacktrack.topbacktracks = NULL;
6530 
  /* From the second alternative on, reload the STR_PTR saved on the stack. */
6531   if (*ccbegin == OP_ALT)
6532     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6533 
6534   altbacktrack.cc = ccbegin;
6535   compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6536   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6537     {
    /* Compiler error: restore the saved state and give up. */
6538     if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6539       {
6540       common->local_exit = save_local_exit;
6541       common->quit_label = save_quit_label;
6542       common->quit = save_quit;
6543       }
6544     common->positive_assert = save_positive_assert;
6545     common->then_trap = save_then_trap;
6546     common->accept_label = save_accept_label;
6547     common->positive_assert_quit = save_positive_assert_quit;
6548     common->accept = save_accept;
6549     return NULL;
6550     }
6551   common->accept_label = LABEL();
6552   if (common->accept != NULL)
6553     set_jumps(common->accept, common->accept_label);
6554 
6555   /* Reset stack. */
6556   if (framesize < 0)
6557     {
6558     if (framesize == no_frame)
6559       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6560     else
6561       free_stack(common, extrasize);
6562     if (needs_control_head)
6563       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6564     }
6565   else
6566     {
6567     if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6568       {
6569       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6570       OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6571       if (needs_control_head)
6572         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6573       }
6574     else
6575       {
6576       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6577       if (needs_control_head)
6578         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6579       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6580       }
6581     }
6582 
6583   if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6584     {
6585     /* We know that STR_PTR was stored on the top of the stack. */
6586     if (conditional)
6587       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6588     else if (bra == OP_BRAZERO)
6589       {
6590       if (framesize < 0)
6591         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6592       else
6593         {
6594         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6595         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6596         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6597         }
6598       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6599       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6600       }
6601     else if (framesize >= 0)
6602       {
6603       /* For OP_BRA and OP_BRAMINZERO. */
6604       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6605       }
6606     }
  /* The alternative matched: leave through the "found" list. */
6607   add_jump(compiler, found, JUMP(SLJIT_JUMP));
6608 
6609   compile_backtrackingpath(common, altbacktrack.top);
6610   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6611     {
    /* Compiler error: restore the saved state and give up. */
6612     if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6613       {
6614       common->local_exit = save_local_exit;
6615       common->quit_label = save_quit_label;
6616       common->quit = save_quit;
6617       }
6618     common->positive_assert = save_positive_assert;
6619     common->then_trap = save_then_trap;
6620     common->accept_label = save_accept_label;
6621     common->positive_assert_quit = save_positive_assert_quit;
6622     common->accept = save_accept;
6623     return NULL;
6624     }
6625   set_jumps(altbacktrack.topbacktracks, LABEL());
6626 
6627   if (*cc != OP_ALT)
6628     break;
6629 
6630   ccbegin = cc;
6631   cc += GET(cc, 1);
6632   }
6633 
6634 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6635   {
6636   SLJIT_ASSERT(common->positive_assert_quit == NULL);
6637   /* Makes the check less complicated below. */
6638   common->positive_assert_quit = common->quit;
6639   }
6640 
6641 /* None of them matched. */
6642 if (common->positive_assert_quit != NULL)
6643   {
  /* The normal fall-through path jumps over this stack restoring code; only
  the collected quit jumps land here. */
6644   jump = JUMP(SLJIT_JUMP);
6645   set_jumps(common->positive_assert_quit, LABEL());
6646   SLJIT_ASSERT(framesize != no_stack);
6647   if (framesize < 0)
6648     OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6649   else
6650     {
6651     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6652     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6653     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6654     }
6655   JUMPHERE(jump);
6656   }
6657 
6658 if (needs_control_head)
6659   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6660 
6661 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6662   {
6663   /* Assert is failed. */
6664   if (conditional || bra == OP_BRAZERO)
6665     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6666 
6667   if (framesize < 0)
6668     {
6669     /* The topmost item should be 0. */
6670     if (bra == OP_BRAZERO)
6671       {
6672       if (extrasize == 2)
6673         free_stack(common, 1);
6674       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6675       }
6676     else
6677       free_stack(common, extrasize);
6678     }
6679   else
6680     {
6681     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6682     /* The topmost item should be 0. */
6683     if (bra == OP_BRAZERO)
6684       {
6685       free_stack(common, framesize + extrasize - 1);
6686       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6687       }
6688     else
6689       free_stack(common, framesize + extrasize);
6690     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6691     }
  /* For OP_BRAZERO the failure jump is bound to the matching path label
  below; otherwise it is added to the failure target list. */
6692   jump = JUMP(SLJIT_JUMP);
6693   if (bra != OP_BRAZERO)
6694     add_jump(compiler, target, jump);
6695 
6696   /* Assert is successful. */
6697   set_jumps(tmp, LABEL());
6698   if (framesize < 0)
6699     {
6700     /* We know that STR_PTR was stored on the top of the stack. */
6701     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6702     /* Keep the STR_PTR on the top of the stack. */
6703     if (bra == OP_BRAZERO)
6704       {
6705       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6706       if (extrasize == 2)
6707         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6708       }
6709     else if (bra == OP_BRAMINZERO)
6710       {
6711       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6712       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6713       }
6714     }
6715   else
6716     {
6717     if (bra == OP_BRA)
6718       {
6719       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6720       OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6721       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
6722       }
6723     else
6724       {
6725       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6726       OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
6727       if (extrasize == 2)
6728         {
6729         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6730         if (bra == OP_BRAMINZERO)
6731           OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6732         }
6733       else
6734         {
6735         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
6736         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
6737         }
6738       }
6739     }
6740 
6741   if (bra == OP_BRAZERO)
6742     {
6743     backtrack->matchingpath = LABEL();
6744     SET_LABEL(jump, backtrack->matchingpath);
6745     }
6746   else if (bra == OP_BRAMINZERO)
6747     {
6748     JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6749     JUMPHERE(brajump);
6750     if (framesize >= 0)
6751       {
6752       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6753       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6754       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6755       }
6756     set_jumps(backtrack->common.topbacktracks, LABEL());
6757     }
6758   }
6759 else
6760   {
6761   /* AssertNot is successful. */
6762   if (framesize < 0)
6763     {
6764     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6765     if (bra != OP_BRA)
6766       {
6767       if (extrasize == 2)
6768         free_stack(common, 1);
6769       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6770       }
6771     else
6772       free_stack(common, extrasize);
6773     }
6774   else
6775     {
6776     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6777     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6778     /* The topmost item should be 0. */
6779     if (bra != OP_BRA)
6780       {
6781       free_stack(common, framesize + extrasize - 1);
6782       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6783       }
6784     else
6785       free_stack(common, framesize + extrasize);
6786     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6787     }
6788 
6789   if (bra == OP_BRAZERO)
6790     backtrack->matchingpath = LABEL();
6791   else if (bra == OP_BRAMINZERO)
6792     {
6793     JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6794     JUMPHERE(brajump);
6795     }
6796 
6797   if (bra != OP_BRA)
6798     {
6799     SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
6800     set_jumps(backtrack->common.topbacktracks, LABEL());
6801     backtrack->common.topbacktracks = NULL;
6802     }
6803   }
6804 
/* Restore the state saved on entry. */
6805 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6806   {
6807   common->local_exit = save_local_exit;
6808   common->quit_label = save_quit_label;
6809   common->quit = save_quit;
6810   }
6811 common->positive_assert = save_positive_assert;
6812 common->then_trap = save_then_trap;
6813 common->accept_label = save_accept_label;
6814 common->positive_assert_quit = save_positive_assert_quit;
6815 common->accept = save_accept;
6816 return cc + 1 + LINK_SIZE;
6817 }
6818 
match_once_common(compiler_common * common,pcre_uchar ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)6819 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6820 {
6821 DEFINE_COMPILER;
6822 int stacksize;
6823 
6824 if (framesize < 0)
6825   {
6826   if (framesize == no_frame)
6827     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6828   else
6829     {
6830     stacksize = needs_control_head ? 1 : 0;
6831     if (ket != OP_KET || has_alternatives)
6832       stacksize++;
6833     free_stack(common, stacksize);
6834     }
6835 
6836   if (needs_control_head)
6837     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6838 
6839   /* TMP2 which is set here used by OP_KETRMAX below. */
6840   if (ket == OP_KETRMAX)
6841     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6842   else if (ket == OP_KETRMIN)
6843     {
6844     /* Move the STR_PTR to the private_data_ptr. */
6845     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6846     }
6847   }
6848 else
6849   {
6850   stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6851   OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6852   if (needs_control_head)
6853     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6854 
6855   if (ket == OP_KETRMAX)
6856     {
6857     /* TMP2 which is set here used by OP_KETRMAX below. */
6858     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6859     }
6860   }
6861 if (needs_control_head)
6862   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
6863 }
6864 
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)6865 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6866 {
6867 DEFINE_COMPILER;
6868 
6869 if (common->capture_last_ptr != 0)
6870   {
6871   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
6872   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6873   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6874   stacksize++;
6875   }
6876 if (common->optimized_cbracket[offset >> 1] == 0)
6877   {
6878   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6879   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6880   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6881   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6882   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6883   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
6884   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
6885   stacksize += 2;
6886   }
6887 return stacksize;
6888 }
6889 
6890 /*
6891   Handling bracketed expressions is probably the most complex part.
6892 
6893   Stack layout naming characters:
6894     S - Push the current STR_PTR
6895     0 - Push a 0 (NULL)
6896     A - Push the current STR_PTR. Needed for restoring the STR_PTR
6897         before the next alternative. Not pushed if there are no alternatives.
6898     M - Any values pushed by the current alternative. Can be empty, or anything.
6899     C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6900     L - Push the previous local (pointed by localptr) to the stack
6901    () - optional values stored on the stack
6902   ()* - optional, can be stored multiple times
6903 
6904   The following list shows the regular expression templates, their PCRE byte codes
6905   and stack layout supported by pcre-sljit.
6906 
6907   (?:)                     OP_BRA     | OP_KET                A M
6908   ()                       OP_CBRA    | OP_KET                C M
6909   (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
6910                            OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
6911   (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
6912                            OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
6913   ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
6914                            OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
6915   ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
6916                            OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
6917   (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
6918   (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
6919   ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
6920   ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
6921   (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
6922            OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
6923   (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
6924            OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
6925   ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
6926            OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
6927   ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
6928            OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*
6929 
6930 
6931   Stack layout naming characters:
6932     A - Push the alternative index (starting from 0) on the stack.
6933         Not pushed if there are no alternatives.
6934     M - Any values pushed by the current alternative. Can be empty, or anything.
6935 
6936   The next list shows the possible content of a bracket:
6937   (|)     OP_*BRA    | OP_ALT ...         M A
6938   (?()|)  OP_*COND   | OP_ALT             M A
6939   (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
6940   (?>|)   OP_ONCE_NC | OP_ALT ...         [stack trace] M A
6941                                           Or nothing, if trace is unnecessary
6942 */
6943 
compile_bracket_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)6944 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6945 {
6946 DEFINE_COMPILER;
6947 backtrack_common *backtrack;
6948 pcre_uchar opcode;
6949 int private_data_ptr = 0;
6950 int offset = 0;
6951 int i, stacksize;
6952 int repeat_ptr = 0, repeat_length = 0;
6953 int repeat_type = 0, repeat_count = 0;
6954 pcre_uchar *ccbegin;
6955 pcre_uchar *matchingpath;
6956 pcre_uchar *slot;
6957 pcre_uchar bra = OP_BRA;
6958 pcre_uchar ket;
6959 assert_backtrack *assert;
6960 BOOL has_alternatives;
6961 BOOL needs_control_head = FALSE;
6962 struct sljit_jump *jump;
6963 struct sljit_jump *skip;
6964 struct sljit_label *rmax_label = NULL;
6965 struct sljit_jump *braminzero = NULL;
6966 
6967 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6968 
6969 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6970   {
6971   bra = *cc;
6972   cc++;
6973   opcode = *cc;
6974   }
6975 
6976 opcode = *cc;
6977 ccbegin = cc;
6978 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
6979 ket = *matchingpath;
6980 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
6981   {
6982   repeat_ptr = PRIVATE_DATA(matchingpath);
6983   repeat_length = PRIVATE_DATA(matchingpath + 1);
6984   repeat_type = PRIVATE_DATA(matchingpath + 2);
6985   repeat_count = PRIVATE_DATA(matchingpath + 3);
6986   SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
6987   if (repeat_type == OP_UPTO)
6988     ket = OP_KETRMAX;
6989   if (repeat_type == OP_MINUPTO)
6990     ket = OP_KETRMIN;
6991   }
6992 
6993 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6994   {
6995   /* Drop this bracket_backtrack. */
6996   parent->top = backtrack->prev;
6997   return matchingpath + 1 + LINK_SIZE + repeat_length;
6998   }
6999 
7000 matchingpath = ccbegin + 1 + LINK_SIZE;
7001 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
7002 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
7003 cc += GET(cc, 1);
7004 
7005 has_alternatives = *cc == OP_ALT;
7006 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
7007   has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL) ? FALSE : TRUE;
7008 
7009 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
7010   opcode = OP_SCOND;
7011 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
7012   opcode = OP_ONCE;
7013 
7014 if (opcode == OP_CBRA || opcode == OP_SCBRA)
7015   {
7016   /* Capturing brackets has a pre-allocated space. */
7017   offset = GET2(ccbegin, 1 + LINK_SIZE);
7018   if (common->optimized_cbracket[offset] == 0)
7019     {
7020     private_data_ptr = OVECTOR_PRIV(offset);
7021     offset <<= 1;
7022     }
7023   else
7024     {
7025     offset <<= 1;
7026     private_data_ptr = OVECTOR(offset);
7027     }
7028   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
7029   matchingpath += IMM2_SIZE;
7030   }
7031 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
7032   {
7033   /* Other brackets simply allocate the next entry. */
7034   private_data_ptr = PRIVATE_DATA(ccbegin);
7035   SLJIT_ASSERT(private_data_ptr != 0);
7036   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
7037   if (opcode == OP_ONCE)
7038     BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
7039   }
7040 
7041 /* Instructions before the first alternative. */
7042 stacksize = 0;
7043 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
7044   stacksize++;
7045 if (bra == OP_BRAZERO)
7046   stacksize++;
7047 
7048 if (stacksize > 0)
7049   allocate_stack(common, stacksize);
7050 
7051 stacksize = 0;
7052 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
7053   {
7054   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
7055   stacksize++;
7056   }
7057 
7058 if (bra == OP_BRAZERO)
7059   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7060 
7061 if (bra == OP_BRAMINZERO)
7062   {
7063   /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
7064   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7065   if (ket != OP_KETRMIN)
7066     {
7067     free_stack(common, 1);
7068     braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7069     }
7070   else
7071     {
7072     if (opcode == OP_ONCE || opcode >= OP_SBRA)
7073       {
7074       jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7075       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
7076       /* Nothing stored during the first run. */
7077       skip = JUMP(SLJIT_JUMP);
7078       JUMPHERE(jump);
7079       /* Checking zero-length iteration. */
7080       if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
7081         {
7082         /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
7083         braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7084         }
7085       else
7086         {
7087         /* Except when the whole stack frame must be saved. */
7088         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7089         braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
7090         }
7091       JUMPHERE(skip);
7092       }
7093     else
7094       {
7095       jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7096       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
7097       JUMPHERE(jump);
7098       }
7099     }
7100   }
7101 
7102 if (repeat_type != 0)
7103   {
7104   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
7105   if (repeat_type == OP_EXACT)
7106     rmax_label = LABEL();
7107   }
7108 
7109 if (ket == OP_KETRMIN)
7110   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
7111 
7112 if (ket == OP_KETRMAX)
7113   {
7114   rmax_label = LABEL();
7115   if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
7116     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
7117   }
7118 
7119 /* Handling capturing brackets and alternatives. */
7120 if (opcode == OP_ONCE)
7121   {
7122   stacksize = 0;
7123   if (needs_control_head)
7124     {
7125     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
7126     stacksize++;
7127     }
7128 
7129   if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
7130     {
7131     /* Neither capturing brackets nor recursions are found in the block. */
7132     if (ket == OP_KETRMIN)
7133       {
7134       stacksize += 2;
7135       if (!needs_control_head)
7136         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7137       }
7138     else
7139       {
7140       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
7141         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
7142       if (ket == OP_KETRMAX || has_alternatives)
7143         stacksize++;
7144       }
7145 
7146     if (stacksize > 0)
7147       allocate_stack(common, stacksize);
7148 
7149     stacksize = 0;
7150     if (needs_control_head)
7151       {
7152       stacksize++;
7153       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7154       }
7155 
7156     if (ket == OP_KETRMIN)
7157       {
7158       if (needs_control_head)
7159         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7160       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7161       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
7162         OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
7163       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
7164       }
7165     else if (ket == OP_KETRMAX || has_alternatives)
7166       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7167     }
7168   else
7169     {
7170     if (ket != OP_KET || has_alternatives)
7171       stacksize++;
7172 
7173     stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
7174     allocate_stack(common, stacksize);
7175 
7176     if (needs_control_head)
7177       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7178 
7179     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7180     OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
7181 
7182     stacksize = needs_control_head ? 1 : 0;
7183     if (ket != OP_KET || has_alternatives)
7184       {
7185       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7186       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
7187       stacksize++;
7188       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7189       }
7190     else
7191       {
7192       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
7193       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7194       }
7195     init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
7196     }
7197   }
7198 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
7199   {
7200   /* Saving the previous values. */
7201   if (common->optimized_cbracket[offset >> 1] != 0)
7202     {
7203     SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
7204     allocate_stack(common, 2);
7205     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7206     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
7207     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
7208     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
7209     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
7210     }
7211   else
7212     {
7213     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7214     allocate_stack(common, 1);
7215     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
7216     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7217     }
7218   }
7219 else if (opcode == OP_SBRA || opcode == OP_SCOND)
7220   {
7221   /* Saving the previous value. */
7222   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7223   allocate_stack(common, 1);
7224   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
7225   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7226   }
7227 else if (has_alternatives)
7228   {
7229   /* Pushing the starting string pointer. */
7230   allocate_stack(common, 1);
7231   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7232   }
7233 
7234 /* Generating code for the first alternative. */
7235 if (opcode == OP_COND || opcode == OP_SCOND)
7236   {
7237   if (*matchingpath == OP_CREF)
7238     {
7239     SLJIT_ASSERT(has_alternatives);
7240     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
7241       CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
7242     matchingpath += 1 + IMM2_SIZE;
7243     }
7244   else if (*matchingpath == OP_DNCREF)
7245     {
7246     SLJIT_ASSERT(has_alternatives);
7247 
7248     i = GET2(matchingpath, 1 + IMM2_SIZE);
7249     slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
7250     OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
7251     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
7252     OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
7253     slot += common->name_entry_size;
7254     i--;
7255     while (i-- > 0)
7256       {
7257       OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
7258       OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0);
7259       slot += common->name_entry_size;
7260       }
7261     OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
7262     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
7263     matchingpath += 1 + 2 * IMM2_SIZE;
7264     }
7265   else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL)
7266     {
7267     /* Never has other case. */
7268     BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
7269     SLJIT_ASSERT(!has_alternatives);
7270 
7271     if (*matchingpath == OP_FAIL)
7272       stacksize = 0;
7273     if (*matchingpath == OP_RREF)
7274       {
7275       stacksize = GET2(matchingpath, 1);
7276       if (common->currententry == NULL)
7277         stacksize = 0;
7278       else if (stacksize == RREF_ANY)
7279         stacksize = 1;
7280       else if (common->currententry->start == 0)
7281         stacksize = stacksize == 0;
7282       else
7283         stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
7284 
7285       if (stacksize != 0)
7286         matchingpath += 1 + IMM2_SIZE;
7287       }
7288     else
7289       {
7290       if (common->currententry == NULL || common->currententry->start == 0)
7291         stacksize = 0;
7292       else
7293         {
7294         stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
7295         slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
7296         i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
7297         while (stacksize > 0)
7298           {
7299           if ((int)GET2(slot, 0) == i)
7300             break;
7301           slot += common->name_entry_size;
7302           stacksize--;
7303           }
7304         }
7305 
7306       if (stacksize != 0)
7307         matchingpath += 1 + 2 * IMM2_SIZE;
7308       }
7309 
7310       /* The stacksize == 0 is a common "else" case. */
7311       if (stacksize == 0)
7312         {
7313         if (*cc == OP_ALT)
7314           {
7315           matchingpath = cc + 1 + LINK_SIZE;
7316           cc += GET(cc, 1);
7317           }
7318         else
7319           matchingpath = cc;
7320         }
7321     }
7322   else
7323     {
7324     SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
7325     /* Similar code as PUSH_BACKTRACK macro. */
7326     assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
7327     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7328       return NULL;
7329     memset(assert, 0, sizeof(assert_backtrack));
7330     assert->common.cc = matchingpath;
7331     BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
7332     matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
7333     }
7334   }
7335 
7336 compile_matchingpath(common, matchingpath, cc, backtrack);
7337 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7338   return NULL;
7339 
7340 if (opcode == OP_ONCE)
7341   match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
7342 
7343 stacksize = 0;
7344 if (repeat_type == OP_MINUPTO)
7345   {
7346   /* We need to preserve the counter. TMP2 will be used below. */
7347   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
7348   stacksize++;
7349   }
7350 if (ket != OP_KET || bra != OP_BRA)
7351   stacksize++;
7352 if (offset != 0)
7353   {
7354   if (common->capture_last_ptr != 0)
7355     stacksize++;
7356   if (common->optimized_cbracket[offset >> 1] == 0)
7357     stacksize += 2;
7358   }
7359 if (has_alternatives && opcode != OP_ONCE)
7360   stacksize++;
7361 
7362 if (stacksize > 0)
7363   allocate_stack(common, stacksize);
7364 
7365 stacksize = 0;
7366 if (repeat_type == OP_MINUPTO)
7367   {
7368   /* TMP2 was set above. */
7369   OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
7370   stacksize++;
7371   }
7372 
7373 if (ket != OP_KET || bra != OP_BRA)
7374   {
7375   if (ket != OP_KET)
7376     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7377   else
7378     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
7379   stacksize++;
7380   }
7381 
7382 if (offset != 0)
7383   stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
7384 
7385 if (has_alternatives)
7386   {
7387   if (opcode != OP_ONCE)
7388     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
7389   if (ket != OP_KETRMAX)
7390     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
7391   }
7392 
7393 /* Must be after the matchingpath label. */
7394 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
7395   {
7396   SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
7397   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
7398   }
7399 
7400 if (ket == OP_KETRMAX)
7401   {
7402   if (repeat_type != 0)
7403     {
7404     if (has_alternatives)
7405       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
7406     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
7407     JUMPTO(SLJIT_NOT_ZERO, rmax_label);
7408     /* Drop STR_PTR for greedy plus quantifier. */
7409     if (opcode != OP_ONCE)
7410       free_stack(common, 1);
7411     }
7412   else if (opcode == OP_ONCE || opcode >= OP_SBRA)
7413     {
7414     if (has_alternatives)
7415       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
7416     /* Checking zero-length iteration. */
7417     if (opcode != OP_ONCE)
7418       {
7419       CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
7420       /* Drop STR_PTR for greedy plus quantifier. */
7421       if (bra != OP_BRAZERO)
7422         free_stack(common, 1);
7423       }
7424     else
7425       /* TMP2 must contain the starting STR_PTR. */
7426       CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
7427     }
7428   else
7429     JUMPTO(SLJIT_JUMP, rmax_label);
7430   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
7431   }
7432 
7433 if (repeat_type == OP_EXACT)
7434   {
7435   count_match(common);
7436   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
7437   JUMPTO(SLJIT_NOT_ZERO, rmax_label);
7438   }
7439 else if (repeat_type == OP_UPTO)
7440   {
7441   /* We need to preserve the counter. */
7442   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
7443   allocate_stack(common, 1);
7444   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7445   }
7446 
7447 if (bra == OP_BRAZERO)
7448   BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
7449 
7450 if (bra == OP_BRAMINZERO)
7451   {
7452   /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
7453   JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
7454   if (braminzero != NULL)
7455     {
7456     JUMPHERE(braminzero);
7457     /* We need to release the end pointer to perform the
7458     backtrack for the zero-length iteration. When
7459     framesize is < 0, OP_ONCE will do the release itself. */
7460     if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
7461       {
7462       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7463       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
7464       }
7465     else if (ket == OP_KETRMIN && opcode != OP_ONCE)
7466       free_stack(common, 1);
7467     }
7468   /* Continue to the normal backtrack. */
7469   }
7470 
7471 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
7472   count_match(common);
7473 
7474 /* Skip the other alternatives. */
7475 while (*cc == OP_ALT)
7476   cc += GET(cc, 1);
7477 cc += 1 + LINK_SIZE;
7478 
7479 /* Temporarily encoding the needs_control_head in framesize. */
7480 if (opcode == OP_ONCE)
7481   BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
7482 return cc + repeat_length;
7483 }
7484 
/* Generates the matching path of a bracket from the OP_BRAPOS family
(OP_BRAPOS, OP_SBRAPOS, OP_CBRAPOS, OP_SCBRAPOS), optionally preceded by
OP_BRAPOSZERO.

Arguments:
  common    shared state of the JIT code generator
  cc        points to OP_BRAPOSZERO or to the OP_*BRAPOS opcode itself
  parent    parent backtrack record; it is not referenced directly in this
            body (presumably consumed by PUSH_BACKTRACK - confirm against
            the macro definition)

Returns:    the byte code position after the closing OP_KETRPOS
            (cc + 1 + LINK_SIZE), or NULL when sljit reports a compiler
            error after compiling a nested path

The generated code runs every alternative in a loop. After an alternative
matches, the saved string position (and the capture data for capturing
brackets) is refreshed and control jumps back to the loop head. When "zero"
is FALSE an extra stack slot is initialised to 1 and cleared once an
iteration has succeeded; if it is still non-zero after the loop, control
is routed to backtrack->topbacktracks. */
static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
pcre_uchar opcode;
int private_data_ptr;
int cbraprivptr = 0;
BOOL needs_control_head;
int framesize;
int stacksize;
int offset = 0;
BOOL zero = FALSE;
pcre_uchar *ccbegin = NULL;
int stack; /* Also contains the offset of control head. */
struct sljit_label *loop = NULL;
struct jump_list *emptymatch = NULL;

PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
if (*cc == OP_BRAPOSZERO)
  {
  /* Zero iterations are acceptable: no "matched at least once" flag is
  allocated below. */
  zero = TRUE;
  cc++;
  }

opcode = *cc;
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
switch(opcode)
  {
  case OP_BRAPOS:
  case OP_SBRAPOS:
  ccbegin = cc + 1 + LINK_SIZE;
  break;

  case OP_CBRAPOS:
  case OP_SCBRAPOS:
  offset = GET2(cc, 1 + LINK_SIZE);
  /* This case cannot be optimized in the same way as
  normal capturing brackets. */
  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
  cbraprivptr = OVECTOR_PRIV(offset);
  /* From here on "offset" indexes the ovector pair of the capture. */
  offset <<= 1;
  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
  break;

  default:
  SLJIT_ASSERT_STOP();
  break;
  }

framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
if (framesize < 0)
  {
  /* No frame is saved. Slots: either the two ovector values (plus
  capture_last when it is tracked) or the starting STR_PTR, then the
  optional control head and the optional "no match yet" flag. */
  if (offset != 0)
    {
    stacksize = 2;
    if (common->capture_last_ptr != 0)
      stacksize++;
    }
  else
    stacksize = 1;

  if (needs_control_head)
    stacksize++;
  if (!zero)
    stacksize++;

  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
  allocate_stack(common, stacksize);
  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  stack = 0;
  if (offset != 0)
    {
    stack = 2;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    /* TMP2 is free again; it carries the control head until it is
    stored at the end of this branch. */
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    if (common->capture_last_ptr != 0)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
      stack = 3;
      }
    }
  else
    {
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    stack = 1;
    }

  /* The flag occupies the last slot, the control head the one before it.
  On exit "stack" is the index of the control head slot. */
  if (needs_control_head)
    stack++;
  if (!zero)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
  if (needs_control_head)
    {
    stack--;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    }
  }
else
  {
  /* A frame is saved. Slots in order: optional flag, optional control
  head, optional starting STR_PTR (non-capturing brackets only), the
  previous value of private_data_ptr, then the frame itself. */
  stacksize = framesize + 1;
  if (!zero)
    stacksize++;
  if (needs_control_head)
    stacksize++;
  if (offset == 0)
    stacksize++;
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;

  allocate_stack(common, stacksize);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));

  stack = 0;
  if (!zero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
    stack = 1;
    }
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    stack++;
    }
  if (offset == 0)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
    stack++;
    }
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
  /* Step "stack" back over the private data slot (and the STR_PTR slot,
  if present); it is used as the control head index in the loop below. */
  stack -= 1 + (offset == 0);
  }

if (offset != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);

loop = LABEL();
while (*cc != OP_KETRPOS)
  {
  /* Each alternative gets a fresh backtrack list. */
  backtrack->top = NULL;
  backtrack->topbacktracks = NULL;
  cc += GET(cc, 1);

  compile_matchingpath(common, ccbegin, cc, backtrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  /* The alternative matched: record the new position and iterate. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

    if (offset != 0)
      {
      /* TMP1 keeps the start of this iteration; it becomes the start of
      the capture and is compared with STR_PTR below. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }

    /* Leave the loop when the iteration did not advance STR_PTR. */
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
    }
  else
    {
    if (offset != 0)
      {
      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
      }

    /* Leave the loop when the iteration did not advance STR_PTR. */
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* framesize >= 0 on this path, so the flag always lives in STACK(0)
    (it was stored there by the frame setup above). */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    }

  if (needs_control_head)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

  JUMPTO(SLJIT_JUMP, loop);
  flush_stubs(common);

  /* Backtracking code of this alternative; failures continue at the
  label created right after it. */
  compile_backtrackingpath(common, backtrack->top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;
  set_jumps(backtrack->topbacktracks, LABEL());

  /* Reload the last recorded string position before trying the next
  alternative (or leaving the loop). */
  if (framesize < 0)
    {
    if (offset != 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
    else
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    }
  else
    {
    if (offset != 0)
      {
      /* Last alternative. */
      if (*cc == OP_KETRPOS)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
      }
    }

  if (*cc == OP_KETRPOS)
    break;
  ccbegin = cc + 1 + LINK_SIZE;
  }

/* We don't have to restore the control head in case of a failed match. */

backtrack->topbacktracks = NULL;
if (!zero)
  {
  /* A flag that is still non-zero means that no iteration succeeded. */
  if (framesize < 0)
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
  else /* TMP2 is set to [private_data_ptr] above. */
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
  }

/* None of them matched. */
set_jumps(emptymatch, LABEL());
count_match(common);
return cc + 1 + LINK_SIZE;
}
7762 
get_iterator_parameters(compiler_common * common,pcre_uchar * cc,pcre_uchar * opcode,pcre_uchar * type,int * max,int * min,pcre_uchar ** end)7763 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *max, int *min, pcre_uchar **end)
7764 {
7765 int class_len;
7766 
7767 *opcode = *cc;
7768 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
7769   {
7770   cc++;
7771   *type = OP_CHAR;
7772   }
7773 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
7774   {
7775   cc++;
7776   *type = OP_CHARI;
7777   *opcode -= OP_STARI - OP_STAR;
7778   }
7779 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
7780   {
7781   cc++;
7782   *type = OP_NOT;
7783   *opcode -= OP_NOTSTAR - OP_STAR;
7784   }
7785 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
7786   {
7787   cc++;
7788   *type = OP_NOTI;
7789   *opcode -= OP_NOTSTARI - OP_STAR;
7790   }
7791 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
7792   {
7793   cc++;
7794   *opcode -= OP_TYPESTAR - OP_STAR;
7795   *type = 0;
7796   }
7797 else
7798   {
7799   SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
7800   *type = *opcode;
7801   cc++;
7802   class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
7803   *opcode = cc[class_len - 1];
7804   if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
7805     {
7806     *opcode -= OP_CRSTAR - OP_STAR;
7807     if (end != NULL)
7808       *end = cc + class_len;
7809     }
7810   else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
7811     {
7812     *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
7813     if (end != NULL)
7814       *end = cc + class_len;
7815     }
7816   else
7817     {
7818     SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
7819     *max = GET2(cc, (class_len + IMM2_SIZE));
7820     *min = GET2(cc, class_len);
7821 
7822     if (*min == 0)
7823       {
7824       SLJIT_ASSERT(*max != 0);
7825       *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : (*opcode == OP_CRMINRANGE ? OP_MINUPTO : OP_POSUPTO);
7826       }
7827     if (*max == *min)
7828       *opcode = OP_EXACT;
7829 
7830     if (end != NULL)
7831       *end = cc + class_len + 2 * IMM2_SIZE;
7832     }
7833   return cc;
7834   }
7835 
7836 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
7837   {
7838   *max = GET2(cc, 0);
7839   cc += IMM2_SIZE;
7840   }
7841 
7842 if (*type == 0)
7843   {
7844   *type = *cc;
7845   if (end != NULL)
7846     *end = next_opcode(common, cc);
7847   cc++;
7848   return cc;
7849   }
7850 
7851 if (end != NULL)
7852   {
7853   *end = cc + 1;
7854 #ifdef SUPPORT_UTF
7855   if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
7856 #endif
7857   }
7858 return cc;
7859 }
7860 
compile_iterator_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)7861 static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7862 {
7863 DEFINE_COMPILER;
7864 backtrack_common *backtrack;
7865 pcre_uchar opcode;
7866 pcre_uchar type;
7867 int max = -1, min = -1;
7868 pcre_uchar *end;
7869 jump_list *nomatch = NULL;
7870 struct sljit_jump *jump = NULL;
7871 struct sljit_label *label;
7872 int private_data_ptr = PRIVATE_DATA(cc);
7873 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
7874 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
7875 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
7876 int tmp_base, tmp_offset;
7877 
7878 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
7879 
7880 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, &end);
7881 
7882 switch(type)
7883   {
7884   case OP_NOT_DIGIT:
7885   case OP_DIGIT:
7886   case OP_NOT_WHITESPACE:
7887   case OP_WHITESPACE:
7888   case OP_NOT_WORDCHAR:
7889   case OP_WORDCHAR:
7890   case OP_ANY:
7891   case OP_ALLANY:
7892   case OP_ANYBYTE:
7893   case OP_ANYNL:
7894   case OP_NOT_HSPACE:
7895   case OP_HSPACE:
7896   case OP_NOT_VSPACE:
7897   case OP_VSPACE:
7898   case OP_CHAR:
7899   case OP_CHARI:
7900   case OP_NOT:
7901   case OP_NOTI:
7902   case OP_CLASS:
7903   case OP_NCLASS:
7904   tmp_base = TMP3;
7905   tmp_offset = 0;
7906   break;
7907 
7908   default:
7909   SLJIT_ASSERT_STOP();
7910   /* Fall through. */
7911 
7912   case OP_EXTUNI:
7913   case OP_XCLASS:
7914   case OP_NOTPROP:
7915   case OP_PROP:
7916   tmp_base = SLJIT_MEM1(SLJIT_SP);
7917   tmp_offset = POSSESSIVE0;
7918   break;
7919   }
7920 
7921 switch(opcode)
7922   {
7923   case OP_STAR:
7924   case OP_PLUS:
7925   case OP_UPTO:
7926   case OP_CRRANGE:
7927   if (type == OP_ANYNL || type == OP_EXTUNI)
7928     {
7929     SLJIT_ASSERT(private_data_ptr == 0);
7930     if (opcode == OP_STAR || opcode == OP_UPTO)
7931       {
7932       allocate_stack(common, 2);
7933       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7934       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
7935       }
7936     else
7937       {
7938       allocate_stack(common, 1);
7939       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7940       }
7941 
7942     if (opcode == OP_UPTO || opcode == OP_CRRANGE)
7943       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
7944 
7945     label = LABEL();
7946     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7947     if (opcode == OP_UPTO || opcode == OP_CRRANGE)
7948       {
7949       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
7950       OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
7951       if (opcode == OP_CRRANGE && min > 0)
7952         CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
7953       if (opcode == OP_UPTO || (opcode == OP_CRRANGE && max > 0))
7954         jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
7955       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
7956       }
7957 
7958     /* We cannot use TMP3 because of this allocate_stack. */
7959     allocate_stack(common, 1);
7960     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7961     JUMPTO(SLJIT_JUMP, label);
7962     if (jump != NULL)
7963       JUMPHERE(jump);
7964     }
7965   else
7966     {
7967     if (opcode == OP_PLUS)
7968       compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7969     if (private_data_ptr == 0)
7970       allocate_stack(common, 2);
7971     OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
7972     if (opcode <= OP_PLUS)
7973       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
7974     else
7975       OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
7976     label = LABEL();
7977     compile_char1_matchingpath(common, type, cc, &nomatch);
7978     OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
7979     if (opcode <= OP_PLUS)
7980       JUMPTO(SLJIT_JUMP, label);
7981     else if (opcode == OP_CRRANGE && max == 0)
7982       {
7983       OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
7984       JUMPTO(SLJIT_JUMP, label);
7985       }
7986     else
7987       {
7988       OP1(SLJIT_MOV, TMP1, 0, base, offset1);
7989       OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
7990       OP1(SLJIT_MOV, base, offset1, TMP1, 0);
7991       CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, max + 1, label);
7992       }
7993     set_jumps(nomatch, LABEL());
7994     if (opcode == OP_CRRANGE)
7995       add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS, base, offset1, SLJIT_IMM, min + 1));
7996     OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
7997     }
7998   BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
7999   break;
8000 
8001   case OP_MINSTAR:
8002   case OP_MINPLUS:
8003   if (opcode == OP_MINPLUS)
8004     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
8005   if (private_data_ptr == 0)
8006     allocate_stack(common, 1);
8007   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8008   BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
8009   break;
8010 
8011   case OP_MINUPTO:
8012   case OP_CRMINRANGE:
8013   if (private_data_ptr == 0)
8014     allocate_stack(common, 2);
8015   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8016   OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
8017   if (opcode == OP_CRMINRANGE)
8018     add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
8019   BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
8020   break;
8021 
8022   case OP_QUERY:
8023   case OP_MINQUERY:
8024   if (private_data_ptr == 0)
8025     allocate_stack(common, 1);
8026   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8027   if (opcode == OP_QUERY)
8028     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
8029   BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
8030   break;
8031 
8032   case OP_EXACT:
8033   OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
8034   label = LABEL();
8035   compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
8036   OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8037   JUMPTO(SLJIT_NOT_ZERO, label);
8038   break;
8039 
8040   case OP_POSSTAR:
8041   case OP_POSPLUS:
8042   case OP_POSUPTO:
8043   if (opcode == OP_POSPLUS)
8044     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
8045   if (opcode == OP_POSUPTO)
8046     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, max);
8047   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8048   label = LABEL();
8049   compile_char1_matchingpath(common, type, cc, &nomatch);
8050   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8051   if (opcode != OP_POSUPTO)
8052     JUMPTO(SLJIT_JUMP, label);
8053   else
8054     {
8055     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, 1);
8056     JUMPTO(SLJIT_NOT_ZERO, label);
8057     }
8058   set_jumps(nomatch, LABEL());
8059   OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
8060   break;
8061 
8062   case OP_POSQUERY:
8063   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8064   compile_char1_matchingpath(common, type, cc, &nomatch);
8065   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8066   set_jumps(nomatch, LABEL());
8067   OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
8068   break;
8069 
8070   case OP_CRPOSRANGE:
8071   /* Combination of OP_EXACT and OP_POSSTAR or OP_POSUPTO */
8072   OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, min);
8073   label = LABEL();
8074   compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
8075   OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8076   JUMPTO(SLJIT_NOT_ZERO, label);
8077 
8078   if (max != 0)
8079     {
8080     SLJIT_ASSERT(max - min > 0);
8081     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, max - min);
8082     }
8083   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8084   label = LABEL();
8085   compile_char1_matchingpath(common, type, cc, &nomatch);
8086   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8087   if (max == 0)
8088     JUMPTO(SLJIT_JUMP, label);
8089   else
8090     {
8091     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, 1);
8092     JUMPTO(SLJIT_NOT_ZERO, label);
8093     }
8094   set_jumps(nomatch, LABEL());
8095   OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
8096   break;
8097 
8098   default:
8099   SLJIT_ASSERT_STOP();
8100   break;
8101   }
8102 
8103 count_match(common);
8104 return end;
8105 }
8106 
compile_fail_accept_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)8107 static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
8108 {
8109 DEFINE_COMPILER;
8110 backtrack_common *backtrack;
8111 
8112 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
8113 
8114 if (*cc == OP_FAIL)
8115   {
8116   add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
8117   return cc + 1;
8118   }
8119 
8120 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
8121   {
8122   /* No need to check notempty conditions. */
8123   if (common->accept_label == NULL)
8124     add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
8125   else
8126     JUMPTO(SLJIT_JUMP, common->accept_label);
8127   return cc + 1;
8128   }
8129 
8130 if (common->accept_label == NULL)
8131   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
8132 else
8133   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
8134 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8135 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
8136 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8137 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
8138 if (common->accept_label == NULL)
8139   add_jump(compiler, &common->accept, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8140 else
8141   CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label);
8142 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
8143 if (common->accept_label == NULL)
8144   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
8145 else
8146   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
8147 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
8148 return cc + 1;
8149 }
8150 
compile_close_matchingpath(compiler_common * common,pcre_uchar * cc)8151 static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
8152 {
8153 DEFINE_COMPILER;
8154 int offset = GET2(cc, 1);
8155 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
8156 
8157 /* Data will be discarded anyway... */
8158 if (common->currententry != NULL)
8159   return cc + 1 + IMM2_SIZE;
8160 
8161 if (!optimized_cbracket)
8162   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
8163 offset <<= 1;
8164 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
8165 if (!optimized_cbracket)
8166   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
8167 return cc + 1 + IMM2_SIZE;
8168 }
8169 
compile_control_verb_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)8170 static SLJIT_INLINE pcre_uchar *compile_control_verb_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
8171 {
8172 DEFINE_COMPILER;
8173 backtrack_common *backtrack;
8174 pcre_uchar opcode = *cc;
8175 pcre_uchar *ccend = cc + 1;
8176 
8177 if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
8178   ccend += 2 + cc[1];
8179 
8180 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
8181 
8182 if (opcode == OP_SKIP)
8183   {
8184   allocate_stack(common, 1);
8185   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8186   return ccend;
8187   }
8188 
8189 if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
8190   {
8191   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8192   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
8193   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
8194   OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
8195   }
8196 
8197 return ccend;
8198 }
8199 
8200 static pcre_uchar then_trap_opcode[1] = { OP_THEN_TRAP };
8201 
compile_then_trap_matchingpath(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,backtrack_common * parent)8202 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
8203 {
8204 DEFINE_COMPILER;
8205 backtrack_common *backtrack;
8206 BOOL needs_control_head;
8207 int size;
8208 
8209 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
8210 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
8211 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
8212 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
8213 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
8214 
8215 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
8216 size = 3 + (size < 0 ? 0 : size);
8217 
8218 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8219 allocate_stack(common, size);
8220 if (size > 3)
8221   OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
8222 else
8223   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
8224 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
8225 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
8226 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
8227 
8228 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
8229 if (size >= 0)
8230   init_frame(common, cc, ccend, size - 1, 0, FALSE);
8231 }
8232 
/* Generates the matching path for the opcodes in [cc, ccend). Each opcode is
dispatched to the helper that handles its family; the helper returns the
position of the next opcode to process.

Arguments:
  common   compiler state
  cc       first opcode to compile
  ccend    end of the range; must point to OP_END or to an opcode in the
           range OP_ALT..OP_KETRPOS (asserted)
  parent   backtrack structure that collects the backtrack entries

The function returns early when a helper returns NULL or when an unknown
opcode is found. When a then-trap is needed for this range, one entry is
pushed before the loop and a second one after it.
*/
static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
BOOL has_then_trap = FALSE;
then_trap_backtrack *save_then_trap = NULL;

SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));

if (common->has_then && common->then_offsets[cc - common->start] != 0)
  {
  SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
  has_then_trap = TRUE;
  save_then_trap = common->then_trap;
  /* Tail item on backtrack. */
  compile_then_trap_matchingpath(common, cc, ccend, parent);
  }

/* The fail-jump list passed to the single-item helpers below is selected at
each call site: parent->top->nextbacktracks when parent->top is set,
parent->topbacktracks otherwise. */
while (cc < ccend)
  {
  switch(*cc)
    {
    /* Single-item opcodes. */
    case OP_SOD:
    case OP_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_NOT_DIGIT:
    case OP_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
    case OP_ANYBYTE:
    case OP_NOTPROP:
    case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE:
    case OP_HSPACE:
    case OP_NOT_VSPACE:
    case OP_VSPACE:
    case OP_EXTUNI:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_NOT:
    case OP_NOTI:
    case OP_REVERSE:
    cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    break;

    case OP_SET_SOM:
    /* OVECTOR(0) is overwritten with the current position; its previous
    value is saved in a new stack slot. */
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    cc++;
    break;

    case OP_CHAR:
    case OP_CHARI:
    /* The multi-character helper is used only in JIT_COMPILE mode. */
    if (common->mode == JIT_COMPILE)
      cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    break;

    /* Repeated single items. */
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    cc = compile_iterator_matchingpath(common, cc, parent);
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* The opcode that follows the 32 byte class data tells whether the class
    is repeated. */
    if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE)
      cc = compile_iterator_matchingpath(common, cc, parent);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    break;

#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
    case OP_XCLASS:
    /* GET(cc, 1) gives the distance to the opcode after the class. */
    if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
      cc = compile_iterator_matchingpath(common, cc, parent);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    break;
#endif

    case OP_REF:
    case OP_REFI:
    if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    else
      {
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
      cc += 1 + IMM2_SIZE;
      }
    break;

    case OP_DNREF:
    case OP_DNREFI:
    if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    else
      {
      compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
      cc += 1 + 2 * IMM2_SIZE;
      }
    break;

    case OP_RECURSE:
    cc = compile_recurse_matchingpath(common, cc, parent);
    break;

    case OP_CALLOUT:
    cc = compile_callout_matchingpath(common, cc, parent);
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
    cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
    break;

    case OP_BRAMINZERO:
    /* The bracket itself is skipped on the matching path; only the current
    position (plus a zero slot when the bracket ends with OP_KETRMIN) is
    saved on the stack. */
    PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
    cc = bracketend(cc + 1);
    if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
      {
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else
      {
      allocate_stack(common, 2);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
      }
    BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
    if (cc[1] > OP_ASSERTBACK_NOT)
      count_match(common);
    break;

    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    cc = compile_bracket_matchingpath(common, cc, parent);
    break;

    case OP_BRAZERO:
    /* An OP_BRAZERO followed by an opcode above OP_ASSERTBACK_NOT is handled
    as a bracket, otherwise as an assertion. */
    if (cc[1] > OP_ASSERTBACK_NOT)
      cc = compile_bracket_matchingpath(common, cc, parent);
    else
      {
      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
      }
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    cc = compile_bracketpos_matchingpath(common, cc, parent);
    break;

    case OP_MARK:
    /* The previous mark is saved on the stack, then the address cc + 2 is
    stored both in the local mark slot and in the jit_arguments. When
    has_skip_arg is set, a type_mark record is linked into the control head
    chain as well. */
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    SLJIT_ASSERT(common->mark_ptr != 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
    allocate_stack(common, common->has_skip_arg ? 5 : 1);
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
    if (common->has_skip_arg)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    case OP_THEN:
    case OP_THEN_ARG:
    case OP_COMMIT:
    cc = compile_control_verb_matchingpath(common, cc, parent);
    break;

    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    cc = compile_fail_accept_matchingpath(common, cc, parent);
    break;

    case OP_CLOSE:
    cc = compile_close_matchingpath(common, cc);
    break;

    case OP_SKIPZERO:
    /* The whole bracket is skipped; no code is generated for it. */
    cc = bracketend(cc + 1);
    break;

    default:
    SLJIT_ASSERT_STOP();
    return;
    }
  /* A helper returned NULL: give up on this path. */
  if (cc == NULL)
    return;
  }

if (has_then_trap)
  {
  /* Head item on backtrack. */
  PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
  BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
  BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
  common->then_trap = save_then_trap;
  }
SLJIT_ASSERT(cc == ccend);
}
8541 
/* The helper macros of the matching path generators end here. */
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS

/* Compiles the backtracking path of (current) and returns from the enclosing
function when the compiler reports an error. It relies on the local names
'common' and 'compiler', and is usable only in functions returning void. */
#define COMPILE_BACKTRACKINGPATH(current) \
  do \
    { \
    compile_backtrackingpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Casts the local variable 'current' to a pointer to the given type. */
#define CURRENT_AS(type) ((type *)current)
8556 
/* Generates the backtracking path of a repeated single item.

Arguments:
  common    compiler state
  current   backtrack entry of the iterator; current->cc points to its
            opcode

The iterator state lives in two slots (offset0, offset1). They are stack
slots STACK(0) and STACK(1) when PRIVATE_DATA(cc) is zero, otherwise they are
the private data slot and the machine word that follows it. Stack slots are
released only in the former case.
*/
static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
pcre_uchar *cc = current->cc;
pcre_uchar opcode;
pcre_uchar type;
int max = -1, min = -1;
struct sljit_label *label = NULL;
struct sljit_jump *jump = NULL;
jump_list *jumplist = NULL;
int private_data_ptr = PRIVATE_DATA(cc);
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, NULL);

switch(opcode)
  {
  case OP_STAR:
  case OP_PLUS:
  case OP_UPTO:
  case OP_CRRANGE:
  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    /* Saved positions are popped one by one; a zero value ends the retries. */
    SLJIT_ASSERT(private_data_ptr == 0);
    set_jumps(current->topbacktracks, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
    }
  else
    {
    if (opcode == OP_UPTO)
      min = 0;
    if (opcode <= OP_PLUS)
      {
      /* offset1 is compared against the position: stop when STR_PTR is not
      above it. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
      }
    else
      {
      /* offset1 is a counter: stop when it is not above min + 1, otherwise
      decrease it. */
      OP1(SLJIT_MOV, TMP1, 0, base, offset1);
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, min + 1);
      OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1);
      }
    /* Give back one character and retry the rest of the pattern. */
    skip_char_back(common);
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
    if (opcode == OP_CRRANGE)
      set_jumps(current->topbacktracks, LABEL());
    JUMPHERE(jump);
    if (private_data_ptr == 0)
      free_stack(common, 2);
    if (opcode == OP_PLUS)
      set_jumps(current->topbacktracks, LABEL());
    }
  break;

  case OP_MINSTAR:
  case OP_MINPLUS:
  /* Match one more item from the saved position and retry the rest. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  compile_char1_matchingpath(common, type, cc, &jumplist);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
  set_jumps(jumplist, LABEL());
  if (private_data_ptr == 0)
    free_stack(common, 1);
  if (opcode == OP_MINPLUS)
    set_jumps(current->topbacktracks, LABEL());
  break;

  case OP_MINUPTO:
  case OP_CRMINRANGE:
  if (opcode == OP_CRMINRANGE)
    {
    label = LABEL();
    set_jumps(current->topbacktracks, label);
    }
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  compile_char1_matchingpath(common, type, cc, &jumplist);

  /* Store the new position and the increased counter. */
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, base, offset1, TMP1, 0);

  /* Keep matching items while the counter is below min + 1. */
  if (opcode == OP_CRMINRANGE)
    CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min + 1, label);

  /* max == 0 means no upper limit check for OP_CRMINRANGE. */
  if (opcode == OP_CRMINRANGE && max == 0)
    JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
  else
    CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, max + 2, CURRENT_AS(iterator_backtrack)->matchingpath);

  set_jumps(jumplist, LABEL());
  if (private_data_ptr == 0)
    free_stack(common, 2);
  break;

  case OP_QUERY:
  /* The saved position is consumed (replaced by zero); a non-zero value
  means one more retry from that position. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
  jump = JUMP(SLJIT_JUMP);
  set_jumps(current->topbacktracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
  JUMPHERE(jump);
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  case OP_MINQUERY:
  /* The saved position is consumed (replaced by zero); when it was non-zero
  the item is matched once before retrying. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  compile_char1_matchingpath(common, type, cc, &jumplist);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
  set_jumps(jumplist, LABEL());
  JUMPHERE(jump);
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  case OP_EXACT:
  case OP_POSPLUS:
  case OP_CRPOSRANGE:
  /* Nothing to retry: only the pending fail jumps are bound here. */
  set_jumps(current->topbacktracks, LABEL());
  break;

  case OP_POSSTAR:
  case OP_POSQUERY:
  case OP_POSUPTO:
  /* No code is generated for these. */
  break;

  default:
  SLJIT_ASSERT_STOP();
  break;
  }
}
8700 
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)8701 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8702 {
8703 DEFINE_COMPILER;
8704 pcre_uchar *cc = current->cc;
8705 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
8706 pcre_uchar type;
8707 
8708 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
8709 
8710 if ((type & 0x1) == 0)
8711   {
8712   /* Maximize case. */
8713   set_jumps(current->topbacktracks, LABEL());
8714   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8715   free_stack(common, 1);
8716   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
8717   return;
8718   }
8719 
8720 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8721 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
8722 set_jumps(current->topbacktracks, LABEL());
8723 free_stack(common, ref ? 2 : 3);
8724 }
8725 
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)8726 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8727 {
8728 DEFINE_COMPILER;
8729 
8730 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
8731   compile_backtrackingpath(common, current->top);
8732 set_jumps(current->topbacktracks, LABEL());
8733 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
8734   return;
8735 
8736 if (common->has_set_som && common->mark_ptr != 0)
8737   {
8738   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8739   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8740   free_stack(common, 2);
8741   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP2, 0);
8742   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
8743   }
8744 else if (common->has_set_som || common->mark_ptr != 0)
8745   {
8746   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8747   free_stack(common, 1);
8748   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
8749   }
8750 }
8751 
/* Generates the backtracking path of an assertion, optionally preceded by
OP_BRAZERO (OP_BRAMINZERO must not occur here; this is asserted).

Arguments:
  common    compiler state
  current   backtrack entry of the assertion (an assert_backtrack);
            current->cc points to the assertion opcode or to the
            OP_BRAZERO in front of it
*/
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
pcre_uchar *cc = current->cc;
pcre_uchar bra = OP_BRA;
struct sljit_jump *brajump = NULL;

SLJIT_ASSERT(*cc != OP_BRAMINZERO);
if (*cc == OP_BRAZERO)
  {
  bra = *cc;
  cc++;
  }

if (bra == OP_BRAZERO)
  {
  /* Reload the value saved in the top stack slot. */
  SLJIT_ASSERT(current->topbacktracks == NULL);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

if (CURRENT_AS(assert_backtrack)->framesize < 0)
  {
  /* No frame to revert. */
  set_jumps(current->topbacktracks, LABEL());

  if (bra == OP_BRAZERO)
    {
    /* The slot is zeroed; a non-zero loaded value re-enters the matching
    path, otherwise the slot is released. */
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
    free_stack(common, 1);
    }
  return;
  }

if (bra == OP_BRAZERO)
  {
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
    free_stack(common, 1);
    return;
    }
  free_stack(common, 1);
  /* A zero loaded value skips the frame handling below. */
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
  {
  /* Positive assertions: reload STACK_TOP from the private data slot, call
  the revertframes routine, then refresh the private data slot from the
  frame. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_backtrack)->framesize * sizeof(sljit_sw));
  }

set_jumps(current->topbacktracks, LABEL());

if (bra == OP_BRAZERO)
  {
  /* We know there is enough place on the stack. */
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
  JUMPHERE(brajump);
  }
}
8818 
compile_bracket_backtrackingpath(compiler_common * common,struct backtrack_common * current)8819 static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8820 {
8821 DEFINE_COMPILER;
8822 int opcode, stacksize, alt_count, alt_max;
8823 int offset = 0;
8824 int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
8825 int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
8826 pcre_uchar *cc = current->cc;
8827 pcre_uchar *ccbegin;
8828 pcre_uchar *ccprev;
8829 pcre_uchar bra = OP_BRA;
8830 pcre_uchar ket;
8831 assert_backtrack *assert;
8832 sljit_uw *next_update_addr = NULL;
8833 BOOL has_alternatives;
8834 BOOL needs_control_head = FALSE;
8835 struct sljit_jump *brazero = NULL;
8836 struct sljit_jump *alt1 = NULL;
8837 struct sljit_jump *alt2 = NULL;
8838 struct sljit_jump *once = NULL;
8839 struct sljit_jump *cond = NULL;
8840 struct sljit_label *rmin_label = NULL;
8841 struct sljit_label *exact_label = NULL;
8842 
8843 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
8844   {
8845   bra = *cc;
8846   cc++;
8847   }
8848 
8849 opcode = *cc;
8850 ccbegin = bracketend(cc) - 1 - LINK_SIZE;
8851 ket = *ccbegin;
8852 if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
8853   {
8854   repeat_ptr = PRIVATE_DATA(ccbegin);
8855   repeat_type = PRIVATE_DATA(ccbegin + 2);
8856   repeat_count = PRIVATE_DATA(ccbegin + 3);
8857   SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
8858   if (repeat_type == OP_UPTO)
8859     ket = OP_KETRMAX;
8860   if (repeat_type == OP_MINUPTO)
8861     ket = OP_KETRMIN;
8862   }
8863 ccbegin = cc;
8864 cc += GET(cc, 1);
8865 has_alternatives = *cc == OP_ALT;
8866 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
8867   has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
8868 if (opcode == OP_CBRA || opcode == OP_SCBRA)
8869   offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
8870 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
8871   opcode = OP_SCOND;
8872 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
8873   opcode = OP_ONCE;
8874 
8875 alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
8876 
8877 /* Decoding the needs_control_head in framesize. */
8878 if (opcode == OP_ONCE)
8879   {
8880   needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
8881   CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
8882   }
8883 
8884 if (ket != OP_KET && repeat_type != 0)
8885   {
8886   /* TMP1 is used in OP_KETRMIN below. */
8887   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8888   free_stack(common, 1);
8889   if (repeat_type == OP_UPTO)
8890     OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
8891   else
8892     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
8893   }
8894 
8895 if (ket == OP_KETRMAX)
8896   {
8897   if (bra == OP_BRAZERO)
8898     {
8899     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8900     free_stack(common, 1);
8901     brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
8902     }
8903   }
8904 else if (ket == OP_KETRMIN)
8905   {
8906   if (bra != OP_BRAMINZERO)
8907     {
8908     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8909     if (repeat_type != 0)
8910       {
8911       /* TMP1 was set a few lines above. */
8912       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
8913       /* Drop STR_PTR for non-greedy plus quantifier. */
8914       if (opcode != OP_ONCE)
8915         free_stack(common, 1);
8916       }
8917     else if (opcode >= OP_SBRA || opcode == OP_ONCE)
8918       {
8919       /* Checking zero-length iteration. */
8920       if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
8921         CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
8922       else
8923         {
8924         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8925         CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
8926         }
8927       /* Drop STR_PTR for non-greedy plus quantifier. */
8928       if (opcode != OP_ONCE)
8929         free_stack(common, 1);
8930       }
8931     else
8932       JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
8933     }
8934   rmin_label = LABEL();
8935   if (repeat_type != 0)
8936     OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
8937   }
8938 else if (bra == OP_BRAZERO)
8939   {
8940   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8941   free_stack(common, 1);
8942   brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
8943   }
8944 else if (repeat_type == OP_EXACT)
8945   {
8946   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
8947   exact_label = LABEL();
8948   }
8949 
8950 if (offset != 0)
8951   {
8952   if (common->capture_last_ptr != 0)
8953     {
8954     SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
8955     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8956     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8957     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
8958     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
8959     free_stack(common, 3);
8960     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
8961     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
8962     }
8963   else if (common->optimized_cbracket[offset >> 1] == 0)
8964     {
8965     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8966     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8967     free_stack(common, 2);
8968     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
8969     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
8970     }
8971   }
8972 
8973 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
8974   {
8975   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
8976     {
8977     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8978     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
8979     }
8980   once = JUMP(SLJIT_JUMP);
8981   }
8982 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
8983   {
8984   if (has_alternatives)
8985     {
8986     /* Always exactly one alternative. */
8987     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8988     free_stack(common, 1);
8989 
8990     alt_max = 2;
8991     alt1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
8992     }
8993   }
8994 else if (has_alternatives)
8995   {
8996   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8997   free_stack(common, 1);
8998 
8999   if (alt_max > 4)
9000     {
9001     /* Table jump if alt_max is greater than 4. */
9002     next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
9003     if (SLJIT_UNLIKELY(next_update_addr == NULL))
9004       return;
9005     sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
9006     add_label_addr(common, next_update_addr++);
9007     }
9008   else
9009     {
9010     if (alt_max == 4)
9011       alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
9012     alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
9013     }
9014   }
9015 
9016 COMPILE_BACKTRACKINGPATH(current->top);
9017 if (current->topbacktracks)
9018   set_jumps(current->topbacktracks, LABEL());
9019 
9020 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
9021   {
9022   /* Conditional block always has at most one alternative. */
9023   if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
9024     {
9025     SLJIT_ASSERT(has_alternatives);
9026     assert = CURRENT_AS(bracket_backtrack)->u.assert;
9027     if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
9028       {
9029       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
9030       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9031       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
9032       }
9033     cond = JUMP(SLJIT_JUMP);
9034     set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
9035     }
9036   else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
9037     {
9038     SLJIT_ASSERT(has_alternatives);
9039     cond = JUMP(SLJIT_JUMP);
9040     set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
9041     }
9042   else
9043     SLJIT_ASSERT(!has_alternatives);
9044   }
9045 
9046 if (has_alternatives)
9047   {
9048   alt_count = sizeof(sljit_uw);
9049   do
9050     {
9051     current->top = NULL;
9052     current->topbacktracks = NULL;
9053     current->nextbacktracks = NULL;
9054     /* Conditional blocks always have an additional alternative, even if it is empty. */
9055     if (*cc == OP_ALT)
9056       {
9057       ccprev = cc + 1 + LINK_SIZE;
9058       cc += GET(cc, 1);
9059       if (opcode != OP_COND && opcode != OP_SCOND)
9060         {
9061         if (opcode != OP_ONCE)
9062           {
9063           if (private_data_ptr != 0)
9064             OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9065           else
9066             OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9067           }
9068         else
9069           OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
9070         }
9071       compile_matchingpath(common, ccprev, cc, current);
9072       if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9073         return;
9074       }
9075 
9076     /* Instructions after the current alternative is successfully matched. */
9077     /* There is a similar code in compile_bracket_matchingpath. */
9078     if (opcode == OP_ONCE)
9079       match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
9080 
9081     stacksize = 0;
9082     if (repeat_type == OP_MINUPTO)
9083       {
9084       /* We need to preserve the counter. TMP2 will be used below. */
9085       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
9086       stacksize++;
9087       }
9088     if (ket != OP_KET || bra != OP_BRA)
9089       stacksize++;
9090     if (offset != 0)
9091       {
9092       if (common->capture_last_ptr != 0)
9093         stacksize++;
9094       if (common->optimized_cbracket[offset >> 1] == 0)
9095         stacksize += 2;
9096       }
9097     if (opcode != OP_ONCE)
9098       stacksize++;
9099 
9100     if (stacksize > 0)
9101       allocate_stack(common, stacksize);
9102 
9103     stacksize = 0;
9104     if (repeat_type == OP_MINUPTO)
9105       {
9106       /* TMP2 was set above. */
9107       OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
9108       stacksize++;
9109       }
9110 
9111     if (ket != OP_KET || bra != OP_BRA)
9112       {
9113       if (ket != OP_KET)
9114         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9115       else
9116         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
9117       stacksize++;
9118       }
9119 
9120     if (offset != 0)
9121       stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
9122 
9123     if (opcode != OP_ONCE)
9124       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
9125 
9126     if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
9127       {
9128       /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
9129       SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
9130       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
9131       }
9132 
9133     JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
9134 
9135     if (opcode != OP_ONCE)
9136       {
9137       if (alt_max > 4)
9138         add_label_addr(common, next_update_addr++);
9139       else
9140         {
9141         if (alt_count != 2 * sizeof(sljit_uw))
9142           {
9143           JUMPHERE(alt1);
9144           if (alt_max == 3 && alt_count == sizeof(sljit_uw))
9145             alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
9146           }
9147         else
9148           {
9149           JUMPHERE(alt2);
9150           if (alt_max == 4)
9151             alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
9152           }
9153         }
9154       alt_count += sizeof(sljit_uw);
9155       }
9156 
9157     COMPILE_BACKTRACKINGPATH(current->top);
9158     if (current->topbacktracks)
9159       set_jumps(current->topbacktracks, LABEL());
9160     SLJIT_ASSERT(!current->nextbacktracks);
9161     }
9162   while (*cc == OP_ALT);
9163 
9164   if (cond != NULL)
9165     {
9166     SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
9167     assert = CURRENT_AS(bracket_backtrack)->u.assert;
9168     if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
9169       {
9170       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
9171       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9172       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
9173       }
9174     JUMPHERE(cond);
9175     }
9176 
9177   /* Free the STR_PTR. */
9178   if (private_data_ptr == 0)
9179     free_stack(common, 1);
9180   }
9181 
9182 if (offset != 0)
9183   {
9184   /* Using both tmp register is better for instruction scheduling. */
9185   if (common->optimized_cbracket[offset >> 1] != 0)
9186     {
9187     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9188     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9189     free_stack(common, 2);
9190     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
9191     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
9192     }
9193   else
9194     {
9195     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9196     free_stack(common, 1);
9197     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9198     }
9199   }
9200 else if (opcode == OP_SBRA || opcode == OP_SCOND)
9201   {
9202   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
9203   free_stack(common, 1);
9204   }
9205 else if (opcode == OP_ONCE)
9206   {
9207   cc = ccbegin + GET(ccbegin, 1);
9208   stacksize = needs_control_head ? 1 : 0;
9209 
9210   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
9211     {
9212     /* Reset head and drop saved frame. */
9213     stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
9214     }
9215   else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
9216     {
9217     /* The STR_PTR must be released. */
9218     stacksize++;
9219     }
9220   free_stack(common, stacksize);
9221 
9222   JUMPHERE(once);
9223   /* Restore previous private_data_ptr */
9224   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
9225     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_sw));
9226   else if (ket == OP_KETRMIN)
9227     {
9228     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9229     /* See the comment below. */
9230     free_stack(common, 2);
9231     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9232     }
9233   }
9234 
9235 if (repeat_type == OP_EXACT)
9236   {
9237   OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
9238   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
9239   CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
9240   }
9241 else if (ket == OP_KETRMAX)
9242   {
9243   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9244   if (bra != OP_BRAZERO)
9245     free_stack(common, 1);
9246 
9247   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
9248   if (bra == OP_BRAZERO)
9249     {
9250     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9251     JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
9252     JUMPHERE(brazero);
9253     free_stack(common, 1);
9254     }
9255   }
9256 else if (ket == OP_KETRMIN)
9257   {
9258   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9259 
9260   /* OP_ONCE removes everything in case of a backtrack, so we don't
9261   need to explicitly release the STR_PTR. The extra release would
9262   affect badly the free_stack(2) above. */
9263   if (opcode != OP_ONCE)
9264     free_stack(common, 1);
9265   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
9266   if (opcode == OP_ONCE)
9267     free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
9268   else if (bra == OP_BRAMINZERO)
9269     free_stack(common, 1);
9270   }
9271 else if (bra == OP_BRAZERO)
9272   {
9273   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9274   JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
9275   JUMPHERE(brazero);
9276   }
9277 }
9278 
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)9279 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9280 {
9281 DEFINE_COMPILER;
9282 int offset;
9283 struct sljit_jump *jump;
9284 
9285 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
9286   {
9287   if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
9288     {
9289     offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
9290     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9291     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9292     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
9293     if (common->capture_last_ptr != 0)
9294       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9295     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
9296     if (common->capture_last_ptr != 0)
9297       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
9298     }
9299   set_jumps(current->topbacktracks, LABEL());
9300   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
9301   return;
9302   }
9303 
9304 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
9305 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9306 
9307 if (current->topbacktracks)
9308   {
9309   jump = JUMP(SLJIT_JUMP);
9310   set_jumps(current->topbacktracks, LABEL());
9311   /* Drop the stack frame. */
9312   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
9313   JUMPHERE(jump);
9314   }
9315 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_sw));
9316 }
9317 
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)9318 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9319 {
9320 assert_backtrack backtrack;
9321 
9322 current->top = NULL;
9323 current->topbacktracks = NULL;
9324 current->nextbacktracks = NULL;
9325 if (current->cc[1] > OP_ASSERTBACK_NOT)
9326   {
9327   /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
9328   compile_bracket_matchingpath(common, current->cc, current);
9329   compile_bracket_backtrackingpath(common, current->top);
9330   }
9331 else
9332   {
9333   memset(&backtrack, 0, sizeof(backtrack));
9334   backtrack.common.cc = current->cc;
9335   backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
9336   /* Manual call of compile_assert_matchingpath. */
9337   compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
9338   }
9339 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
9340 }
9341 
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)9342 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9343 {
9344 DEFINE_COMPILER;
9345 pcre_uchar opcode = *current->cc;
9346 struct sljit_label *loop;
9347 struct sljit_jump *jump;
9348 
9349 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
9350   {
9351   if (common->then_trap != NULL)
9352     {
9353     SLJIT_ASSERT(common->control_head_ptr != 0);
9354 
9355     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9356     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
9357     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
9358     jump = JUMP(SLJIT_JUMP);
9359 
9360     loop = LABEL();
9361     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), -(int)sizeof(sljit_sw));
9362     JUMPHERE(jump);
9363     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(2 * sizeof(sljit_sw)), TMP1, 0, loop);
9364     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(3 * sizeof(sljit_sw)), TMP2, 0, loop);
9365     add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
9366     return;
9367     }
9368   else if (common->positive_assert)
9369     {
9370     add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP));
9371     return;
9372     }
9373   }
9374 
9375 if (common->local_exit)
9376   {
9377   if (common->quit_label == NULL)
9378     add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
9379   else
9380     JUMPTO(SLJIT_JUMP, common->quit_label);
9381   return;
9382   }
9383 
9384 if (opcode == OP_SKIP_ARG)
9385   {
9386   SLJIT_ASSERT(common->control_head_ptr != 0);
9387   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9388   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
9389   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
9390   sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
9391   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
9392 
9393   OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
9394   add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, -1));
9395   return;
9396   }
9397 
9398 if (opcode == OP_SKIP)
9399   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9400 else
9401   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
9402 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
9403 }
9404 
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)9405 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9406 {
9407 DEFINE_COMPILER;
9408 struct sljit_jump *jump;
9409 int size;
9410 
9411 if (CURRENT_AS(then_trap_backtrack)->then_trap)
9412   {
9413   common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
9414   return;
9415   }
9416 
9417 size = CURRENT_AS(then_trap_backtrack)->framesize;
9418 size = 3 + (size < 0 ? 0 : size);
9419 
9420 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
9421 free_stack(common, size);
9422 jump = JUMP(SLJIT_JUMP);
9423 
9424 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
9425 /* STACK_TOP is set by THEN. */
9426 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
9427   add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9428 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9429 free_stack(common, 3);
9430 
9431 JUMPHERE(jump);
9432 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
9433 }
9434 
/* Emits the backtracking paths for a chain of backtrack records. The chain
is followed through the prev links starting at current, and each record is
dispatched on the opcode it points to. common->then_trap is put back to its
entry value before returning. */
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *saved_trap = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->nextbacktracks != NULL)
    set_jumps(current->nextbacktracks, LABEL());

  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Pop the saved value back into OVECTOR(0). */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF:
    case OP_REFI:
    case OP_DNREF:
    case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode after OP_BRAZERO tells an assertion from a bracket. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    if (common->has_skip_arg)
      {
      /* Five entries are dropped: the mark is read from STACK(4) and the
      control head from STACK(0). */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      /* Only the mark was saved. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN:
    case OP_THEN_ARG:
    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT:
    /* The return register is loaded with PCRE_ERROR_NOMATCH unless a local
    exit is active. */
    if (!common->local_exit)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
    if (common->quit_label != NULL)
      JUMPTO(SLJIT_JUMP, common->quit_label);
    else
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    break;

    case OP_CALLOUT:
    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    /* Nothing to undo; just bind the pending jumps here. */
    set_jumps(current->topbacktracks, LABEL());
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_ASSERT_STOP();
    break;
    }
  }

common->then_trap = saved_trap;
}
9621 
compile_recurse(compiler_common * common)9622 static SLJIT_INLINE void compile_recurse(compiler_common *common)
9623 {
9624 DEFINE_COMPILER;
9625 pcre_uchar *cc = common->start + common->currententry->start;
9626 pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
9627 pcre_uchar *ccend = bracketend(cc);
9628 BOOL needs_control_head;
9629 int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
9630 int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
9631 int alternativesize;
9632 BOOL needs_frame;
9633 backtrack_common altbacktrack;
9634 struct sljit_jump *jump;
9635 
9636 /* Recurse captures then. */
9637 common->then_trap = NULL;
9638 
9639 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
9640 needs_frame = framesize >= 0;
9641 if (!needs_frame)
9642   framesize = 0;
9643 alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
9644 
9645 SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0);
9646 common->currententry->entry = LABEL();
9647 set_jumps(common->currententry->calls, common->currententry->entry);
9648 
9649 sljit_emit_fast_enter(compiler, TMP2, 0);
9650 allocate_stack(common, private_data_size + framesize + alternativesize);
9651 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
9652 copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
9653 if (needs_control_head)
9654   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9655 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
9656 if (needs_frame)
9657   init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE);
9658 
9659 if (alternativesize > 0)
9660   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9661 
9662 memset(&altbacktrack, 0, sizeof(backtrack_common));
9663 common->quit_label = NULL;
9664 common->accept_label = NULL;
9665 common->quit = NULL;
9666 common->accept = NULL;
9667 altbacktrack.cc = ccbegin;
9668 cc += GET(cc, 1);
9669 while (1)
9670   {
9671   altbacktrack.top = NULL;
9672   altbacktrack.topbacktracks = NULL;
9673 
9674   if (altbacktrack.cc != ccbegin)
9675     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9676 
9677   compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
9678   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9679     return;
9680 
9681   add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
9682 
9683   compile_backtrackingpath(common, altbacktrack.top);
9684   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9685     return;
9686   set_jumps(altbacktrack.topbacktracks, LABEL());
9687 
9688   if (*cc != OP_ALT)
9689     break;
9690 
9691   altbacktrack.cc = cc + 1 + LINK_SIZE;
9692   cc += GET(cc, 1);
9693   }
9694 
9695 /* None of them matched. */
9696 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
9697 jump = JUMP(SLJIT_JUMP);
9698 
9699 if (common->quit != NULL)
9700   {
9701   set_jumps(common->quit, LABEL());
9702   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
9703   if (needs_frame)
9704     {
9705     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
9706     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9707     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
9708     }
9709   OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
9710   common->quit = NULL;
9711   add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
9712   }
9713 
9714 set_jumps(common->accept, LABEL());
9715 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
9716 if (needs_frame)
9717   {
9718   OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
9719   add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9720   OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
9721   }
9722 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
9723 
9724 JUMPHERE(jump);
9725 if (common->quit != NULL)
9726   set_jumps(common->quit, LABEL());
9727 copy_private_data(common, ccbegin, ccend, FALSE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
9728 free_stack(common, private_data_size + framesize + alternativesize);
9729 if (needs_control_head)
9730   {
9731   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 2 * sizeof(sljit_sw));
9732   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
9733   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP1, 0);
9734   OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
9735   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
9736   }
9737 else
9738   {
9739   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
9740   OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
9741   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP2, 0);
9742   }
9743 sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
9744 }
9745 
9746 #undef COMPILE_BACKTRACKINGPATH
9747 #undef CURRENT_AS
9748 
9749 void
PRIV(jit_compile)9750 PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode)
9751 {
9752 struct sljit_compiler *compiler;
9753 backtrack_common rootbacktrack;
9754 compiler_common common_data;
9755 compiler_common *common = &common_data;
9756 const pcre_uint8 *tables = re->tables;
9757 pcre_study_data *study;
9758 int private_data_size;
9759 pcre_uchar *ccend;
9760 executable_functions *functions;
9761 void *executable_func;
9762 sljit_uw executable_size;
9763 sljit_uw total_length;
9764 label_addr_list *label_addr;
9765 struct sljit_label *mainloop_label = NULL;
9766 struct sljit_label *continue_match_label;
9767 struct sljit_label *empty_match_found_label = NULL;
9768 struct sljit_label *empty_match_backtrack_label = NULL;
9769 struct sljit_label *reset_match_label;
9770 struct sljit_label *quit_label;
9771 struct sljit_jump *jump;
9772 struct sljit_jump *minlength_check_failed = NULL;
9773 struct sljit_jump *reqbyte_notfound = NULL;
9774 struct sljit_jump *empty_match = NULL;
9775 
9776 SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
9777 study = extra->study_data;
9778 
9779 if (!tables)
9780   tables = PRIV(default_tables);
9781 
9782 memset(&rootbacktrack, 0, sizeof(backtrack_common));
9783 memset(common, 0, sizeof(compiler_common));
9784 rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
9785 
9786 common->start = rootbacktrack.cc;
9787 common->read_only_data_head = NULL;
9788 common->fcc = tables + fcc_offset;
9789 common->lcc = (sljit_sw)(tables + lcc_offset);
9790 common->mode = mode;
9791 common->might_be_empty = study->minlength == 0;
9792 common->nltype = NLTYPE_FIXED;
9793 switch(re->options & PCRE_NEWLINE_BITS)
9794   {
9795   case 0:
9796   /* Compile-time default */
9797   switch(NEWLINE)
9798     {
9799     case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
9800     case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
9801     default: common->newline = NEWLINE; break;
9802     }
9803   break;
9804   case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
9805   case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
9806   case PCRE_NEWLINE_CR+
9807        PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
9808   case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
9809   case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
9810   default: return;
9811   }
9812 common->nlmax = READ_CHAR_MAX;
9813 common->nlmin = 0;
9814 if ((re->options & PCRE_BSR_ANYCRLF) != 0)
9815   common->bsr_nltype = NLTYPE_ANYCRLF;
9816 else if ((re->options & PCRE_BSR_UNICODE) != 0)
9817   common->bsr_nltype = NLTYPE_ANY;
9818 else
9819   {
9820 #ifdef BSR_ANYCRLF
9821   common->bsr_nltype = NLTYPE_ANYCRLF;
9822 #else
9823   common->bsr_nltype = NLTYPE_ANY;
9824 #endif
9825   }
9826 common->bsr_nlmax = READ_CHAR_MAX;
9827 common->bsr_nlmin = 0;
9828 common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
9829 common->ctypes = (sljit_sw)(tables + ctypes_offset);
9830 common->name_table = ((pcre_uchar *)re) + re->name_table_offset;
9831 common->name_count = re->name_count;
9832 common->name_entry_size = re->name_entry_size;
9833 common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
9834 #ifdef SUPPORT_UTF
9835 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
9836 common->utf = (re->options & PCRE_UTF8) != 0;
9837 #ifdef SUPPORT_UCP
9838 common->use_ucp = (re->options & PCRE_UCP) != 0;
9839 #endif
9840 if (common->utf)
9841   {
9842   if (common->nltype == NLTYPE_ANY)
9843     common->nlmax = 0x2029;
9844   else if (common->nltype == NLTYPE_ANYCRLF)
9845     common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
9846   else
9847     {
9848     /* We only care about the first newline character. */
9849     common->nlmax = common->newline & 0xff;
9850     }
9851 
9852   if (common->nltype == NLTYPE_FIXED)
9853     common->nlmin = common->newline & 0xff;
9854   else
9855     common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
9856 
9857   if (common->bsr_nltype == NLTYPE_ANY)
9858     common->bsr_nlmax = 0x2029;
9859   else
9860     common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
9861   common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
9862   }
9863 #endif /* SUPPORT_UTF */
9864 ccend = bracketend(common->start);
9865 
9866 /* Calculate the local space size on the stack. */
9867 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
9868 common->optimized_cbracket = (pcre_uint8 *)SLJIT_MALLOC(re->top_bracket + 1, compiler->allocator_data);
9869 if (!common->optimized_cbracket)
9870   return;
9871 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
9872 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
9873 #else
9874 memset(common->optimized_cbracket, 1, re->top_bracket + 1);
9875 #endif
9876 
9877 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
9878 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
9879 common->capture_last_ptr = common->ovector_start;
9880 common->ovector_start += sizeof(sljit_sw);
9881 #endif
9882 if (!check_opcode_types(common, common->start, ccend))
9883   {
9884   SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
9885   return;
9886   }
9887 
9888 /* Checking flags and updating ovector_start. */
9889 if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
9890   {
9891   common->req_char_ptr = common->ovector_start;
9892   common->ovector_start += sizeof(sljit_sw);
9893   }
9894 if (mode != JIT_COMPILE)
9895   {
9896   common->start_used_ptr = common->ovector_start;
9897   common->ovector_start += sizeof(sljit_sw);
9898   if (mode == JIT_PARTIAL_SOFT_COMPILE)
9899     {
9900     common->hit_start = common->ovector_start;
9901     common->ovector_start += 2 * sizeof(sljit_sw);
9902     }
9903   else
9904     {
9905     SLJIT_ASSERT(mode == JIT_PARTIAL_HARD_COMPILE);
9906     common->needs_start_ptr = TRUE;
9907     }
9908   }
9909 if ((re->options & PCRE_FIRSTLINE) != 0)
9910   {
9911   common->first_line_end = common->ovector_start;
9912   common->ovector_start += sizeof(sljit_sw);
9913   }
9914 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
9915 common->control_head_ptr = 1;
9916 #endif
9917 if (common->control_head_ptr != 0)
9918   {
9919   common->control_head_ptr = common->ovector_start;
9920   common->ovector_start += sizeof(sljit_sw);
9921   }
9922 if (common->needs_start_ptr && common->has_set_som)
9923   {
9924   /* Saving the real start pointer is necessary. */
9925   common->start_ptr = common->ovector_start;
9926   common->ovector_start += sizeof(sljit_sw);
9927   }
9928 else
9929   common->needs_start_ptr = FALSE;
9930 
9931 /* Aligning ovector to even number of sljit words. */
9932 if ((common->ovector_start & sizeof(sljit_sw)) != 0)
9933   common->ovector_start += sizeof(sljit_sw);
9934 
9935 if (common->start_ptr == 0)
9936   common->start_ptr = OVECTOR(0);
9937 
9938 /* Capturing brackets cannot be optimized if callouts are allowed. */
9939 if (common->capture_last_ptr != 0)
9940   memset(common->optimized_cbracket, 0, re->top_bracket + 1);
9941 
9942 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
9943 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
9944 
9945 total_length = ccend - common->start;
9946 common->private_data_ptrs = (sljit_si *)SLJIT_MALLOC(total_length * (sizeof(sljit_si) + (common->has_then ? 1 : 0)), compiler->allocator_data);
9947 if (!common->private_data_ptrs)
9948   {
9949   SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
9950   return;
9951   }
9952 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_si));
9953 
9954 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
9955 set_private_data_ptrs(common, &private_data_size, ccend);
9956 if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
9957   {
9958   SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
9959   SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
9960   return;
9961   }
9962 
9963 if (common->has_then)
9964   {
9965   common->then_offsets = (pcre_uint8 *)(common->private_data_ptrs + total_length);
9966   memset(common->then_offsets, 0, total_length);
9967   set_then_offsets(common, common->start, NULL);
9968   }
9969 
9970 compiler = sljit_create_compiler(NULL);
9971 if (!compiler)
9972   {
9973   SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
9974   SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
9975   return;
9976   }
9977 common->compiler = compiler;
9978 
9979 /* Main pcre_jit_exec entry. */
9980 sljit_emit_enter(compiler, 0, 1, 5, 5, 0, 0, private_data_size);
9981 
9982 /* Register init. */
9983 reset_ovector(common, (re->top_bracket + 1) * 2);
9984 if (common->req_char_ptr != 0)
9985   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
9986 
9987 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
9988 OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
9989 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
9990 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
9991 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
9992 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
9993 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
9994 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
9995 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
9996 
9997 if (mode == JIT_PARTIAL_SOFT_COMPILE)
9998   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
9999 if (common->mark_ptr != 0)
10000   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
10001 if (common->control_head_ptr != 0)
10002   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
10003 
10004 /* Main part of the matching */
10005 if ((re->options & PCRE_ANCHORED) == 0)
10006   {
10007   mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);
10008   continue_match_label = LABEL();
10009   /* Forward search if possible. */
10010   if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
10011     {
10012     if (mode == JIT_COMPILE && fast_forward_first_n_chars(common, (re->options & PCRE_FIRSTLINE) != 0))
10013       ;
10014     else if ((re->flags & PCRE_FIRSTSET) != 0)
10015       fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0);
10016     else if ((re->flags & PCRE_STARTLINE) != 0)
10017       fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
10018     else if (study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
10019       fast_forward_start_bits(common, study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
10020     }
10021   }
10022 else
10023   continue_match_label = LABEL();
10024 
10025 if (mode == JIT_COMPILE && study->minlength > 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
10026   {
10027   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
10028   OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
10029   minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
10030   }
10031 if (common->req_char_ptr != 0)
10032   reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
10033 
10034 /* Store the current STR_PTR in OVECTOR(0). */
10035 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
10036 /* Copy the limit of allowed recursions. */
10037 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
10038 if (common->capture_last_ptr != 0)
10039   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, -1);
10040 
10041 if (common->needs_start_ptr)
10042   {
10043   SLJIT_ASSERT(common->start_ptr != OVECTOR(0));
10044   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
10045   }
10046 else
10047   SLJIT_ASSERT(common->start_ptr == OVECTOR(0));
10048 
10049 /* Copy the beginning of the string. */
10050 if (mode == JIT_PARTIAL_SOFT_COMPILE)
10051   {
10052   jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
10053   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
10054   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start + sizeof(sljit_sw), STR_PTR, 0);
10055   JUMPHERE(jump);
10056   }
10057 else if (mode == JIT_PARTIAL_HARD_COMPILE)
10058   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
10059 
10060 compile_matchingpath(common, common->start, ccend, &rootbacktrack);
10061 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10062   {
10063   sljit_free_compiler(compiler);
10064   SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10065   SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
10066   free_read_only_data(common->read_only_data_head, compiler->allocator_data);
10067   return;
10068   }
10069 
10070 if (common->might_be_empty)
10071   {
10072   empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
10073   empty_match_found_label = LABEL();
10074   }
10075 
10076 common->accept_label = LABEL();
10077 if (common->accept != NULL)
10078   set_jumps(common->accept, common->accept_label);
10079 
10080 /* This means we have a match. Update the ovector. */
10081 copy_ovector(common, re->top_bracket + 1);
10082 common->quit_label = common->forced_quit_label = LABEL();
10083 if (common->quit != NULL)
10084   set_jumps(common->quit, common->quit_label);
10085 if (common->forced_quit != NULL)
10086   set_jumps(common->forced_quit, common->forced_quit_label);
10087 if (minlength_check_failed != NULL)
10088   SET_LABEL(minlength_check_failed, common->forced_quit_label);
10089 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
10090 
10091 if (mode != JIT_COMPILE)
10092   {
10093   common->partialmatchlabel = LABEL();
10094   set_jumps(common->partialmatch, common->partialmatchlabel);
10095   return_with_partial_match(common, common->quit_label);
10096   }
10097 
10098 if (common->might_be_empty)
10099   empty_match_backtrack_label = LABEL();
10100 compile_backtrackingpath(common, rootbacktrack.top);
10101 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10102   {
10103   sljit_free_compiler(compiler);
10104   SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10105   SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
10106   free_read_only_data(common->read_only_data_head, compiler->allocator_data);
10107   return;
10108   }
10109 
10110 SLJIT_ASSERT(rootbacktrack.prev == NULL);
10111 reset_match_label = LABEL();
10112 
10113 if (mode == JIT_PARTIAL_SOFT_COMPILE)
10114   {
10115   /* Update hit_start only in the first time. */
10116   jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
10117   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr);
10118   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
10119   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
10120   JUMPHERE(jump);
10121   }
10122 
10123 /* Check we have remaining characters. */
10124 if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0)
10125   {
10126   SLJIT_ASSERT(common->first_line_end != 0);
10127   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
10128   }
10129 
10130 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
10131 
10132 if ((re->options & PCRE_ANCHORED) == 0)
10133   {
10134   if (common->ff_newline_shortcut != NULL)
10135     {
10136     if ((re->options & PCRE_FIRSTLINE) == 0)
10137       CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
10138     /* There cannot be more newlines here. */
10139     }
10140   else
10141     {
10142     if ((re->options & PCRE_FIRSTLINE) == 0)
10143       CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop_label);
10144     else
10145       CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, mainloop_label);
10146     }
10147   }
10148 
10149 /* No more remaining characters. */
10150 if (reqbyte_notfound != NULL)
10151   JUMPHERE(reqbyte_notfound);
10152 
10153 if (mode == JIT_PARTIAL_SOFT_COMPILE)
10154   CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
10155 
10156 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
10157 JUMPTO(SLJIT_JUMP, common->quit_label);
10158 
10159 flush_stubs(common);
10160 
10161 if (common->might_be_empty)
10162   {
10163   JUMPHERE(empty_match);
10164   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10165   OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
10166   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label);
10167   OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
10168   CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label);
10169   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
10170   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
10171   JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
10172   }
10173 
10174 common->currententry = common->entries;
10175 common->local_exit = TRUE;
10176 quit_label = common->quit_label;
10177 while (common->currententry != NULL)
10178   {
10179   /* Might add new entries. */
10180   compile_recurse(common);
10181   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10182     {
10183     sljit_free_compiler(compiler);
10184     SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10185     SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
10186     free_read_only_data(common->read_only_data_head, compiler->allocator_data);
10187     return;
10188     }
10189   flush_stubs(common);
10190   common->currententry = common->currententry->next;
10191   }
10192 common->local_exit = FALSE;
10193 common->quit_label = quit_label;
10194 
10195 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
10196 /* This is a (really) rare case. */
10197 set_jumps(common->stackalloc, LABEL());
10198 /* RETURN_ADDR is not a saved register. */
10199 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
10200 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
10201 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10202 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
10203 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
10204 OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);
10205 
10206 sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
10207 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
10208 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10209 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
10210 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top));
10211 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit));
10212 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
10213 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
10214 
10215 /* Allocation failed. */
10216 JUMPHERE(jump);
10217 /* We break the return address cache here, but this is a really rare case. */
10218 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
10219 JUMPTO(SLJIT_JUMP, common->quit_label);
10220 
10221 /* Call limit reached. */
10222 set_jumps(common->calllimit, LABEL());
10223 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
10224 JUMPTO(SLJIT_JUMP, common->quit_label);
10225 
10226 if (common->revertframes != NULL)
10227   {
10228   set_jumps(common->revertframes, LABEL());
10229   do_revertframes(common);
10230   }
10231 if (common->wordboundary != NULL)
10232   {
10233   set_jumps(common->wordboundary, LABEL());
10234   check_wordboundary(common);
10235   }
10236 if (common->anynewline != NULL)
10237   {
10238   set_jumps(common->anynewline, LABEL());
10239   check_anynewline(common);
10240   }
10241 if (common->hspace != NULL)
10242   {
10243   set_jumps(common->hspace, LABEL());
10244   check_hspace(common);
10245   }
10246 if (common->vspace != NULL)
10247   {
10248   set_jumps(common->vspace, LABEL());
10249   check_vspace(common);
10250   }
10251 if (common->casefulcmp != NULL)
10252   {
10253   set_jumps(common->casefulcmp, LABEL());
10254   do_casefulcmp(common);
10255   }
10256 if (common->caselesscmp != NULL)
10257   {
10258   set_jumps(common->caselesscmp, LABEL());
10259   do_caselesscmp(common);
10260   }
10261 if (common->reset_match != NULL)
10262   {
10263   set_jumps(common->reset_match, LABEL());
10264   do_reset_match(common, (re->top_bracket + 1) * 2);
10265   CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
10266   OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
10267   JUMPTO(SLJIT_JUMP, reset_match_label);
10268   }
10269 #ifdef SUPPORT_UTF
10270 #ifdef COMPILE_PCRE8
10271 if (common->utfreadchar != NULL)
10272   {
10273   set_jumps(common->utfreadchar, LABEL());
10274   do_utfreadchar(common);
10275   }
10276 if (common->utfreadchar16 != NULL)
10277   {
10278   set_jumps(common->utfreadchar16, LABEL());
10279   do_utfreadchar16(common);
10280   }
10281 if (common->utfreadtype8 != NULL)
10282   {
10283   set_jumps(common->utfreadtype8, LABEL());
10284   do_utfreadtype8(common);
10285   }
10286 #endif /* COMPILE_PCRE8 */
10287 #endif /* SUPPORT_UTF */
10288 #ifdef SUPPORT_UCP
10289 if (common->getucd != NULL)
10290   {
10291   set_jumps(common->getucd, LABEL());
10292   do_getucd(common);
10293   }
10294 #endif
10295 
10296 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10297 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
10298 
10299 executable_func = sljit_generate_code(compiler);
10300 executable_size = sljit_get_generated_code_size(compiler);
10301 label_addr = common->label_addrs;
10302 while (label_addr != NULL)
10303   {
10304   *label_addr->update_addr = sljit_get_label_addr(label_addr->label);
10305   label_addr = label_addr->next;
10306   }
10307 sljit_free_compiler(compiler);
10308 if (executable_func == NULL)
10309   {
10310   free_read_only_data(common->read_only_data_head, compiler->allocator_data);
10311   return;
10312   }
10313 
10314 /* Reuse the function descriptor if possible. */
10315 if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL)
10316   functions = (executable_functions *)extra->executable_jit;
10317 else
10318   {
10319   /* Note: If your memory-checker has flagged the allocation below as a
10320    * memory leak, it is probably because you either forgot to call
10321    * pcre_free_study() (or pcre16_free_study()) on the pcre_extra (or
10322    * pcre16_extra) object, or you called said function after having
10323    * cleared the PCRE_EXTRA_EXECUTABLE_JIT bit from the "flags" field
10324    * of the object. (The function will only free the JIT data if the
10325    * bit remains set, as the bit indicates that the pointer to the data
10326    * is valid.)
10327    */
10328   functions = SLJIT_MALLOC(sizeof(executable_functions), compiler->allocator_data);
10329   if (functions == NULL)
10330     {
10331     /* This case is highly unlikely since we just recently
10332     freed a lot of memory. Not impossible though. */
10333     sljit_free_code(executable_func);
10334     free_read_only_data(common->read_only_data_head, compiler->allocator_data);
10335     return;
10336     }
10337   memset(functions, 0, sizeof(executable_functions));
10338   functions->top_bracket = (re->top_bracket + 1) * 2;
10339   functions->limit_match = (re->flags & PCRE_MLSET) != 0 ? re->limit_match : 0;
10340   extra->executable_jit = functions;
10341   extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
10342   }
10343 
10344 functions->executable_funcs[mode] = executable_func;
10345 functions->read_only_data_heads[mode] = common->read_only_data_head;
10346 functions->executable_sizes[mode] = executable_size;
10347 }
10348 
jit_machine_stack_exec(jit_arguments * arguments,void * executable_func)10349 static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, void *executable_func)
10350 {
10351 union {
10352    void *executable_func;
10353    jit_function call_executable_func;
10354 } convert_executable_func;
10355 pcre_uint8 local_space[MACHINE_STACK_SIZE];
10356 struct sljit_stack local_stack;
10357 
10358 local_stack.top = (sljit_sw)&local_space;
10359 local_stack.base = local_stack.top;
10360 local_stack.limit = local_stack.base + MACHINE_STACK_SIZE;
10361 local_stack.max_limit = local_stack.limit;
10362 arguments->stack = &local_stack;
10363 convert_executable_func.executable_func = executable_func;
10364 return convert_executable_func.call_executable_func(arguments);
10365 }
10366 
10367 int
PRIV(jit_exec)10368 PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject,
10369   int length, int start_offset, int options, int *offsets, int offset_count)
10370 {
10371 executable_functions *functions = (executable_functions *)extra_data->executable_jit;
10372 union {
10373    void *executable_func;
10374    jit_function call_executable_func;
10375 } convert_executable_func;
10376 jit_arguments arguments;
10377 int max_offset_count;
10378 int retval;
10379 int mode = JIT_COMPILE;
10380 
10381 if ((options & PCRE_PARTIAL_HARD) != 0)
10382   mode = JIT_PARTIAL_HARD_COMPILE;
10383 else if ((options & PCRE_PARTIAL_SOFT) != 0)
10384   mode = JIT_PARTIAL_SOFT_COMPILE;
10385 
10386 if (functions->executable_funcs[mode] == NULL)
10387   return PCRE_ERROR_JIT_BADOPTION;
10388 
10389 /* Sanity checks should be handled by pcre_exec. */
10390 arguments.str = subject + start_offset;
10391 arguments.begin = subject;
10392 arguments.end = subject + length;
10393 arguments.mark_ptr = NULL;
10394 /* JIT decreases this value less frequently than the interpreter. */
10395 arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit);
10396 if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
10397   arguments.limit_match = functions->limit_match;
10398 arguments.notbol = (options & PCRE_NOTBOL) != 0;
10399 arguments.noteol = (options & PCRE_NOTEOL) != 0;
10400 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
10401 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
10402 arguments.offsets = offsets;
10403 arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
10404 arguments.real_offset_count = offset_count;
10405 
10406 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
10407 the output vector for storing captured strings, with the remainder used as
10408 workspace. We don't need the workspace here. For compatibility, we limit the
10409 number of captured strings in the same way as pcre_exec(), so that the user
10410 gets the same result with and without JIT. */
10411 
10412 if (offset_count != 2)
10413   offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
10414 max_offset_count = functions->top_bracket;
10415 if (offset_count > max_offset_count)
10416   offset_count = max_offset_count;
10417 arguments.offset_count = offset_count;
10418 
10419 if (functions->callback)
10420   arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata);
10421 else
10422   arguments.stack = (struct sljit_stack *)functions->userdata;
10423 
10424 if (arguments.stack == NULL)
10425   retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]);
10426 else
10427   {
10428   convert_executable_func.executable_func = functions->executable_funcs[mode];
10429   retval = convert_executable_func.call_executable_func(&arguments);
10430   }
10431 
10432 if (retval * 2 > offset_count)
10433   retval = 0;
10434 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
10435   *(extra_data->mark) = arguments.mark_ptr;
10436 
10437 return retval;
10438 }
10439 
10440 #if defined COMPILE_PCRE8
10441 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_jit_exec(const pcre * argument_re,const pcre_extra * extra_data,PCRE_SPTR subject,int length,int start_offset,int options,int * offsets,int offset_count,pcre_jit_stack * stack)10442 pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data,
10443   PCRE_SPTR subject, int length, int start_offset, int options,
10444   int *offsets, int offset_count, pcre_jit_stack *stack)
10445 #elif defined COMPILE_PCRE16
10446 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
10447 pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
10448   PCRE_SPTR16 subject, int length, int start_offset, int options,
10449   int *offsets, int offset_count, pcre16_jit_stack *stack)
10450 #elif defined COMPILE_PCRE32
10451 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
10452 pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
10453   PCRE_SPTR32 subject, int length, int start_offset, int options,
10454   int *offsets, int offset_count, pcre32_jit_stack *stack)
10455 #endif
10456 {
10457 pcre_uchar *subject_ptr = (pcre_uchar *)subject;
10458 executable_functions *functions = (executable_functions *)extra_data->executable_jit;
10459 union {
10460    void *executable_func;
10461    jit_function call_executable_func;
10462 } convert_executable_func;
10463 jit_arguments arguments;
10464 int max_offset_count;
10465 int retval;
10466 int mode = JIT_COMPILE;
10467 
10468 SLJIT_UNUSED_ARG(argument_re);
10469 
10470 /* Plausibility checks */
10471 if ((options & ~PUBLIC_JIT_EXEC_OPTIONS) != 0) return PCRE_ERROR_JIT_BADOPTION;
10472 
10473 if ((options & PCRE_PARTIAL_HARD) != 0)
10474   mode = JIT_PARTIAL_HARD_COMPILE;
10475 else if ((options & PCRE_PARTIAL_SOFT) != 0)
10476   mode = JIT_PARTIAL_SOFT_COMPILE;
10477 
10478 if (functions->executable_funcs[mode] == NULL)
10479   return PCRE_ERROR_JIT_BADOPTION;
10480 
10481 /* Sanity checks should be handled by pcre_exec. */
10482 arguments.stack = (struct sljit_stack *)stack;
10483 arguments.str = subject_ptr + start_offset;
10484 arguments.begin = subject_ptr;
10485 arguments.end = subject_ptr + length;
10486 arguments.mark_ptr = NULL;
10487 /* JIT decreases this value less frequently than the interpreter. */
10488 arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit);
10489 if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
10490   arguments.limit_match = functions->limit_match;
10491 arguments.notbol = (options & PCRE_NOTBOL) != 0;
10492 arguments.noteol = (options & PCRE_NOTEOL) != 0;
10493 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
10494 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
10495 arguments.offsets = offsets;
10496 arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
10497 arguments.real_offset_count = offset_count;
10498 
10499 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
10500 the output vector for storing captured strings, with the remainder used as
10501 workspace. We don't need the workspace here. For compatibility, we limit the
10502 number of captured strings in the same way as pcre_exec(), so that the user
10503 gets the same result with and without JIT. */
10504 
10505 if (offset_count != 2)
10506   offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
10507 max_offset_count = functions->top_bracket;
10508 if (offset_count > max_offset_count)
10509   offset_count = max_offset_count;
10510 arguments.offset_count = offset_count;
10511 
10512 convert_executable_func.executable_func = functions->executable_funcs[mode];
10513 retval = convert_executable_func.call_executable_func(&arguments);
10514 
10515 if (retval * 2 > offset_count)
10516   retval = 0;
10517 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
10518   *(extra_data->mark) = arguments.mark_ptr;
10519 
10520 return retval;
10521 }
10522 
10523 void
PRIV(jit_free)10524 PRIV(jit_free)(void *executable_funcs)
10525 {
10526 int i;
10527 executable_functions *functions = (executable_functions *)executable_funcs;
10528 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
10529   {
10530   if (functions->executable_funcs[i] != NULL)
10531     sljit_free_code(functions->executable_funcs[i]);
10532   free_read_only_data(functions->read_only_data_heads[i], NULL);
10533   }
10534 SLJIT_FREE(functions, compiler->allocator_data);
10535 }
10536 
10537 int
PRIV(jit_get_size)10538 PRIV(jit_get_size)(void *executable_funcs)
10539 {
10540 int i;
10541 sljit_uw size = 0;
10542 sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes;
10543 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
10544   size += executable_sizes[i];
10545 return (int)size;
10546 }
10547 
10548 const char*
PRIV(jit_get_target)10549 PRIV(jit_get_target)(void)
10550 {
10551 return sljit_get_platform_name();
10552 }
10553 
10554 #if defined COMPILE_PCRE8
10555 PCRE_EXP_DECL pcre_jit_stack *
pcre_jit_stack_alloc(int startsize,int maxsize)10556 pcre_jit_stack_alloc(int startsize, int maxsize)
10557 #elif defined COMPILE_PCRE16
10558 PCRE_EXP_DECL pcre16_jit_stack *
10559 pcre16_jit_stack_alloc(int startsize, int maxsize)
10560 #elif defined COMPILE_PCRE32
10561 PCRE_EXP_DECL pcre32_jit_stack *
10562 pcre32_jit_stack_alloc(int startsize, int maxsize)
10563 #endif
10564 {
10565 if (startsize < 1 || maxsize < 1)
10566   return NULL;
10567 if (startsize > maxsize)
10568   startsize = maxsize;
10569 startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
10570 maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
10571 return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize, NULL);
10572 }
10573 
10574 #if defined COMPILE_PCRE8
10575 PCRE_EXP_DECL void
pcre_jit_stack_free(pcre_jit_stack * stack)10576 pcre_jit_stack_free(pcre_jit_stack *stack)
10577 #elif defined COMPILE_PCRE16
10578 PCRE_EXP_DECL void
10579 pcre16_jit_stack_free(pcre16_jit_stack *stack)
10580 #elif defined COMPILE_PCRE32
10581 PCRE_EXP_DECL void
10582 pcre32_jit_stack_free(pcre32_jit_stack *stack)
10583 #endif
10584 {
10585 sljit_free_stack((struct sljit_stack *)stack, NULL);
10586 }
10587 
10588 #if defined COMPILE_PCRE8
10589 PCRE_EXP_DECL void
pcre_assign_jit_stack(pcre_extra * extra,pcre_jit_callback callback,void * userdata)10590 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
10591 #elif defined COMPILE_PCRE16
10592 PCRE_EXP_DECL void
10593 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
10594 #elif defined COMPILE_PCRE32
10595 PCRE_EXP_DECL void
10596 pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
10597 #endif
10598 {
10599 executable_functions *functions;
10600 if (extra != NULL &&
10601     (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
10602     extra->executable_jit != NULL)
10603   {
10604   functions = (executable_functions *)extra->executable_jit;
10605   functions->callback = callback;
10606   functions->userdata = userdata;
10607   }
10608 }
10609 
10610 #if defined COMPILE_PCRE8
10611 PCRE_EXP_DECL void
pcre_jit_free_unused_memory(void)10612 pcre_jit_free_unused_memory(void)
10613 #elif defined COMPILE_PCRE16
10614 PCRE_EXP_DECL void
10615 pcre16_jit_free_unused_memory(void)
10616 #elif defined COMPILE_PCRE32
10617 PCRE_EXP_DECL void
10618 pcre32_jit_free_unused_memory(void)
10619 #endif
10620 {
10621 sljit_free_unused_memory_exec();
10622 }
10623 
10624 #else  /* SUPPORT_JIT */
10625 
10626 /* These are dummy functions to avoid linking errors when JIT support is not
10627 being compiled. */
10628 
10629 #if defined COMPILE_PCRE8
10630 PCRE_EXP_DECL pcre_jit_stack *
pcre_jit_stack_alloc(int startsize,int maxsize)10631 pcre_jit_stack_alloc(int startsize, int maxsize)
10632 #elif defined COMPILE_PCRE16
10633 PCRE_EXP_DECL pcre16_jit_stack *
10634 pcre16_jit_stack_alloc(int startsize, int maxsize)
10635 #elif defined COMPILE_PCRE32
10636 PCRE_EXP_DECL pcre32_jit_stack *
10637 pcre32_jit_stack_alloc(int startsize, int maxsize)
10638 #endif
10639 {
10640 (void)startsize;
10641 (void)maxsize;
10642 return NULL;
10643 }
10644 
10645 #if defined COMPILE_PCRE8
10646 PCRE_EXP_DECL void
pcre_jit_stack_free(pcre_jit_stack * stack)10647 pcre_jit_stack_free(pcre_jit_stack *stack)
10648 #elif defined COMPILE_PCRE16
10649 PCRE_EXP_DECL void
10650 pcre16_jit_stack_free(pcre16_jit_stack *stack)
10651 #elif defined COMPILE_PCRE32
10652 PCRE_EXP_DECL void
10653 pcre32_jit_stack_free(pcre32_jit_stack *stack)
10654 #endif
10655 {
10656 (void)stack;
10657 }
10658 
10659 #if defined COMPILE_PCRE8
10660 PCRE_EXP_DECL void
pcre_assign_jit_stack(pcre_extra * extra,pcre_jit_callback callback,void * userdata)10661 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
10662 #elif defined COMPILE_PCRE16
10663 PCRE_EXP_DECL void
10664 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
10665 #elif defined COMPILE_PCRE32
10666 PCRE_EXP_DECL void
10667 pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
10668 #endif
10669 {
10670 (void)extra;
10671 (void)callback;
10672 (void)userdata;
10673 }
10674 
10675 #if defined COMPILE_PCRE8
10676 PCRE_EXP_DECL void
pcre_jit_free_unused_memory(void)10677 pcre_jit_free_unused_memory(void)
10678 #elif defined COMPILE_PCRE16
10679 PCRE_EXP_DECL void
10680 pcre16_jit_free_unused_memory(void)
10681 #elif defined COMPILE_PCRE32
10682 PCRE_EXP_DECL void
10683 pcre32_jit_free_unused_memory(void)
10684 #endif
10685 {
10686 }
10687 
10688 #endif
10689 
10690 /* End of pcre_jit_compile.c */
10691